Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 28 additions & 16 deletions api/src/org/labkey/api/security/AuthFilter.java
Original file line number Diff line number Diff line change
Expand Up @@ -40,14 +40,16 @@
import org.labkey.api.util.HttpUtil;
import org.labkey.api.util.HttpsUtil;
import org.labkey.api.util.Pair;
import org.labkey.api.util.Rate;
import org.labkey.api.util.RateLimiter;
import org.labkey.api.view.UnauthorizedException;
import org.labkey.api.view.ViewServlet;
import org.labkey.api.view.template.PageConfig;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.util.Random;
import java.util.concurrent.TimeUnit;


@SuppressWarnings({"UnusedDeclaration"})
Expand Down Expand Up @@ -237,21 +239,9 @@ else if (!AppProps.getInstance().isDevMode())

QueryService.get().setEnvironment(QueryService.Environment.USER, user);

if (AppProps.getInstance().isOptionalFeatureEnabled("experimental-unsafe-inline"))
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you mean to delete this? Maybe since I don't see it being registered as an optional feature anywhere?

{
String csp = StringUtils.trimToEmpty(((HttpServletResponse) response).getHeader("Content-Security-Policy"));
String nonceDirectiveValue = "'nonce-" + PageConfig.getScriptNonceHeader(req) + "'";
if (!csp.contains(nonceDirectiveValue))
{
if (!csp.contains("script-src "))
{
if (StringUtils.isNotBlank(csp))
csp = StringUtils.appendIfMissing(csp, ";");
csp += "script-src 'unsafe-eval' http: https: " + nonceDirectiveValue + ";";
}
((HttpServletResponse) response).setHeader("Content-Security-Policy", csp);
}
}
// checkRateLimiterOverage() will set response status
if (user.isGuest() && checkRateLimiterOverage((AuthenticatedRequest)req, resp))
return;

try
{
Expand Down Expand Up @@ -322,4 +312,26 @@ private void ensureFirstRequestHandled(HttpServletRequest request)
_firstRequestHandled = true;
}
}

static final RateLimiter robotLimiter = new RateLimiter("Robots", new Rate(20, TimeUnit.MINUTES));

static boolean checkRateLimiterOverage(AuthenticatedRequest req, HttpServletResponse res)
{
var userAgent = req.getHeader("User-Agent");
/* NOTE: the health checker is currently considered a robot. Don't 429 the healthchecker */
if (StringUtils.contains(userAgent, "healthchecker"))
return false;
if (!req.isRobot())
return false;
var count = robotLimiter.getCount();
var delay = robotLimiter.getDelay();
if (count >= 10 && delay > 0)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This overall approach seems great to me, but I'll admit that I can't figure out what the effective limits will be. I'm confused on how 10 and 0 connect with the arguments to Rate above.

Regardless, to move this forward, we might want config to control this. Either a simple on/off switch or a way to control the allowed rate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes. Needs startup properties/configuration before being released into the wild. I'll add comments.

{
res.addHeader("Retry-After", String.valueOf((int)(delay/1000 + 5)));
res.setStatus(429); // TOO MANY REQUESTS why no HttpServletResoonse constant?
return true;
}
robotLimiter.add(1, false);
return false;
}
}
9 changes: 6 additions & 3 deletions api/src/org/labkey/api/security/AuthenticatedRequest.java
Original file line number Diff line number Diff line change
Expand Up @@ -231,12 +231,15 @@ private boolean isGuest()
return _user.isGuest() && !_loggedIn;
}

private boolean isRobot()
private Boolean _robot = null;

public boolean isRobot()
{
return PageFlowUtil.isRobotUserAgent(getHeader("User-Agent"));
if (null == _robot)
_robot = PageFlowUtil.isRobotUserAgent(getHeader("User-Agent"));
return _robot;
}


// Methods below use reflection to pull Tomcat-specific implementation bits out of the request. This can be helpful
// for low-level, temporary debugging, but it's not portable across servlet containers or versions.

Expand Down
2 changes: 2 additions & 0 deletions api/src/org/labkey/api/util/RateAccumulator.java
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ public C getCounter()
return _counter;
}

// Even with the max(1000) this can be a little aggressive when the time period is short.
// For now the caller can check getCount() if this is a concern.
double getRate(long now)
{
return (double)getCount() / max(1000, now - getStart());
Expand Down
6 changes: 3 additions & 3 deletions api/src/org/labkey/api/util/RateLimiter.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@

public class RateLimiter
{
static final Logger _log = LogManager.getLogger(RateLimiter.class);

final String _name;
final Rate _target;
boolean useSystem = false; // for small intervals or testing
Expand Down Expand Up @@ -140,11 +138,13 @@ private long _pause(long delay)
}


private synchronized long getDelay()
public synchronized long getDelay()
{
return _long.getDelay(currentTimeMillis(), _target);
}

public synchronized long getCount() { return _long.getCount(); }


private synchronized long _updateCounts(long count)
{
Expand Down
38 changes: 36 additions & 2 deletions api/src/org/labkey/api/view/PopupMenuView.java
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,14 @@

package org.labkey.api.view;

import org.apache.commons.lang3.StringUtils;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.api.util.URLHelper;

import java.io.IOException;
import java.io.PrintWriter;
import java.io.Writer;
import java.net.URISyntaxException;

public class PopupMenuView extends HttpView<PopupMenu>
{
Expand Down Expand Up @@ -184,14 +187,45 @@ protected static void renderLink(NavTree item, String cls, Writer out) throws IO
if (item.isEmphasis())
styleStr += "font-style: italic;";

// NOTE: nofollow is not recommended as way to avoid crawling internal links
// instead let's use an onclick handler to "hide" the link
String dataQuery = null;
String href = item.getHref();
if (null != href && null == item.getScript() && !item.isPost())
{
try
{
var context = HttpView.currentContext();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It looks like this might help the Container Filter menu items? Looks like they're href and not JS handlers. They already have rel="nofollow" but that's not enough?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

From my reading. nofollow doesn't actually mean "pretend this target link or page doesn't exist". It just means "don't give this link any weight in your magic SEO algorithm, I don't vouch for it". Google is going to crawl every link it can find.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I see.

Why the split between the href and the data-query?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is an important question. The interwebs seems to think google is very good at finding links in attributes and javascript. I am not sure that this change is sufficient, but it seemed worth trying to separate the parts so that the varying part does not look like a URL.

URLHelper url = new URLHelper(href);
if (null != context && context.isRobot())
url.addParameter("_noindex", "1");
dataQuery = StringUtils.trimToEmpty(url.getRawQuery());
if (!dataQuery.isEmpty())
dataQuery = "?" + dataQuery;
url.deleteParameters();
href = url.toString();
HttpView.currentPageConfig().addHandlerForQuerySelector(
"A.noFollowNavigate",
"click",
"window.location = this.href + this.dataset['query']; return false;");
cls = StringUtils.trimToEmpty(cls) + " noFollowNavigate";
}
catch (URISyntaxException e)
{
// fall through
}
}

String id = config.makeId("popupMenuView");
out.write("<a id='" + id + "'");
if (null != cls)
out.write(" class=\"" + cls + "\"");
if (null != item.getHref() && !item.isPost())
out.write(" href=\"" + PageFlowUtil.filter(item.getHref()) + "\"");
if (null != href && !item.isPost())
out.write(" href=\"" + PageFlowUtil.filter(href) + "\"");
else
out.write(" href=\"#\"");
if (null != dataQuery)
out.write(" data-query=\"" + PageFlowUtil.filter(dataQuery) + "\"");
if (null != item.getTarget())
out.write(" target=\"" + item.getTarget() + "\"");
if (null != item.getDescription())
Expand Down
7 changes: 7 additions & 0 deletions api/src/org/labkey/api/view/ViewContext.java
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.labkey.api.util.ContainerContext;
import org.labkey.api.util.HttpUtil;
import org.labkey.api.util.MemTracker;
import org.labkey.api.util.PageFlowUtil;
import org.labkey.api.writer.ContainerUser;
import org.springframework.beans.PropertyValues;
import org.springframework.context.ApplicationContext;
Expand Down Expand Up @@ -494,4 +495,10 @@ public void setAppView(boolean appView)
{
_isAppView = appView;
}

public boolean isRobot()
{
var r = getRequest();
return null != r && PageFlowUtil.isRobotUserAgent(r.getHeader("User-Agent"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,11 @@

if (model.getFrameOption() != PageConfig.FrameOption.ALLOW)
response.setHeader("X-FRAME-OPTIONS", model.getFrameOption().name());

if ("1".equals(url.getParameter("_noindex")))
model.setNoIndex();
if ("1".equals(url.getParameter("_nofollow")))
model.setNoFollow();
%>
<!DOCTYPE html>
<html lang="en">
Expand Down
10 changes: 10 additions & 0 deletions wiki/src/org/labkey/wiki/WikiController.java
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,8 @@
import java.util.TreeSet;
import java.util.stream.Collectors;

import static org.labkey.api.data.DataRegion.LAST_FILTER_PARAM;

public class WikiController extends SpringActionController
{
private static final Logger LOG = LogManager.getLogger(WikiController.class);
Expand Down Expand Up @@ -1122,6 +1124,14 @@ public PageAction(ViewContext ctx, Wiki wiki, WikiVersion wikiversion)
@Override
public ModelAndView getView(WikiNameForm form, BindException errors)
{
// Don't index page with non default parameters (e.g. targeting webparts in the page)
for (var e = getViewContext().getRequest().getParameterNames() ; e.hasMoreElements() ; )
{
String p = e.nextElement();
if (p.contains(".") && !LAST_FILTER_PARAM.equals(p))
getPageConfig().setNoIndex();
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe getPageConfig().setNoFollow() as well?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What's the use case here? An HTML wiki that uses QueryWebPart via a <script> tag? Something that uses the server-side webpart include syntax?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Exactly. This certainly isn't the only place this could be relevant (portal pages are probably more important). But letting the crawler go nuts on the pages with multiple data regions seems to exacerbate the combinatorics.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Open to other ideas, just trying to keep up with the bots.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This seems like it's worth trying. It shouldn't hurt and might help/.

}

String name = null != form.getName() ? form.getName().trim() : null;
//if there's no name parameter, find default page and reload with parameter.
//default page is not necessarily same page displayed in wiki web part
Expand Down