|
| 1 | +import type { Request } from 'express' |
| 2 | + |
| 3 | +import rateLimit from 'express-rate-limit' |
| 4 | + |
| 5 | +import statsd from '@/observability/lib/statsd.js' |
| 6 | +import { noCacheControl } from '@/frame/middleware/cache-control.js' |
| 7 | +import { isFastlyIP } from '@/shielding/lib/fastly-ips' |
| 8 | + |
// Length of a single rate-limit window, expressed in seconds.
const EXPIRES_IN_AS_SECONDS = 60

// Default number of requests allowed per window. The RATE_LIMIT_MAX environment
// variable overrides the built-in value of 50; a value that does not parse as
// a base-10 integer aborts module load with an error.
const MAX = (() => {
  const raw = process.env.RATE_LIMIT_MAX
  if (!raw) {
    return 50
  }
  const parsed = parseInt(raw, 10)
  if (Number.isNaN(parsed)) {
    throw new Error(`process.env.RATE_LIMIT_MAX (${raw}) not a number`)
  }
  return parsed
})()
| 15 | + |
/**
 * Build an `express-rate-limit` middleware keyed on the client IP taken from
 * the `fastly-client-ip` request header.
 *
 * We apply this rate limiter to _all_ routes in src/shielding/index.ts except
 * for `/api/*` routes, which get their own limiter instance.
 *
 * @param max - Maximum number of requests a single key may make per window
 *   (one window is `EXPIRES_IN_AS_SECONDS` seconds). Defaults to `MAX`.
 * @param isAPILimiter - Pass `true` when this instance is the dedicated
 *   limiter for API routes; in that case the "is this request suspicious?"
 *   exemption is not applied and every non-exempt request is counted.
 * @returns The middleware produced by `rateLimit(...)`.
 */
export function createRateLimiter(max = MAX, isAPILimiter = false) {
  return rateLimit({
    // 1 minute
    windowMs: EXPIRES_IN_AS_SECONDS * 1000,
    // limit each IP to X requests per windowMs
    // We currently have about 12 instances in production. That's routed
    // in Moda to spread the requests to each healthy instance.
    // So, the true rate limit, per `windowMs`, is this number multiplied
    // by the current number of instances.
    max,

    // Return rate limit info in the `RateLimit-*` headers
    standardHeaders: true,
    // Disable the `X-RateLimit-*` headers
    legacyHeaders: false,

    // Requests are bucketed by the client IP that Fastly forwarded.
    keyGenerator: (req) => getClientIPFromReq(req),

    // Returning `true` exempts the request from rate limiting entirely.
    skip: async (req) => {
      const ip = getClientIPFromReq(req)
      if (await isFastlyIP(ip)) {
        return true
      }
      // IP is empty when we are in a non-production (not behind Fastly) environment.
      // In these environments, we don't want to rate limit (including tests).
      // However, if you want to test rate limiting locally, you can manually set
      // the `fastly-client-ip` header to your IP address to bypass this check.
      if (ip === '') {
        return true
      }

      // We handle /api/* routes with a separate rate limiter.
      // When it is a separate rate limiter, isAPILimiter will be passed as true.
      // Either way, such requests are always counted (never skipped) from here on.
      if (req.path.startsWith('/api/') || isAPILimiter) {
        return false
      }

      // If the request is not suspicious, don't rate limit it
      if (!isSuspiciousRequest(req)) {
        return true
      }

      // At this point, a request is suspicious. We want to track how many are in datadog
      const tags = [`url:${req.url}`, `ip:${ip}`, `path:${req.path}`, `qs:${req.url.split('?')[1]}`]
      statsd.increment('middleware.rate_limit_dont_skip', 1, tags)

      return false
    },

    // Invoked once a key has exceeded `max` within the current window.
    handler: (req, res, next, options) => {
      // Report the same client IP that `keyGenerator` and `skip` use, so the
      // metric identifies the key that was actually limited rather than
      // whatever address `req.ip` resolves to.
      const ip = getClientIPFromReq(req)
      const tags = [`url:${req.url}`, `ip:${ip}`, `path:${req.path}`]
      statsd.increment('middleware.rate_limit', 1, tags)
      // Make sure the rejection response is not cached.
      noCacheControl(res)
      res.status(options.statusCode).send(options.message)
    },

    // Temporary so that we can see what is coming from Fastly v app level
    statusCode: 418, // "i'm a teapot"
  })
}
| 79 | + |
/**
 * Read the client IP from the `fastly-client-ip` request header.
 *
 * Moda forwards the client's IP using the `fastly-client-ip` header.
 * However, in non-fastly environments, this header is not present.
 * Staging is behind Okta, so we don't need to rate limit there.
 *
 * @param req - The incoming request.
 * @returns The header value when it is a single string; otherwise `''`
 *   (header missing, empty, or — which is not expected for this header —
 *   supplied as a string array).
 */
function getClientIPFromReq(req: Request) {
  const headerValue = req?.headers?.['fastly-client-ip']
  // The typeof check also satisfies TypeScript, since a header may be typed
  // as a string array.
  return typeof headerValue === 'string' ? headerValue : ''
}
| 91 | + |
// Query-string keys that are considered legitimate for request paths that
// start with the given prefix. Entries are consulted in declaration order.
const RECOGNIZED_KEYS_BY_PREFIX = {
  '/_next/data/': [
    'versionId',
    'productId',
    'restPage',
    'apiVersion',
    'category',
    'subcategory',
  ],
  '/api/search': [
    'query',
    'language',
    'version',
    'page',
    'product',
    'autocomplete',
    'limit',
  ],
  '/api/anchor-redirect': ['hash', 'path'],
  '/api/webhooks': ['category', 'version'],
  '/api/pageinfo': ['pathname'],
}

// Query-string keys that are considered legitimate on specific page types.
const RECOGNIZED_KEYS = {
  // Search pages, e.g. `/fr/search?query=foo`
  search: ['query', 'page'],
}

// Query-string keys that are considered legitimate on any other page.
const MISC_KEYS = [
  // Learning track pages
  'learn',
  'learnProduct',

  // Platform picker
  'platform',

  // Tool picker
  'tool',

  // When apiVersion isn't the only one. E.g. ?apiVersion=XXX&tool=vscode
  'apiVersion',

  // Lowercase for rest pages
  'apiversion',

  // We use the query param "feature" to enable experiments in the browser
  'feature',
]
| 124 | + |
/**
 * Decide whether a request looks like a DoS request, i.e. is suspicious.
 *
 * We've seen lots of requests slip past the CDN and its edge rate limiter
 * that clearly are not realistic URLs that you'd get in a browser.
 * For example `?action=octrh&api=h9vcd&immagine=jzs3c&lang=xb0kp&m=rrmek`
 * Some URLs legitimately carry one or more query string keys, in particular
 * the `/api/..` endpoints, so the set of accepted keys depends on the path.
 *
 * Remember, just because this function returns true, it doesn't mean
 * the request will be rate limited. It has to be both suspicious AND
 * have lots and lots of requests.
 *
 * @param req - The incoming request; only `req.query` and `req.path` are read.
 * @returns `true` when the query string contains at least one key that is not
 *   recognized for the request's path, `false` otherwise.
 */
function isSuspiciousRequest(req: Request) {
  const queryKeys = Object.keys(req.query)

  // This function can only speculate based on query strings (at the moment),
  // so a URL without any is never suspicious.
  if (queryKeys.length === 0) {
    return false
  }

  // E.g. `/en/rest/actions?apiVersion=YYYY-MM-DD`
  if (queryKeys.length === 1 && queryKeys[0] === 'apiVersion') {
    return false
  }

  // True when at least one query key is missing from the given allow-list.
  const hasKeyOutside = (allowed: string[]) => queryKeys.some((key) => !allowed.includes(key))

  // The first prefix that matches the path decides which keys are recognized.
  const prefixEntry = Object.entries(RECOGNIZED_KEYS_BY_PREFIX).find(([prefix]) =>
    req.path.startsWith(prefix),
  )
  if (prefixEntry) {
    return hasKeyOutside(prefixEntry[1])
  }

  // E.g. `/fr/search?query=foo`
  const [, , secondSegment] = req.path.split('/')
  if (secondSegment === 'search') {
    return hasKeyOutside(RECOGNIZED_KEYS.search)
  }

  // Any other page: only the miscellaneous keys are recognized.
  return hasKeyOutside(MISC_KEYS)
}
0 commit comments