# Anubis rules configuration file
#
# This was copied from https://github.com/TecharoHQ/anubis/blob/main/data/meta/default-config.yaml
# but can be imported like this once the next version of Anubis is released:
# - import: (data)/meta/default-config.yaml

# Ordered list of bot policies; the first matching rule decides the action.
bots:
  # Pathological bots to deny
  # This correlates to data/bots/_deny-pathological.yaml in the source tree
  # https://github.com/TecharoHQ/anubis/blob/main/data/bots/_deny-pathological.yaml
  - import: (data)/bots/_deny-pathological.yaml
  - import: (data)/bots/aggressive-brazilian-scrapers.yaml

  # Aggressively block AI/LLM related bots/agents by default
  # - import: (data)/meta/ai-block-aggressive.yaml

  # Consider replacing the aggressive AI policy with more selective policies:
  - import: (data)/meta/ai-block-moderate.yaml
  # - import: (data)/meta/ai-block-permissive.yaml

  # Search engine crawlers to allow, defaults to:
  # - Google (so they don't try to bypass Anubis)
  # - Apple
  # - Bing
  # - DuckDuckGo
  # - Qwant
  # - The Internet Archive
  # - Kagi
  # - Marginalia
  # - Mojeek
  - import: (data)/crawlers/_allow-good.yaml
  # Challenge Firefox AI previews
  - import: (data)/clients/x-firefox-ai.yaml

  # Allow common "keeping the internet working" routes (well-known, favicon, robots.txt)
  - import: (data)/common/keep-internet-working.yaml

  # # Punish any bot with "bot" in the user-agent string
  # # This is known to have a high false-positive rate, use at your own risk
  # - name: generic-bot-catchall
  #   user_agent_regex: (?i:bot|crawler)
  #   action: CHALLENGE
  #   challenge:
  #     difficulty: 16  # impossible
  #     report_as: 4  # lie to the operator
  #     algorithm: slow  # intentionally waste CPU cycles and time

  # Requires a subscription to Thoth to use, see
  # https://anubis.techaro.lol/docs/admin/thoth#geoip-based-filtering
  # - name: countries-with-aggressive-scrapers
  #   action: WEIGH
  #   geoip:
  #     countries:
  #       - BR
  #       - CN
  #   weight:
  #     adjust: 10

  # Requires a subscription to Thoth to use, see
  # https://anubis.techaro.lol/docs/admin/thoth#asn-based-filtering
  # - name: aggressive-asns-without-functional-abuse-contact
  #   action: WEIGH
  #   asns:
  #     match:
  #       - 13335  # Cloudflare
  #       - 136907  # Huawei Cloud
  #       - 45102  # Alibaba Cloud
  #   weight:
  #     adjust: 10

  # ## System load based checks.
  # # If the system is under high load, add weight.
  # - name: high-load-average
  #   action: WEIGH
  #   expression: load_1m >= 10.0  # make sure to end the load comparison in a .0
  #   weight:
  #     adjust: 20

  # # If your backend service is running on the same operating system as Anubis,
  # # you can uncomment this rule to make the challenge easier when the system is
  # # under low load.
  # #
  # # If it is not, remove weight.
  # - name: low-load-average
  #   action: WEIGH
  #   expression: load_15m <= 4.0  # make sure to end the load comparison in a .0
  #   weight:
  #     adjust: -10

  # Assert behaviour that only genuine browsers display. This ensures that
  # Chrome/Firefox/Safari user agents also send the headers real browsers send,
  # and rewards them with a lower (easier) challenge weight.
  - name: realistic-browser-catchall
    expression:
      all:
        - '"User-Agent" in headers'
        - '( userAgent.contains("Firefox") ) || ( userAgent.contains("Chrome") ) || ( userAgent.contains("Safari") )'
        - '"Accept" in headers'
        - '"Sec-Fetch-Dest" in headers'
        - '"Sec-Fetch-Mode" in headers'
        - '"Sec-Fetch-Site" in headers'
        - '"Accept-Encoding" in headers'
        - '( headers["Accept-Encoding"].contains("zstd") || headers["Accept-Encoding"].contains("br") )'
        - '"Accept-Language" in headers'
    action: WEIGH
    weight:
      adjust: -10

  # The Upgrade-Insecure-Requests header is typically sent by browsers, but not always
  - name: upgrade-insecure-requests
    expression: '"Upgrade-Insecure-Requests" in headers'
    action: WEIGH
    weight:
      adjust: -2

  # Chrome should behave like Chrome: a genuine Chrome UA also sends client hints
  - name: chrome-is-proper
    expression:
      all:
        - 'userAgent.contains("Chrome")'
        - '"Sec-Ch-Ua" in headers'
        - 'headers["Sec-Ch-Ua"].contains("Chromium")'
        - '"Sec-Ch-Ua-Mobile" in headers'
        - '"Sec-Ch-Ua-Platform" in headers'
    action: WEIGH
    weight:
      adjust: -5

  # Every real HTTP client sends Accept; penalize requests that omit it
  - name: should-have-accept
    expression: '!("Accept" in headers)'
    action: WEIGH
    weight:
      adjust: 5

  # Generic catchall rule: anything claiming to be a browser gets baseline weight
  - name: generic-browser
    user_agent_regex: >-
      Mozilla|Opera
    action: WEIGH
    weight:
      adjust: 10
142+
# Backend used to persist Anubis challenge/session state.
store:
  # Alternative: embedded bbolt database (single-node deployments)
  # backend: bbolt
  # parameters:
  #   path: /data/anubis.bdb

  backend: valkey
  parameters:
    # NOTE(review): removed the leading space that was inside the quoted URL
    # (" redis://...") — a URL with leading whitespace fails to parse.
    url: "redis://cache:6379/3"