Skip to content

Commit 6f92669

Browse files
authored
Fix/redirect (#104)
* Update txextra to fix issue in redirect handling * Ignore private redirects * Pass the extra argument in the correct place * Add support for loading headers from http_request_headers dictionary * Stringify http_request_headers to make twisted happy * Add extra argument to web_connectivity test helper call
1 parent 68153b8 commit 6f92669

File tree

4 files changed

+117
-17
lines changed

4 files changed

+117
-17
lines changed

oonib/common/ip_utils.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
from ipaddr import IPv4Address, IPv6Address
2+
from ipaddr import AddressValueError
3+
4+
5+
def in_private_ip_space(address):
    """
    Tell whether an IPv4 address is private or loopback.

    :param address: textual IPv4 address, handed to ``IPv4Address``
    :return: True when the parsed address reports ``is_private`` or
        ``is_loopback``, False otherwise

    Any ``AddressValueError`` raised by ``IPv4Address`` is not handled here
    and propagates to the caller.
    """
    parsed = IPv4Address(address)
    return bool(parsed.is_private or parsed.is_loopback)
10+
11+
def is_public_ipv4_address(address):
    """
    Report whether ``address`` is an IPv4 address outside private and
    loopback space.

    :param address: candidate IPv4 address
    :return: False when the address is private/loopback or when checking it
        raises ``AddressValueError``; True otherwise
    """
    try:
        private = in_private_ip_space(address)
    except AddressValueError:
        # Unparseable input is never considered public.
        return False
    return not private
16+
17+
def is_private_ipv4_address(address):
    """
    Report whether ``address`` is an IPv4 address in private or loopback
    space.

    :param address: candidate IPv4 address
    :return: the result of ``in_private_ip_space``, or False when checking
        the address raises ``AddressValueError``
    """
    try:
        private = in_private_ip_space(address)
    except AddressValueError:
        # Unparseable input is not treated as private either.
        private = False
    return private
22+
23+
24+
def is_private_address(address, only_loopback=False):
    """
    Check whether an IP address is in private IP space, or whether a
    hostname is either localhost or *.local.

    Hostnames are compared case-insensitively and one trailing dot (the
    fully-qualified form, e.g. "localhost.") is ignored, so that variants
    such as "LOCALHOST" or "printer.LOCAL." are still recognised.

    :param address: an IP address (IPv4 or IPv6) or a hostname
    :param only_loopback: when true, an IP address only matches if it is a
        loopback address (127.0.0.0/8 or ::1 in IPv6); private ranges are
        not considered. Hostname matching is not affected by this flag.
    :return: True if the IP address or host is in private space
    """
    try:
        ip_address = IPv4Address(address)
    except AddressValueError:
        try:
            ip_address = IPv6Address(address)
        except AddressValueError:
            # Not an IP literal: fall back to hostname matching.
            hostname = address.lower()
            if hostname.endswith("."):
                hostname = hostname[:-1]
            return hostname == "localhost" or hostname.endswith(".local")

    if ip_address.is_loopback:
        return True
    return not only_loopback and bool(ip_address.is_private)

oonib/common/txextra.py

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919

2020
from twisted.python import log
2121

22+
from .ip_utils import is_private_address
23+
2224
class TrueHeaders(Headers):
2325
def __init__(self, rawHeaders=None):
2426
self._rawHeaders = dict()
@@ -35,6 +37,12 @@ def setRawHeaders(self, name, values):
3537
self._rawHeaders[name.lower()]['name'] = name
3638
self._rawHeaders[name.lower()]['values'] = values
3739

40+
def copy(self):
    """
    Build a new instance of the same class carrying this object's headers.

    :return: ``self.__class__`` constructed from a dict that maps every
        name yielded by ``getAllRawHeaders()`` to its values
    """
    return self.__class__(dict(self.getAllRawHeaders()))
45+
3846
def getAllRawHeaders(self):
3947
for _, v in self._rawHeaders.iteritems():
4048
yield v['name'], v['values']
@@ -168,6 +176,10 @@ class FixedRedirectAgent(BrowserLikeRedirectAgent):
168176
This is a redirect agent with this patch manually applied:
169177
https://twistedmatrix.com/trac/ticket/8265
170178
"""
179+
def __init__(self, agent, redirectLimit=20, ignorePrivateRedirects=False):
    """
    :param agent: forwarded to ``BrowserLikeRedirectAgent.__init__``
    :param redirectLimit: forwarded to ``BrowserLikeRedirectAgent.__init__``
    :param ignorePrivateRedirects: stored on the instance as
        ``self.ignorePrivateRedirects``
    """
    self.ignorePrivateRedirects = ignorePrivateRedirects
    BrowserLikeRedirectAgent.__init__(
        self, agent, redirectLimit=redirectLimit
    )
182+
171183
def _handleRedirect(self, response, method, uri, headers, redirectCount):
172184
"""
173185
Handle a redirect response, checking the number of redirects already
@@ -191,12 +203,26 @@ def _handleRedirect(self, response, method, uri, headers, redirectCount):
191203
response.request.absoluteURI,
192204
locationHeaders[0]
193205
)
206+
if getattr(client, 'URI', None):
207+
uri = client.URI.fromBytes(location)
208+
else:
209+
# Backward compatibility with twisted 14.0.2
210+
uri = client._URI.fromBytes(location)
211+
if self.ignorePrivateRedirects and is_private_address(uri.host,
212+
only_loopback=True):
213+
return response
214+
194215
deferred = self._agent.request(method, location, headers)
195216

196217
def _chainResponse(newResponse):
    # Links the outcome of the redirected request back to the response
    # that triggered the redirect, then hands the outcome on unchanged.
    if not isinstance(newResponse, Failure):
        newResponse.setPreviousResponse(response)
    else:
        # This is needed to write the response even in case of failure
        newResponse.previousResponse = response
        newResponse.requestLocation = location
    return newResponse
199225

200-
deferred.addCallback(_chainResponse)
226+
deferred.addBoth(_chainResponse)
201227
return deferred.addCallback(
202228
self._handleResponse, method, uri, headers, redirectCount + 1)

oonib/testhelpers/http_helpers.py

Lines changed: 40 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -309,8 +309,9 @@ def lookup(self, response_type, key):
309309
defer.returnValue(value)
310310

311311
@defer.inlineCallbacks
312-
def http_request(self, url, include_http_responses=False):
313-
cached_value = yield self.lookup('http_request', url)
312+
def http_request(self, url, http_request_headers, include_http_responses=False):
313+
key = url + json.dumps(http_request_headers)
314+
cached_value = yield self.lookup('http_request', key)
314315
if cached_value is not None:
315316
if include_http_responses is not True:
316317
cached_value.pop('responses', None)
@@ -324,15 +325,16 @@ def http_request(self, url, include_http_responses=False):
324325
}
325326

326327
agent = ContentDecoderAgent(
327-
FixedRedirectAgent(TrueHeadersAgent(reactor)),
328-
[('gzip', GzipDecoder)]
328+
FixedRedirectAgent(TrueHeadersAgent(reactor),
329+
ignorePrivateRedirects=True),
330+
[('gzip', GzipDecoder)]
329331
)
330332
try:
331333
retries = 0
332334
while True:
333335
try:
334336
response = yield agent.request('GET', url,
335-
TrueHeaders(REQUEST_HEADERS))
337+
TrueHeaders(http_request_headers))
336338
headers = {}
337339
for name, value in response.headers.getAllRawHeaders():
338340
headers[name] = unicode(value[0], errors='ignore')
@@ -364,11 +366,13 @@ def http_request(self, url, include_http_responses=False):
364366
page_info['failure'] = 'connection_refused_error'
365367
except ConnectError:
366368
page_info['failure'] = 'connect_error'
367-
except:
369+
except Exception as exc:
368370
# XXX map more failures
369371
page_info['failure'] = 'unknown_error'
372+
log.err("Unknown error occurred")
373+
log.exception(exc)
370374

371-
yield self.cache_value('http_request', url, page_info)
375+
yield self.cache_value('http_request', key, page_info)
372376
if include_http_responses is not True:
373377
page_info.pop('responses', None)
374378
defer.returnValue(page_info)
@@ -451,10 +455,14 @@ class WebConnectivity(OONIBHandler):
451455
@defer.inlineCallbacks
452456
def control_measurement(self, http_url, socket_list,
453457
include_http_responses,
454-
invalid_sockets):
458+
invalid_sockets,
459+
http_request_headers=None):
460+
if http_request_headers is None:
461+
http_request_headers = {}
462+
455463
hostname = urlparse(http_url).netloc
456464
dl = [
457-
web_connectivity_cache.http_request(http_url, include_http_responses),
465+
web_connectivity_cache.http_request(http_url, http_request_headers, include_http_responses),
458466
web_connectivity_cache.dns_consistency(hostname)
459467
]
460468
for socket in socket_list:
@@ -477,12 +485,20 @@ def control_measurement(self, http_url, socket_list,
477485
})
478486

479487
def validate_request(self, request):
    """
    Validate the decoded request body of a web connectivity measurement.

    :param request: the decoded request; must contain the keys
        ``http_request`` and ``tcp_connect`` and may contain
        ``http_request_headers``
    :raises HTTPError: with status 400 when a required key is missing,
        when ``http_request`` does not match ``HTTP_REQUEST_REGEXP``, when
        ``http_request_headers`` is not a dictionary, when a header name is
        not one of the allowed headers, or when a header value is not a
        list
    """
    allowed_headers = ('user-agent', 'accept', 'accept-language')
    required_keys = ('http_request', 'tcp_connect')
    for rk in required_keys:
        if rk not in request:
            raise HTTPError(400, "Missing %s" % rk)
    if not HTTP_REQUEST_REGEXP.match(request['http_request']):
        raise HTTPError(400, "Invalid http_request URL")

    http_request_headers = request.get('http_request_headers', {})
    # A client may send null, a list or a string here; reject it with a
    # proper 400 instead of failing on .iteritems() below.
    if not isinstance(http_request_headers, dict):
        raise HTTPError(400, "http_request_headers must be a dictionary")
    for k, v in http_request_headers.iteritems():
        if k.lower() not in allowed_headers:
            raise HTTPError(400, "Invalid header %s" % k)
        if not isinstance(v, list):
            raise HTTPError(400, "Headers must be a list")
    # We don't need to check the tcp_connect field because we strip it in
    # the post already.
488504

@@ -508,13 +524,22 @@ def post(self):
508524
request['tcp_connect'] = tcp_connect
509525

510526
self.validate_request(request)
511-
include_http_responses = request.get("include_http_responses",
512-
False)
527+
include_http_responses = request.get(
528+
"include_http_responses",
529+
False
530+
)
531+
532+
# We convert headers to str so twisted is happy (unicode triggers
533+
# errors)
534+
http_request_headers = {}
535+
for k, v in request.get('http_request_headers', {}).iteritems():
536+
http_request_headers[str(k)] = map(str, v)
513537
self.control_measurement(
514-
str(request['http_request']),
515-
request['tcp_connect'],
516-
include_http_responses,
517-
invalid_sockets
538+
http_url=str(request['http_request']),
539+
include_http_responses=include_http_responses,
540+
http_request_headers=http_request_headers,
541+
socket_list=request['tcp_connect'],
542+
invalid_sockets=invalid_sockets
518543
)
519544
except HTTPError:
520545
raise

oonib/tests/test_web_connectivity.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ def tearDown(self):
1414
@defer.inlineCallbacks
1515
def test_http_request(self):
1616
value = yield self.web_connectivity_cache.http_request(
17-
'https://www.google.com/humans.txt')
17+
'https://www.google.com/humans.txt', {})
1818
self.assertEqual(
1919
value['body_length'], 286
2020
)

0 commit comments

Comments
 (0)