Skip to content
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.

Commit c4d033a

Browse files
committed Jun 7, 2019
Implement a response size limit option
1 parent a729023 commit c4d033a

6 files changed

+208
-18
lines changed
 

‎splash/defaults.py

+3
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,9 @@
66

77
MAX_TIMEOUT = 90.0
88

9+
RESPONSE_SIZE_LIMIT = None
10+
MAX_RESPONSE_SIZE_LIMIT = None
11+
912
# Default size of browser window. As there're no decorations, this affects
1013
# both "window.inner*" and "window.outer*" values.
1114
VIEWPORT_SIZE = '1024x768'

‎splash/network_manager.py

+93
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,34 @@
2626
)
2727
from splash.response_middleware import ContentTypeMiddleware
2828
from splash import defaults
29+
from splash.qtutils import qt_header_items
2930
from splash.utils import to_bytes
3031
from splash.cookies import SplashCookieJar
3132

3233

34+
class _InvalidContentLength(ValueError):
35+
36+
def __init__(self, value):
37+
if isinstance(value, bytes):
38+
value = '0x' + value.hex()
39+
message = 'Invalid Content-Length header value: {}'.format(value)
40+
super().__init__(message)
41+
42+
43+
def _get_content_length(reply):
    """Return the size announced by the Content-Length header of ``reply``.

    Header names are matched case-insensitively. When the header value is a
    comma-separated list, only its first element is considered.

    :param reply: object whose headers are read with ``qt_header_items``.
    :return: the announced size as a non-negative ``int``, or ``None`` when
        ``reply`` has no Content-Length header.
    :raises _InvalidContentLength: if the value is not made exclusively of
        ASCII digits (surrounding whitespace is ignored).
    """
    for name, value in qt_header_items(reply):
        if bytes(name).lower() != b'content-length':
            continue
        first = bytes(value).split(b',', 1)[0]
        # latin1 maps every byte to a code point, so decoding cannot fail.
        text = first.decode('latin1').strip()
        # int() alone is too lenient for a header value: it also accepts a
        # sign ('+1') and digit separators ('1_0'). Require plain digits.
        if not text or text.strip('0123456789'):
            raise _InvalidContentLength(text)
        return int(text)
    return None
55+
56+
3357
class NetworkManagerFactory(object):
3458
def __init__(self, filters_path=None, verbosity=None, allowed_schemes=None, disable_browser_caches=None):
3559
verbosity = defaults.VERBOSITY if verbosity is None else verbosity
@@ -86,6 +110,7 @@ class ProxiedQNetworkAccessManager(QNetworkAccessManager):
86110
* Tracks information about requests/responses and stores it in HAR format,
87111
including request and response content.
88112
* Allows to set per-request timeouts.
113+
* Handles per-request response size limits.
89114
"""
90115
_REQUEST_ID = QNetworkRequest.User + 1
91116
_SHOULD_TRACK = QNetworkRequest.User + 2
@@ -398,11 +423,69 @@ def _on_reply_finished(self):
398423
content)
399424
self.log("Finished downloading {url}", reply)
400425

426+
def _aborted_due_to_size(self, reply, request, sizes_and_sources):
427+
render_options = self._get_render_options(request)
428+
if render_options is None:
429+
return False
430+
option = "response_size_limit"
431+
max_size = render_options.get(option, None)
432+
if max_size is not None:
433+
try:
434+
max_size = int(max_size)
435+
except ValueError:
436+
self.log("Non-integer value received for rendering option "
437+
"'{}': {}".format(option, max_size), min_level=1)
438+
self.log(traceback.format_exc(), min_level=1, format_msg=False)
439+
max_size = None
440+
else:
441+
if max_size < 0:
442+
self.log("The value of rendering option '{}' ({}) must be "
443+
"0 or higher.".format(option, max_size),
444+
min_level=1)
445+
max_size = None
446+
elif (render_options.max_response_size_limit is not None and
447+
max_size > render_options.max_response_size_limit):
448+
self.log("The value of rendering option '{}' ({}) exceeds "
449+
"the maximum value allowed.".format(
450+
option, max_size),
451+
min_level=1)
452+
max_size = None
453+
if max_size is None:
454+
if render_options.max_response_size_limit is not None:
455+
max_size = render_options.max_response_size_limit
456+
else:
457+
max_size = defaults.RESPONSE_SIZE_LIMIT
458+
if max_size is None:
459+
return False
460+
for size, source in sizes_and_sources:
461+
if size is None:
462+
continue
463+
if size <= max_size:
464+
continue
465+
self.log("The {} ({}) exceeds the maximum response size ({}), "
466+
"aborting: {{url}}".format(source, size, max_size),
467+
reply, min_level=1)
468+
self.log(render_options, reply, min_level=1, format_msg=False)
469+
reply.abort()
470+
return True
471+
return False
472+
401473
def _on_reply_headers(self):
402474
"""Signal emitted before reading response body, after getting headers
403475
"""
404476
reply = self.sender()
405477
request = reply.request()
478+
479+
try:
480+
content_length = _get_content_length(reply)
481+
except _InvalidContentLength as error:
482+
self.log("On response from {{url}}: {}".format(error),
483+
reply, min_level=3)
484+
content_length = None
485+
sizes_and_sources = ((content_length, "Content-Length header"),)
486+
if self._aborted_due_to_size(reply, request, sizes_and_sources):
487+
return
488+
406489
self._handle_reply_cookies(reply)
407490
self._run_webpage_callbacks(request, "on_response_headers", reply)
408491

@@ -413,6 +496,16 @@ def _on_reply_headers(self):
413496
self.log("Headers received for {url}", reply, min_level=3)
414497

415498
def _on_reply_download_progress(self, received, total):
499+
reply = self.sender()
500+
request = reply.request()
501+
502+
sizes_and_sources = (
503+
(total, "expected response size"),
504+
(received, "size of the response content downloaded so far"),
505+
)
506+
if self._aborted_due_to_size(reply, request, sizes_and_sources):
507+
return
508+
416509
har = self._get_har()
417510
if har is not None:
418511
req_id = self._get_request_id()

‎splash/render_options.py

+4-3
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,9 @@ class RenderOptions(object):
1414

1515
_REQUIRED = object()
1616

17-
def __init__(self, data, max_timeout):
17+
def __init__(self, data, max_timeout, max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT):
1818
self.data = data
19+
self.max_response_size_limit = max_response_size_limit
1920
self.max_timeout = max_timeout
2021

2122
@classmethod
@@ -29,7 +30,7 @@ def raise_error(cls, argument, description, type='bad_argument', **kwargs):
2930
raise BadOption(params)
3031

3132
@classmethod
32-
def fromrequest(cls, request, max_timeout):
33+
def fromrequest(cls, request, max_timeout, max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT):
3334
"""
3435
Initialize options from a Twisted Request.
3536
"""
@@ -60,7 +61,7 @@ def fromrequest(cls, request, max_timeout):
6061
request.content.seek(0)
6162

6263
data['uid'] = id(request)
63-
return cls(data, max_timeout)
64+
return cls(data, max_timeout, max_response_size_limit=max_response_size_limit)
6465

6566
def get_expired_args(self, cache):
6667
"""

‎splash/resources.py

+26-11
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
import splash
1919
from splash.argument_cache import ArgumentCache
20+
from splash import defaults
2021
from splash.qtrender import (
2122
HtmlRender, PngRender, JsonRender, HarRender, JpegRender
2223
)
@@ -85,17 +86,18 @@ class BaseRenderResource(_ValidatingResource):
8586
isLeaf = True
8687
content_type = "text/html; charset=utf-8"
8788

88-
def __init__(self, pool, max_timeout, argument_cache):
89+
def __init__(self, pool, max_timeout, argument_cache, max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT):
8990
Resource.__init__(self)
9091
self.pool = pool
9192
self.js_profiles_path = self.pool.js_profiles_path
9293
self.max_timeout = max_timeout
9394
self.argument_cache = argument_cache
95+
self.max_response_size_limit = max_response_size_limit
9496

9597
def render_GET(self, request):
9698
#log.msg("%s %s %s %s" % (id(request), request.method, request.path, request.args))
9799
request.starttime = time.time()
98-
render_options = RenderOptions.fromrequest(request, self.max_timeout)
100+
render_options = RenderOptions.fromrequest(request, self.max_timeout, max_response_size_limit=self.max_response_size_limit)
99101

100102
# process argument cache
101103
original_options = render_options.data.copy()
@@ -281,8 +283,9 @@ def __init__(self, pool, sandboxed,
281283
argument_cache,
282284
strict,
283285
implicit_main,
286+
max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT,
284287
):
285-
BaseRenderResource.__init__(self, pool, max_timeout, argument_cache)
288+
BaseRenderResource.__init__(self, pool, max_timeout, argument_cache, max_response_size_limit=max_response_size_limit)
286289
self.sandboxed = sandboxed
287290
self.lua_package_path = lua_package_path
288291
self.lua_sandbox_allowed_modules = lua_sandbox_allowed_modules
@@ -434,20 +437,22 @@ class DemoUI(_ValidatingResource):
434437

435438
PATH = b'info'
436439

437-
def __init__(self, pool, lua_enabled, max_timeout):
440+
def __init__(self, pool, lua_enabled, max_timeout, max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT):
438441
Resource.__init__(self)
439442
self.pool = pool
440443
self.lua_enabled = lua_enabled
441444
self.max_timeout = max_timeout
445+
self.max_response_size_limit = max_response_size_limit
442446

443447
def _validate_params(self, request):
444-
options = RenderOptions.fromrequest(request, self.max_timeout)
448+
options = RenderOptions.fromrequest(request, self.max_timeout, max_response_size_limit=self.max_response_size_limit)
445449
options.get_filters(self.pool) # check
446450
params = options.get_common_params(self.pool.js_profiles_path)
447451
params.update({
448452
'save_args': options.get_save_args(),
449453
'load_args': options.get_load_args(),
450454
'timeout': options.get_timeout(),
455+
'response_size_limit': options.get_response_size_limit(),
451456
'request_body': options.get_request_body(),
452457
'response_body': options.get_response_body(),
453458
'har': 1,
@@ -471,6 +476,7 @@ def render_GET(self, request):
471476
url = 'http://' + url
472477
params['url'] = url
473478
timeout = params['timeout']
479+
response_size_limit = params['response_size_limit']
474480
params = {k: v for k, v in params.items() if v is not None}
475481

476482
# disable "phases" HAR Viewer feature
@@ -514,6 +520,7 @@ def render_GET(self, request):
514520
<input type="hidden" name="images" value="1">
515521
<input type="hidden" name="expand" value="1"> <!-- for HAR viewer -->
516522
<input type="hidden" name="timeout" value="%(timeout)s">
523+
<input type="hidden" name="response_size_limit" value="%(response_size_limit)s">
517524
518525
<div class="btn-group" id="render-form">
519526
<input class="form-control col-lg-8" type="text" placeholder="Paste an URL" type="text" name="url" value="%(url)s">
@@ -563,6 +570,7 @@ def render_GET(self, request):
563570
"lua_enabled": self.lua_enabled,
564571
}),
565572
timeout=timeout,
573+
response_size_limit=response_size_limit,
566574
url=url,
567575
theme=BOOTSTRAP_THEME,
568576
cm_resources=CODEMIRROR_RESOURCES if self.lua_enabled else "",
@@ -576,18 +584,20 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
576584
max_timeout,
577585
argument_cache_max_entries,
578586
strict_lua_runner,
587+
max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT,
579588
):
580589
Resource.__init__(self)
581590
self.argument_cache = ArgumentCache(argument_cache_max_entries)
582591
self.ui_enabled = ui_enabled
583592
self.lua_enabled = lua_enabled
584593

585594
_args = pool, max_timeout, self.argument_cache
586-
self.putChild(b"render.html", RenderHtmlResource(*_args))
587-
self.putChild(b"render.png", RenderPngResource(*_args))
588-
self.putChild(b"render.jpeg", RenderJpegResource(*_args))
589-
self.putChild(b"render.json", RenderJsonResource(*_args))
590-
self.putChild(b"render.har", RenderHarResource(*_args))
595+
_kwargs = {'max_response_size_limit': max_response_size_limit}
596+
self.putChild(b"render.html", RenderHtmlResource(*_args, **_kwargs))
597+
self.putChild(b"render.png", RenderPngResource(*_args, **_kwargs))
598+
self.putChild(b"render.jpeg", RenderJpegResource(*_args, **_kwargs))
599+
self.putChild(b"render.json", RenderJsonResource(*_args, **_kwargs))
600+
self.putChild(b"render.har", RenderHarResource(*_args, **_kwargs))
591601

592602
self.putChild(b"_debug", DebugResource(pool, self.argument_cache))
593603
self.putChild(b"_gc", ClearCachesResource(self.argument_cache))
@@ -605,6 +615,7 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
605615
max_timeout=max_timeout,
606616
argument_cache=self.argument_cache,
607617
strict=strict_lua_runner,
618+
max_response_size_limit=max_response_size_limit,
608619
)
609620
self.putChild(b"execute", ExecuteLuaScriptResource(
610621
implicit_main=False, **lua_kwargs))
@@ -626,9 +637,11 @@ def __init__(self, pool, ui_enabled, lua_enabled, lua_sandbox_enabled,
626637
self.putChild(DemoUI.PATH, DemoUI(
627638
pool=pool,
628639
lua_enabled=self.lua_enabled,
629-
max_timeout=max_timeout
640+
max_timeout=max_timeout,
641+
max_response_size_limit=max_response_size_limit,
630642
))
631643
self.max_timeout = max_timeout
644+
self.max_response_size_limit = max_response_size_limit
632645

633646
def getChild(self, name, request):
634647
if name == b"" and self.ui_enabled:
@@ -720,6 +733,7 @@ def render_GET(self, request):
720733
<input type="hidden" name="images" value="1">
721734
<input type="hidden" name="expand" value="1"> <!-- for HAR viewer -->
722735
<input type="hidden" name="timeout" value="%(timeout)s">
736+
<input type="hidden" name="response_size_limit" value="%(response_size_limit)s">
723737
724738
<fieldset>
725739
<div class="">
@@ -754,5 +768,6 @@ def render_GET(self, request):
754768
}),
755769
cm_resources=CODEMIRROR_RESOURCES,
756770
timeout=self.max_timeout,
771+
response_size_limit=self.max_response_size_limit,
757772
)
758773
return result.encode('utf8')

‎splash/server.py

+13-4
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,9 @@ def parse_opts(jupyter=False, argv=sys.argv):
7878
help="number of render slots (default: %default)")
7979
op.add_option("--max-timeout", type="float", default=defaults.MAX_TIMEOUT,
8080
help="maximum allowed value for timeout (default: %default)")
81+
op.add_option("--max-response-size-limit", type="int",
82+
default=defaults.MAX_RESPONSE_SIZE_LIMIT,
83+
help="maximum allowed value for response size limit (default: %default)")
8184
op.add_option("--disable-ui", action="store_true", default=False,
8285
help="disable web UI")
8386
op.add_option("--disable-lua", action="store_true", default=False,
@@ -94,6 +97,7 @@ def parse_opts(jupyter=False, argv=sys.argv):
9497
opts.port = None
9598
opts.slots = None
9699
opts.max_timeout = None
100+
opts.max_response_size_limit = None
97101
opts.argument_cache_max_entries = None
98102

99103
return opts, args
@@ -170,7 +174,8 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
170174
strict_lua_runner=False,
171175
argument_cache_max_entries=None,
172176
disable_browser_caches=False,
173-
verbosity=None):
177+
verbosity=None,
178+
max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT):
174179
from twisted.internet import reactor
175180
from twisted.web.server import Site
176181
from splash.resources import Root
@@ -181,8 +186,8 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
181186
verbosity = defaults.VERBOSITY if verbosity is None else verbosity
182187
slots = defaults.SLOTS if slots is None else slots
183188

184-
log.msg("verbosity={}, slots={}, argument_cache_max_entries={}, max-timeout={}".format(
185-
verbosity, slots, argument_cache_max_entries, max_timeout
189+
log.msg("verbosity={}, slots={}, argument_cache_max_entries={}, max-timeout={}, max-response-size-limit={}".format(
190+
verbosity, slots, argument_cache_max_entries, max_timeout, max_response_size_limit
186191
))
187192

188193
pool = RenderPool(
@@ -215,6 +220,7 @@ def splash_server(portnum, ip, slots, network_manager_factory, max_timeout,
215220
max_timeout=max_timeout,
216221
argument_cache_max_entries=argument_cache_max_entries,
217222
strict_lua_runner=strict_lua_runner,
223+
max_response_size_limit=max_response_size_limit,
218224
)
219225
factory = Site(root)
220226
reactor.listenTCP(portnum, factory, interface=ip)
@@ -264,6 +270,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None,
264270
verbosity=None,
265271
server_factory=splash_server,
266272
disable_browser_caches=False,
273+
max_response_size_limit=defaults.MAX_RESPONSE_SIZE_LIMIT,
267274
):
268275
from splash import network_manager
269276
network_manager_factory = network_manager.NetworkManagerFactory(
@@ -293,6 +300,7 @@ def default_splash_server(portnum, ip, max_timeout, slots=None,
293300
verbosity=verbosity,
294301
max_timeout=max_timeout,
295302
argument_cache_max_entries=argument_cache_max_entries,
303+
max_response_size_limit=max_response_size_limit,
296304
)
297305

298306

@@ -391,7 +399,8 @@ def main(jupyter=False, argv=sys.argv, server_factory=splash_server):
391399
max_timeout=opts.max_timeout,
392400
argument_cache_max_entries=opts.argument_cache_max_entries,
393401
server_factory=server_factory,
394-
disable_browser_caches=opts.disable_browser_caches
402+
disable_browser_caches=opts.disable_browser_caches,
403+
max_response_size_limit=opts.max_response_size_limit,
395404
)
396405
signal.signal(signal.SIGUSR1, lambda s, f: traceback.print_stack(f))
397406

‎splash/tests/test_network_manager.py

+69
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
from itertools import permutations, product
2+
3+
from PyQt5.QtNetwork import QNetworkReply
4+
5+
from splash.network_manager import _get_content_length, _InvalidContentLength
6+
7+
from pytest import mark, raises
8+
9+
10+
class MockReply(QNetworkReply):
    """Reply whose raw headers are set from the pairs given at creation."""

    def __init__(self, headers):
        # ``headers`` is an iterable of (name, value) pairs.
        super().__init__()
        for name, raw_value in headers:
            self.setRawHeader(name, raw_value)
16+
17+
18+
# Spellings of the header name used to check case-insensitive matching.
CONTENT_LENGTH_HEADER_VARIANTS = (
    b'Content-Length',
    b'content-length',
    b'CONTENT-LENGTH',
    b'cOntent-length',
)
# The original, misspelled name is kept as an alias.
CONTENT_LENGHT_HEADER_VARIANTS = CONTENT_LENGTH_HEADER_VARIANTS

# (raw header value, expected return value or expected exception class)
_CONTENT_LENGTH_VALUE_CASES = (
    (b'', _InvalidContentLength),
    (b'1', 1),
    (b'-1', _InvalidContentLength),
    (b'1.0', _InvalidContentLength),
    (b'a', _InvalidContentLength),
    ('á'.encode('utf-8'), _InvalidContentLength),
)

# Each case is (headers, result), where headers is a tuple of (name, value)
# pairs passed to MockReply.
_GET_CONTENT_LENGTH_CASES = (
    # No headers at all: there is no size to report.
    [((), None)]
    # Every value case under every spelling of the header name.
    + [
        (((header, value),), result)
        for header, (value, result) in product(
            CONTENT_LENGTH_HEADER_VARIANTS, _CONTENT_LENGTH_VALUE_CASES)
    ]
    # Comma-separated list: the first element is the one expected back.
    + [
        (((header, b'1,2'),), 1)
        for header in CONTENT_LENGTH_HEADER_VARIANTS
    ]
)


@mark.parametrize('headers,result', _GET_CONTENT_LENGTH_CASES)
def test_get_content_length(headers, result):
    """Check the return value or the raised error of _get_content_length.

    ``result`` is either the expected return value (``None`` or an ``int``)
    or the exception class that the call is expected to raise.
    """
    reply = MockReply(headers)
    if result is None or isinstance(result, int):
        assert _get_content_length(reply) == result
    else:
        assert issubclass(result, Exception)
        with raises(result):
            _get_content_length(reply)

0 commit comments

Comments
 (0)
Please sign in to comment.