Skip to content

Commit 8365c7f

Browse files
committed
Added: correct handling of unsupported content (see: http://doc.qt.io/qt-5.5/qwebpage.html#unsupportedContent).
1 parent d17b65c commit 8365c7f

File tree

3 files changed

+87
-2
lines changed

3 files changed

+87
-2
lines changed

splash/browser_tab.py

+52-2
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from splash.qtutils import (OPERATION_QT_CONSTANTS, WrappedSignal, qt2py,
2222
qurl2ascii, to_qurl)
2323
from splash.render_options import validate_size_str
24-
from splash.qwebpage import SplashQWebPage, SplashQWebView
24+
from splash.qwebpage import SplashQWebPage, SplashQWebView, RenderErrorInfo
2525
from splash.exceptions import JsError, OneShotCallbackError, ScriptError
2626
from splash.utils import to_bytes
2727
from splash.jsutils import (
@@ -71,6 +71,9 @@ def __init__(self, network_manager, splash_proxy_factory, verbosity,
7171
self._callback_proxies_to_cancel = weakref.WeakSet()
7272
self._js_console = None
7373
self._autoload_scripts = []
74+
self._is_unsupported_content = False
75+
self._unsupported_content_reply = None
76+
self._load_finished_after_unsupported_content_ready = False
7477

7578
self.logger = _BrowserTabLogger(uid=self._uid, verbosity=verbosity)
7679
self._init_webpage(verbosity, network_manager, splash_proxy_factory,
@@ -140,6 +143,8 @@ def _setup_webpage_events(self):
140143
self.web_page.mainFrame().loadFinished.connect(self._on_load_finished)
141144
self.web_page.mainFrame().urlChanged.connect(self._on_url_changed)
142145
self.web_page.mainFrame().javaScriptWindowObjectCleared.connect(self._on_javascript_window_object_cleared)
146+
self.web_page.setForwardUnsupportedContent(True)
147+
self.web_page.unsupportedContent.connect(self._on_unsupported_content)
143148
self.logger.add_web_page(self.web_page)
144149

145150
def return_result(self, result):
@@ -379,6 +384,15 @@ def _on_load_finished(self, ok):
379384
This callback is called for all web_page.mainFrame()
380385
loadFinished events.
381386
"""
387+
if self._is_unsupported_content:
388+
if self._unsupported_content_reply.isRunning():
389+
# XXX: We'll come back later when download finishes
390+
self.logger.log(
391+
'Still receving unsupported content', min_level=3)
392+
return
393+
else:
394+
self._load_finished_after_unsupported_content_ready = True
395+
self.logger.log('Unsupported content received', min_level=3)
382396
if self.web_page.maybe_redirect(ok):
383397
self.logger.log("Redirect or other non-fatal error detected", min_level=2)
384398
return
@@ -426,7 +440,11 @@ def _on_content_ready(self, ok, callback, errback, callback_id):
426440
"""
427441
This method is called when a QWebPage finishes loading its contents.
428442
"""
429-
if self.web_page.maybe_redirect(ok):
443+
if self._is_unsupported_content:
444+
if self._unsupported_content_reply.isRunning():
445+
# XXX: We'll come back later when download finishes
446+
return
447+
elif self.web_page.maybe_redirect(ok):
430448
# XXX: It assumes loadFinished will be called again because
431449
# redirect happens. If redirect is detected improperly,
432450
# loadFinished won't be called again, and Splash will return
@@ -438,6 +456,16 @@ def _on_content_ready(self, ok, callback, errback, callback_id):
438456

439457
if self.web_page.is_ok(ok):
440458
callback()
459+
elif self._is_unsupported_content:
460+
# XXX: Error downloading unsupported content.
461+
# `self.web_page.error_info` shall be `None` now
462+
error_info = RenderErrorInfo(
463+
'Network',
464+
int(self._unsupported_content_reply.error()),
465+
six.text_type(self._unsupported_content_reply.errorString()),
466+
six.text_type(self._unsupported_content_reply.url().url())
467+
)
468+
errback(error_info)
441469
elif self.web_page.error_loading(ok):
442470
# XXX: maybe return a meaningful error page instead of generic
443471
# error message?
@@ -512,6 +540,28 @@ def _on_url_changed(self, url):
512540
self.web_page.har.store_redirect(six.text_type(url.toString()))
513541
self._cancel_timers(self._timers_to_cancel_on_redirect)
514542

543+
def _on_unsupported_content_finished(self):
    """
    Handle completion of the reply that carries unsupported content.

    Unless a loadFinished event has already been processed after the
    download became ready (tracked by
    ``_load_finished_after_unsupported_content_ready``), re-emit
    ``loadFinished`` on the main frame: ``True`` when the reply reports
    no error, ``False`` otherwise.
    """
    self.logger.log('Unsupported content finished', min_level=3)
    if self._load_finished_after_unsupported_content_ready:
        # XXX: The unsupported content reply might have finished before
        # the original loadFinished signal was emitted. That signal has
        # been handled already, so it must not be delivered a second time.
        return
    download_ok = not self._unsupported_content_reply.error()
    self.web_page.mainFrame().loadFinished.emit(download_ok)
553+
554+
def _on_unsupported_content(self, reply):
    """
    Slot for the web page's ``unsupportedContent`` signal.

    Starts tracking ``reply`` as the current unsupported-content download
    and arranges for ``_on_unsupported_content_finished`` to run when it
    completes (immediately, if the reply has already finished).

    :param reply: the network reply delivering the unsupported content;
        it must provide ``isFinished()`` and a ``finished`` signal.
    """
    self.logger.log('Unsupported content detected', min_level=3)
    self._is_unsupported_content = True
    self._unsupported_content_reply = reply
    # A new download is being tracked, so no loadFinished event has been
    # processed for it yet. Without this reset, a True value left over
    # from an earlier download in the same tab would stop
    # _on_unsupported_content_finished from re-emitting loadFinished.
    self._load_finished_after_unsupported_content_ready = False
    if reply.isFinished():
        # Already finished. The content might be very short.
        self.logger.log('Unsupported content already finished', min_level=3)
        self._on_unsupported_content_finished()
    else:
        reply.finished.connect(self._on_unsupported_content_finished)
564+
515565
def run_js_file(self, filename, handle_errors=True):
516566
"""
517567
Load JS library from file ``filename`` to the current frame.

splash/tests/mockserver.py

+20
Original file line numberDiff line numberDiff line change
@@ -743,6 +743,24 @@ def render_GET(self, request):
743743
return b"ok"
744744

745745

746+
class RawBytes(Resource):
    """
    Mock resource that writes a hand-built HTTP response directly to the
    transport and then closes the connection.

    The response is served as an attachment (``Content-Disposition:
    attachment``). Supported query arguments:

    * ``length`` - number of body bytes actually sent (default: 1024);
    * ``claim_length`` - value announced in the ``Content-Length`` header
      (default: same as ``length``). When it differs from ``length``, the
      announced size does not match the bytes sent before the connection
      is closed.
    """

    def render_GET(self, request):
        body_length = int(request.args.get(b'length', [1024])[0])
        body = b'0' * body_length
        claim_length = int(request.args.get(b'claim_length', [body_length])[0])
        # HTTP/1.1 requires CRLF after the status line and each header,
        # and an empty CRLF-terminated line before the body.
        content = b'\r\n'.join([
            b'HTTP/1.1 200 OK',
            b'Content-Disposition: attachment',
            ('Content-Length: %d' % claim_length).encode('utf8'),
            b'',
            body,
        ])
        request.channel.transport.write(content)
        request.channel.transport.loseConnection()
        # The response was already written by hand; NOT_DONE_YET is
        # returned instead of a response body.
        return NOT_DONE_YET
762+
763+
746764
class Index(Resource):
747765
isLeaf = True
748766

@@ -820,6 +838,8 @@ def __init__(self, http_port, https_port, proxy_port):
820838
self.putChild(b"bad-content-type", InvalidContentTypeResource())
821839
self.putChild(b"bad-content-type2", InvalidContentTypeResource2())
822840

841+
self.putChild(b"raw-bytes", RawBytes())
842+
823843
self.putChild(b"jsredirect", JsRedirect())
824844
self.putChild(b"jsredirect-to", JsRedirectTo())
825845
self.putChild(b"jsredirect-slowimage", JsRedirectSlowImage())

splash/tests/test_render.py

+15
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,21 @@ def test_invalid_wait(self):
208208
'wait': wait})
209209
self.assertStatusCode(r, 400)
210210

211+
def test_unsupported_content(self):
    """
    Request each ``raw-bytes`` mock URL and check the returned status code:
    200 when the announced length matches the body, 502 when it does not.
    """
    expectations = (
        # Short body (Can be received together with the headers)
        ("raw-bytes?length=16", 200),
        # Short body with error
        ("raw-bytes?length=16&claim_length=100", 502),
        # Long body (May not be received together with the headers)
        ("raw-bytes?length=1000000", 200),
        # Long body with error
        ("raw-bytes?length=1000000&claim_length=2000000", 502),
    )
    for path, expected_status in expectations:
        response = self.request({"url": self.mockurl(path)})
        self.assertStatusCode(response, expected_status)
225+
211226
@pytest.mark.skipif(
212227
not qt_551_plus(),
213228
reason="resource_timeout doesn't work in Qt5 < 5.5.1. See issue #269 for details."

0 commit comments

Comments
 (0)