From d0e98dcc0d08ccc919b693e688d91c2c0f5a619e Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Mon, 25 Feb 2019 13:07:09 +0000 Subject: [PATCH 01/36] Fix formatting error in man page --- doc/MANPAGE.md | 4 ++-- pagekite/manual.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/MANPAGE.md b/doc/MANPAGE.md index 928cf07f..ead46366 100644 --- a/doc/MANPAGE.md +++ b/doc/MANPAGE.md @@ -480,8 +480,8 @@ lapcat(1), ,
- Bjarni R. Einarsson 
 - The Beanstalks Project ehf. 
 - The Rannis Technology Development Fund 
-- Joar Wandborg 
-- Luc-Pierre Terral +- Joar Wandborg +- Luc-Pierre Terral ## Copyright and license ## diff --git a/pagekite/manual.py b/pagekite/manual.py index 4d9795f8..dcb4079a 100755 --- a/pagekite/manual.py +++ b/pagekite/manual.py @@ -448,8 +448,8 @@
- Bjarni R. Einarsson 
     - The Beanstalks Project ehf. 
     - The Rannis Technology Development Fund 
-    - Joar Wandborg 
- - Luc-Pierre Terral + - Joar Wandborg + - Luc-Pierre Terral """) MANUAL_TOC = ( From 16ea46c6e5abffacb31d5a541122c8543c7273dc Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 2 Mar 2019 16:03:00 +0000 Subject: [PATCH 02/36] Fix broken logrotate config --- etc/logrotate.d/pagekite.debian | 1 + 1 file changed, 1 insertion(+) diff --git a/etc/logrotate.d/pagekite.debian b/etc/logrotate.d/pagekite.debian index dd917f19..30bc0ad1 100644 --- a/etc/logrotate.d/pagekite.debian +++ b/etc/logrotate.d/pagekite.debian @@ -1,5 +1,6 @@ /var/log/pagekite/pagekite.log { daily + su daemon daemon missingok rotate 7 postrotate From 4ac37e35396e2393a9bbe2e954674537747e384b Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 2 Mar 2019 16:36:26 +0000 Subject: [PATCH 03/36] Make it possible to manually override version numbers --- setup.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a24174c0..971143ef 100755 --- a/setup.py +++ b/setup.py @@ -13,7 +13,9 @@ setup( name="pagekite", - version=APPVER.replace('github', 'dev%d' % (120*int(time.time()/120))), + version=os.getenv( + 'PAGEKITE_VERSION', + APPVER.replace('github', 'dev%d' % (120*int(time.time()/120)))), license="AGPLv3+", author="Bjarni R. Einarsson", author_email="bre@pagekite.net", From 16450368d285a5a3e7213282b6a38085584617b5 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sun, 19 May 2019 01:40:42 +0000 Subject: [PATCH 04/36] Postpone allocation of ZLib decompressor until we see a ZChunk --- pagekite/proto/selectables.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 0376b3eb..4cf9892b 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -809,7 +809,7 @@ def __init__(self, fd=None, address=None, on_port=None, ui=None): self.compressed = False self.header = '' self.chunk = '' - self.zr = zlib.decompressobj() + self.zr = None def __html__(self): return Selectable.__html__(self) @@ -871,6 +871,8 @@ def ProcessData(self, data): if self.want_bytes == 0: if self.compressed: try: + if not self.zr: + self.zr = zlib.decompressobj() cchunk = self.zr.decompress(self.chunk) except zlib.error: cchunk = '' From 78728bc6b06532c9fb71f15e0c7ac15e5e869d1a Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Fri, 31 May 2019 00:19:17 +0000 Subject: [PATCH 05/36] Fix bug where disabling ZChunks also killed AddKites and Mobile features --- pagekite/proto/conns.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 56431ff8..adefa2ae 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -170,13 +170,13 @@ def _FrontEnd(conn, body, conns): try: for prefix in ('X-Beanstalk', 'X-PageKite'): for feature in conn.parser.Header(prefix+'-Features'): - if not conns.config.disable_zchunks: - if feature == 'ZChunks': + if feature == 'ZChunks': + if not conns.config.disable_zchunks: self.EnableZChunks(level=1) - elif feature == 'AddKites': - self.server_info[self.S_ADD_KITES] = True - elif feature == 'Mobile': - self.server_info[self.S_IS_MOBILE] = True + elif feature == 'AddKites': + self.server_info[self.S_ADD_KITES] = True + elif feature == 'Mobile': + self.server_info[self.S_IS_MOBILE] = True # Track which versions we see in the wild. version = 'old' From 30753d9244849ef2a2a5b0764ceaf8d6cd6083bc Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Thu, 30 May 2019 22:24:44 +0000 Subject: [PATCH 06/36] Add end-to-end HTTPS example to 80_httpd.rc.sample --- etc/pagekite.d/80_httpd.rc.sample | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/etc/pagekite.d/80_httpd.rc.sample b/etc/pagekite.d/80_httpd.rc.sample index 376c704f..55bb39f8 100644 --- a/etc/pagekite.d/80_httpd.rc.sample +++ b/etc/pagekite.d/80_httpd.rc.sample @@ -3,6 +3,11 @@ service_on = http:@kitename : localhost:80 : @kitesecret +# If you have TLS/SSL configured locally, uncomment this to enable end-to-end +# TLS encryption instead of relying on the wild-card certificate at the relay. +# +#service_on = https:@kitename : localhost:443 : @kitesecret + # # Uncomment the following to globally DISABLE the request firewall. Do this # if you are sure you know what you are doing, for more details please see From c94fa17ea341f7b704aa11eaa94844b188ba0694 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 8 Jun 2019 18:29:37 +0000 Subject: [PATCH 07/36] Avoid crashing if we hit our selectables limits --- pagekite/proto/conns.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index adefa2ae..2a175126 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -1223,7 +1223,11 @@ def CloseTunnel(self, tunnel_closed=False): def _FrontEnd(conn, address, proto, host, on_port, body, conns): # This is when an external user connects to a server and requests a # web-page. We have to give it to them! - self = UserConn(address, ui=conns.config.ui) + try: + self = UserConn(address, ui=conns.config.ui) + except (ValueError, IOError, OSError): + conn.LogDebug('Unable to create new connection object!') + return None self.conns = conns self.SetConn(conn) @@ -1301,7 +1305,12 @@ def _FrontEnd(conn, address, proto, host, on_port, body, conns): def _BackEnd(proto, host, sid, tunnel, on_port, remote_ip=None, remote_port=None, data=None): # This is when we open a backend connection, because a user asked for it. - self = UserConn(None, ui=tunnel.conns.config.ui) + try: + self = UserConn(None, ui=tunnel.conns.config.ui) + except (ValueError, IOError, OSError): + tunnel.LogDebug('Unable to create new connection object!') + return None + self.sid = sid self.proto = proto self.host = host From 36f58e59f8655fbf598d5feb4755e882ea50deb2 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Thu, 4 Jul 2019 12:14:37 +0000 Subject: [PATCH 08/36] Give useful feedback if we cannot write our config file --- doc/HISTORY.txt | 8 ++++++++ pagekite/pk.py | 33 ++++++++++++++++++++++++++------- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 15d1e765..90b2d9b9 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -1,6 +1,14 @@ Version history - highlights ============================ +v1.0.0.?????? +------------- + - Fix a few minor buglets + - Added experimental support for relay connections over websockets + - Debian: improve .deb config samples + - Improve errors/feedback if config is unwritable during signup, add, etc. + + v1.0.0.190225 2019.02.25 ------------- - Call this 1.0, change versioning schemes. We're pretty stable! 
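The pk.py hunks that follow add a CanSaveConfig() helper which probes for write access before --save, --add, --remove, --only, --disable or a signup are honoured, so the user gets a clear ConfigError up front instead of a late failure. A minimal standalone sketch of the same probe-before-write idea (the function and exception defined here are illustrative, not the patch's exact API):

    import os

    class ConfigError(Exception):
        """Raised when the configuration file cannot be written (illustrative)."""

    def can_save_config(savefile):
        # Probe for write access without changing any existing content:
        # an existing file is opened read/write and closed again; a missing
        # file is created empty and immediately removed.
        try:
            if os.path.exists(savefile):
                open(savefile, 'r+').close()
            else:
                open(savefile, 'w').close()
                os.remove(savefile)
        except (IOError, OSError):
            raise ConfigError('Could not write to: %s' % savefile)
        return savefile
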
diff --git a/pagekite/pk.py b/pagekite/pk.py index ded44b41..d320df07 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -1569,6 +1569,20 @@ def ListKites(self): def PrintSettings(self, safe=False): print '\n'.join(self.GenerateConfig(safe=safe)) + def CanSaveConfig(self, savefile=None, _raise=None): + savefile = savefile or self.savefile or self.rcfile + try: + if os.path.exists(savefile): + open(savefile, 'r+').close() + else: + open(savefile, 'w').close() # FIXME: Python3.3 adds mode=x, use it! + os.remove(savefile) + except (IOError, OSError): + if _raise is not None: + raise _raise("Could not write to: %s" % savefile) + return False + return savefile + def SaveUserConfig(self, quiet=False): self.savefile = self.savefile or self.rcfile try: @@ -2109,27 +2123,31 @@ def Configure(self, argv): self.ConfigureFromDirectory(arg) elif opt in ('-S', '--savefile'): if self.savefile: raise ConfigError('Multiple save-files!') - self.savefile = arg + self.savefile = self.CanSaveConfig(savefile=arg, _raise=ConfigError) elif opt == '--shell': self.shell = True elif opt == '--save': - self.save = True + self.save = self.CanSaveConfig(_raise=ConfigError) and True elif opt == '--only': - self.save = self.kite_only = True + self.kite_only = True if self.kite_remove or self.kite_add or self.kite_disable: raise ConfigError('One change at a time please!') + self.save = self.CanSaveConfig(_raise=ConfigError) and True elif opt == '--add': - self.save = self.kite_add = True + self.kite_add = True if self.kite_remove or self.kite_only or self.kite_disable: raise ConfigError('One change at a time please!') + self.save = self.CanSaveConfig(_raise=ConfigError) and True elif opt == '--remove': - self.save = self.kite_remove = True + self.kite_remove = True if self.kite_add or self.kite_only or self.kite_disable: raise ConfigError('One change at a time please!') + self.save = self.CanSaveConfig(_raise=ConfigError) and True elif opt == '--disable': - self.save = self.kite_disable = True + self.kite_disable = True if self.kite_add or self.kite_only or self.kite_remove: raise ConfigError('One change at a time please!') + self.save = self.CanSaveConfig(_raise=ConfigError) and True elif opt == '--list': pass elif opt in ('-I', '--pidfile'): self.pidfile = arg @@ -2678,6 +2696,7 @@ def _KiteInfo(self, kitename): def RegisterNewKite(self, kitename=None, first=False, ask_be=False, autoconfigure=False): + self.CanSaveConfig(_raise=ConfigError) registered = False if kitename: (secret, is_subdomain_of, is_service_domain, @@ -4080,7 +4099,7 @@ def Configure(pk): if '--signup' in sys.argv or friendly_mode: pk.RegisterNewKite(autoconfigure=True, first=True) if friendly_mode: - pk.save = True + pk.save = pk.CanSaveConfig(_raise=ConfigError) and True pk.CheckConfig() From 2c68bef1cee47c55fcb250a5f6723a5fc960cab5 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Sat, 13 Jul 2019 14:31:09 +0000 Subject: [PATCH 09/36] Fix --shell if a HTTPD is configured, speed up CLI shutdown --- pagekite/pk.py | 29 +++++++++++++---------------- pagekite/ui/remote.py | 4 ++++ pagekite/yamond.py | 1 + 3 files changed, 18 insertions(+), 16 deletions(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index d320df07..94844a98 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -2072,12 +2072,14 @@ def BindUiSspec(self, force=False): if self.ui_httpd and self.ui_httpd.httpd: if not force: return self.ui_sspec self.ui_httpd.httpd.socket.close() + if self.ui_httpd: + self.ui_httpd.quit() self.ui_sspec = self.ui_sspec or ('localhost', 0) self.ui_httpd = HttpUiThread(self, self.conns, handler=self.ui_request_handler, server=self.ui_http_server, - ssl_pem_filename = self.ui_pemfile) + ssl_pem_filename=self.ui_pemfile) return self.ui_sspec def LoadMOTD(self): @@ -3948,18 +3950,8 @@ def reopen(x,y): self.ui.Status('exiting', message='Stopping...') logging.Log([('stopping', 'pagekite.py')]) - if self.ui_httpd: - self.ui_httpd.quit() if self.ui_comm: self.ui_comm.quit() - if self.tunnel_manager: - self.tunnel_manager.quit() - if self.conns: - if self.conns.auth_pool: - for th in self.conns.auth_pool: - th.quit() - for conn in self.conns.conns: - conn.Cleanup() ##[ Main ]##################################################################### @@ -3985,7 +3977,9 @@ def Main(pagekite, configure, uiclass=NullUi, raise ConfigError(e) shell_mode = shell_mode or pk.shell - if shell_mode is not True: + if shell_mode is True: + pk.FallDown('', help=False, noexit=True) + else: pk.Start() except (ConfigError, getopt.GetoptError), msg: @@ -3994,10 +3988,11 @@ def Main(pagekite, configure, uiclass=NullUi, shell_mode = 'more' except KeyboardInterrupt, msg: - pk.FallDown(None, help=False, noexit=True) if shell_mode: + pk.FallDown(None, help=False, noexit=True) shell_mode = 'auto' else: + pk.ui.Status('exiting', message='Good-bye!') return except SystemExit, status: @@ -4007,15 +4002,17 @@ def Main(pagekite, configure, uiclass=NullUi, sys.exit(status) except Exception, msg: - traceback.print_exc(file=sys.stderr) - if pk.crash_report_url: + crash_msg = format_exc() + logging.LogDebug('Crashed: %s' % crash_msg) + sys.stderr.write('Crashed: %s\n' % crash_msg) + if pk.crash_report_url and not shell_mode: try: print 'Submitting crash report to %s' % pk.crash_report_url logging.LogDebug(''.join(urllib.urlopen(pk.crash_report_url, urllib.urlencode({ 'platform': sys.platform, 'appver': APPVER, - 'crash': format_exc() + 'crash': crash_msg })).readlines())) except Exception, e: print 'FAILED: %s' % e diff --git a/pagekite/ui/remote.py b/pagekite/ui/remote.py index 0344a360..99bb9e20 100755 --- a/pagekite/ui/remote.py +++ b/pagekite/ui/remote.py @@ -252,6 +252,8 @@ def Working(self, message): class PageKiteThread(threading.Thread): + daemon = True + def __init__(self, startup_args=None, debug=False): threading.Thread.__init__(self) self.pk = None @@ -414,6 +416,8 @@ def quit(self): class CommThread(threading.Thread): + daemon = True + def __init__(self, pkThread): threading.Thread.__init__(self) self.pkThread = pkThread diff --git a/pagekite/yamond.py b/pagekite/yamond.py index 8508717b..0439dcc7 100755 --- a/pagekite/yamond.py +++ b/pagekite/yamond.py @@ -95,6 +95,7 @@ def __init__(self, yamond, handler): class YamonD(threading.Thread): """Handle HTTP in a separate thread.""" + daemon = True def __init__(self, sspec, server=YamonHttpServer, From 53d8dc6eb3870c2f79b5a738feb424126f164cfd Mon Sep 
17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sun, 21 Jul 2019 20:50:09 +0000 Subject: [PATCH 10/36] Avoid duplicate IPs in DynDNS updates --- pagekite/pk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index 94844a98..61863fe7 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3526,7 +3526,7 @@ def connect_in_thread(conns, server, state): bips.append(ip) for ip in (ips or bips): - if ip not in domains[domain]: + if ip not in domains[domain][1]: domains[domain][1].append(ip) updates = {} From 145ae37e8758eecf93ad2bd73219c08eb20926b7 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sun, 21 Jul 2019 21:01:24 +0000 Subject: [PATCH 11/36] Call this v1.0.0.190721 --- doc/HISTORY.txt | 8 ++++---- pagekite/common.py | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 90b2d9b9..447dd962 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -1,15 +1,15 @@ Version history - highlights ============================ -v1.0.0.?????? +v1.0.0.190721 ------------- - - Fix a few minor buglets - - Added experimental support for relay connections over websockets - Debian: improve .deb config samples + - Fix buglets: broken --shell, dup IPs in DynDNS, logrotate config, ... + - Postpone allocation of ZLib decompressor until we see a ZChunk - Improve errors/feedback if config is unwritable during signup, add, etc. -v1.0.0.190225 2019.02.25 +v1.0.0.190225 <- The date is there now! ------------- - Call this 1.0, change versioning schemes. We're pretty stable! - UI: Made relay capability description and quota report less confusing diff --git a/pagekite/common.py b/pagekite/common.py index e877fde5..e07e5a43 100755 --- a/pagekite/common.py +++ b/pagekite/common.py @@ -24,7 +24,7 @@ import time PROTOVER = '0.8' -APPVER = '1.0.0.190225' +APPVER = '1.0.0.190721' AUTHOR = 'Bjarni Runar Einarsson, http://bre.klaki.net/' WWWHOME = 'https://pagekite.net/' LICENSE_URL = 'http://www.gnu.org/licenses/agpl.html' From 642c04cd598429fa37b4c96f5bf562e1784de205 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Fri, 27 Mar 2020 11:11:51 +0000 Subject: [PATCH 12/36] Avoid crash if getsockname() fails --- pagekite/proto/selectables.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 4cf9892b..7d54de71 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -596,7 +596,10 @@ def Die(self, discard_buffer=False): def HTTP_Unavail(self, config, where, proto, host, **kwargs): kwargs['frame_url'] = config.error_url if self.fd and where in ('FE', 'fe'): - kwargs['relay_sockname'] = self.fd.getsockname() + try: + kwargs['relay_sockname'] = self.fd.getsockname() + except: + kwargs['relay_sockname'] = None # Do we have a more specific error URL for this domain? This is a # white-label feature, for folks not wanting to hit the PageKite.net From 13d82e5614e42c4776e705246f8aabb905d16314 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Fri, 27 Mar 2020 12:04:23 +0000 Subject: [PATCH 13/36] Allow loading frontend IP list from a file instead of using DNS --- doc/HISTORY.txt | 6 ++++++ doc/MANPAGE.md | 4 ++++ pagekite/manual.py | 4 ++++ pagekite/pk.py | 20 +++++++++++++++----- 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 447dd962..fa349e79 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -1,6 +1,12 @@ Version history - highlights ============================ +v1.0.0.200327 +------------- + - Allow loading frontend IP list from a file instead of using DNS + - Avoid crash if getsockname() fails + + v1.0.0.190721 ------------- - Debian: improve .deb config samples diff --git a/doc/MANPAGE.md b/doc/MANPAGE.md index ead46366..120b8997 100644 --- a/doc/MANPAGE.md +++ b/doc/MANPAGE.md @@ -210,6 +210,10 @@ time the program defaults will Just Work. name, using the given port number. Default behavior is to probe all addresses and use the fastest one. + * --frontends=`num`:`@/path/to/file`:`port`
+   Same as above, except the IP address list will be loaded from
+   a file (and reloaded periodically), instead of using DNS.
+
 * --nofrontend=`ip`:`port`
Never connect to the named front-end server. This can be used to exclude some front-ends from auto-configuration. diff --git a/pagekite/manual.py b/pagekite/manual.py index dcb4079a..59ddc20d 100755 --- a/pagekite/manual.py +++ b/pagekite/manual.py @@ -182,6 +182,10 @@ name, using the given port number. Default behavior is to probe all addresses and use the fastest one. + --frontends=num:@/path/to/file:port __ + Same as above, except the IP address list will be loaded from + a file (and reloaded periodically), instead of using DNS. + --nofrontend=ip:port __ Never connect to the named front-end server. This can be used to exclude some front-ends from auto-configuration. diff --git a/pagekite/pk.py b/pagekite/pk.py index 61863fe7..7042caa3 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3166,11 +3166,21 @@ def Ping(self, host, port, overload_ms=250, bias=None): def GetHostIpAddrs(self, host): rv = [] - try: - info = socket.getaddrinfo(host, 0, socket.AF_UNSPEC, socket.SOCK_STREAM) - rv = [i[4][0] for i in info] - except AttributeError: - rv = socket.gethostbyname_ex(host)[2] + if host[:1] == '@': + try: + for line in (l.strip() for l in open(host[1:], 'r')): + if line and line[:1] not in ('#', ';'): + rv.append(line) + logging.LogDebug('Loaded %d IPs from %s' % (len(rv), host[1:])) + except: + logging.LogDebug('Failed to load IPs from %s' % host[1:]) + raise + else: + try: + info = socket.getaddrinfo(host, 0, socket.AF_UNSPEC, socket.SOCK_STREAM) + rv = [i[4][0] for i in info] + except AttributeError: + rv = socket.gethostbyname_ex(host)[2] return rv def CachedGetHostIpAddrs(self, host): From 816931048feec51db78ac08e1912ed55dc1f580b Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Fri, 27 Mar 2020 12:08:05 +0000 Subject: [PATCH 14/36] Call this v1.0.0.200327, oh my it is now 2020. --- doc/MANPAGE.md | 2 +- pagekite/__init__.py | 2 +- pagekite/__main__.py | 2 +- pagekite/android.py | 2 +- pagekite/common.py | 4 ++-- pagekite/compat.py | 2 +- pagekite/dropper.py | 2 +- pagekite/httpd.py | 2 +- pagekite/logging.py | 2 +- pagekite/logparse.py | 2 +- pagekite/manual.py | 2 +- pagekite/pk.py | 2 +- pagekite/yamond.py | 2 +- 13 files changed, 14 insertions(+), 14 deletions(-) diff --git a/doc/MANPAGE.md b/doc/MANPAGE.md index 120b8997..69a44f4a 100644 --- a/doc/MANPAGE.md +++ b/doc/MANPAGE.md @@ -489,7 +489,7 @@ lapcat(1), , ## Copyright and license ## -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni R. Einarsson. +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni R. Einarsson. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/pagekite/__init__.py b/pagekite/__init__.py index f8f0a1e7..2c9af6b0 100755 --- a/pagekite/__init__.py +++ b/pagekite/__init__.py @@ -1,7 +1,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. 
and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/__main__.py b/pagekite/__main__.py index c77937e2..1614a196 100755 --- a/pagekite/__main__.py +++ b/pagekite/__main__.py @@ -5,7 +5,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/android.py b/pagekite/android.py index cb1bf3d6..2ea6a409 100755 --- a/pagekite/android.py +++ b/pagekite/android.py @@ -4,7 +4,7 @@ ############################################################################# LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/common.py b/pagekite/common.py index e07e5a43..e16a2879 100755 --- a/pagekite/common.py +++ b/pagekite/common.py @@ -4,7 +4,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free @@ -24,7 +24,7 @@ import time PROTOVER = '0.8' -APPVER = '1.0.0.190721' +APPVER = '1.0.0.200327' AUTHOR = 'Bjarni Runar Einarsson, http://bre.klaki.net/' WWWHOME = 'https://pagekite.net/' LICENSE_URL = 'http://www.gnu.org/licenses/agpl.html' diff --git a/pagekite/compat.py b/pagekite/compat.py index 1611d157..d0ebc22b 100755 --- a/pagekite/compat.py +++ b/pagekite/compat.py @@ -4,7 +4,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/dropper.py b/pagekite/dropper.py index abaff1ef..7bb0649c 100755 --- a/pagekite/dropper.py +++ b/pagekite/dropper.py @@ -5,7 +5,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. 
and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/httpd.py b/pagekite/httpd.py index 81bd9f9f..91348b5c 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -4,7 +4,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/logging.py b/pagekite/logging.py index 9d429c40..be1781e7 100755 --- a/pagekite/logging.py +++ b/pagekite/logging.py @@ -4,7 +4,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/logparse.py b/pagekite/logparse.py index 647b2fbb..27e72fd3 100755 --- a/pagekite/logparse.py +++ b/pagekite/logparse.py @@ -5,7 +5,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/manual.py b/pagekite/manual.py index 59ddc20d..62a66331 100755 --- a/pagekite/manual.py +++ b/pagekite/manual.py @@ -425,7 +425,7 @@ `demo_auth_app.py` which implements this protocol. """) MAN_LICENSE = ("""\ - Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni R. Einarsson. + Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni R. Einarsson. This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by diff --git a/pagekite/pk.py b/pagekite/pk.py index 7042caa3..53dedd5d 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -5,7 +5,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free diff --git a/pagekite/yamond.py b/pagekite/yamond.py index 0439dcc7..cf700cb8 100755 --- a/pagekite/yamond.py +++ b/pagekite/yamond.py @@ -5,7 +5,7 @@ ############################################################################## LICENSE = """\ This file is part of pagekite.py. -Copyright 2010-2019, the Beanstalks Project ehf. and Bjarni Runar Einarsson +Copyright 2010-2020, the Beanstalks Project ehf. 
and Bjarni Runar Einarsson This program is free software: you can redistribute it and/or modify it under the terms of the GNU Affero General Public License as published by the Free From c5bba8bcda42471ec981ddfcf83c0f3b8300dce6 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Tue, 7 Apr 2020 23:36:40 +0000 Subject: [PATCH 15/36] Precautions: Add locks, improve error handling --- pagekite/pk.py | 258 +++++++++++++++++++--------------- pagekite/proto/selectables.py | 5 +- 2 files changed, 145 insertions(+), 118 deletions(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index 53dedd5d..6bf1cd9d 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -23,6 +23,7 @@ ############################################################################## import base64 import cgi +import copy from cgi import escape as escape_html import errno import getopt @@ -332,6 +333,7 @@ class Connections(object): def __init__(self, config): self.config = config self.ip_tracker = {} + self.lock = threading.Lock() self.idle = [] self.conns = [] self.conns_by_id = {} @@ -346,67 +348,74 @@ def start(self, auth_threads=None, auth_thread_count=1): th.start() def Add(self, conn): - self.conns.append(conn) + with self.lock: + self.conns.append(conn) def auth(self): if common.gYamon: common.gYamon.vset('auth_threads', len(self.auth_pool)) common.gYamon.vset('auth_thread_qtime', sum([at.qtime for at in self.auth_pool] - ) / len(self.auth_pool)) + ) / (len(self.auth_pool) or 1)) common.gYamon.vset('auth_thread_jobs', sum([len(at.jobs) for at in self.auth_pool])) return self.auth_pool[random.randint(0, len(self.auth_pool)-1)] def SetAltId(self, conn, new_id): - if conn.alt_id and conn.alt_id in self.conns_by_id: - del self.conns_by_id[conn.alt_id] - if new_id: - self.conns_by_id[new_id] = conn - conn.alt_id = new_id + with self.lock: + if conn.alt_id and conn.alt_id in self.conns_by_id: + del self.conns_by_id[conn.alt_id] + if new_id: + self.conns_by_id[new_id] = conn + conn.alt_id = new_id def SetIdle(self, conn, seconds): - self.idle.append((time.time() + seconds, conn.last_activity, conn)) + with self.lock: + self.idle.append((time.time() + seconds, conn.last_activity, conn)) def TrackIP(self, ip, domain): tick = '%d' % (time.time()/12) - if tick not in self.ip_tracker: - deadline = int(tick)-10 - for ot in self.ip_tracker.keys(): - if int(ot) < deadline: - del self.ip_tracker[ot] - self.ip_tracker[tick] = {} - - if ip not in self.ip_tracker[tick]: - self.ip_tracker[tick][ip] = [1, domain] - else: - self.ip_tracker[tick][ip][0] += 1 - self.ip_tracker[tick][ip][1] = domain + with self.lock: + if tick not in self.ip_tracker: + deadline = int(tick)-10 + for ot in self.ip_tracker.keys(): + if int(ot) < deadline: + del self.ip_tracker[ot] + self.ip_tracker[tick] = {} + + if ip not in self.ip_tracker[tick]: + self.ip_tracker[tick][ip] = [1, domain] + else: + self.ip_tracker[tick][ip][0] += 1 + self.ip_tracker[tick][ip][1] = domain def LastIpDomain(self, ip): domain = None - for tick in sorted(self.ip_tracker.keys()): + with self.lock: + _keys = sorted(self.ip_tracker.keys()) + for tick in _keys: if ip in self.ip_tracker[tick]: domain = self.ip_tracker[tick][ip][1] return domain def Remove(self, conn, retry=True): try: - if conn.alt_id and conn.alt_id in self.conns_by_id: - del self.conns_by_id[conn.alt_id] - if conn in self.conns: - self.conns.remove(conn) - rmp = [] - for elc in self.idle: - if elc[-1] == conn: - rmp.append(elc) - for elc in rmp: - self.idle.remove(elc) - for tid, tunnels in self.tunnels.items(): - if conn in 
tunnels: - tunnels.remove(conn) - if not tunnels: - del self.tunnels[tid] + with self.lock: + if conn.alt_id and conn.alt_id in self.conns_by_id: + del self.conns_by_id[conn.alt_id] + if conn in self.conns: + self.conns.remove(conn) + rmp = [] + for elc in self.idle: + if elc[-1] == conn: + rmp.append(elc) + for elc in rmp: + self.idle.remove(elc) + for tid, tunnels in self.tunnels.items(): + if conn in tunnels: + tunnels.remove(conn) + if not tunnels: + del self.tunnels[tid] except (ValueError, KeyError): # Let's not asplode if another thread races us for this. logging.LogError('Failed to remove %s: %s' % (conn, format_exc())) @@ -414,64 +423,75 @@ def Remove(self, conn, retry=True): return self.Remove(conn, retry=False) def IdleConns(self): - return [p[-1] for p in self.idle] + with self.lock: + return [p[-1] for p in self.idle] def Sockets(self): - return [s.fd for s in self.conns] + with self.lock: + return [s.fd for s in self.conns] def Readable(self): # FIXME: This is O(n) now = time.time() - return [s.fd for s in self.conns if s.IsReadable(now)] + with self.lock: + return [s.fd for s in self.conns if s.IsReadable(now)] def Blocked(self): # FIXME: This is O(n) # Magic side-effect: update buffered byte counter - blocked = [s for s in self.conns if s.IsBlocked()] - common.buffered_bytes[0] = sum([len(s.write_blocked) for s in blocked]) - return [s.fd for s in blocked] + with self.lock: + blocked = [s for s in self.conns if s.IsBlocked()] + common.buffered_bytes[0] = sum([len(s.write_blocked) for s in blocked]) + return [s.fd for s in blocked] def DeadConns(self): - return [s for s in self.conns if s.IsDead()] + with self.lock: + return [s for s in self.conns if s.IsDead()] def CleanFds(self): evil = [] - for s in self.conns: - try: - i, o, e = select.select([s.fd], [s.fd], [s.fd], 0) - except: - evil.append(s) + with self.lock: + for s in self.conns: + try: + i, o, e = select.select([s.fd], [s.fd], [s.fd], 0) + except: + evil.append(s) for s in evil: logging.LogDebug('Removing broken Selectable: %s' % s) s.Cleanup() self.Remove(s) def Connection(self, fd): - for conn in self.conns: - if conn.fd == fd: - return conn + with self.lock: + for conn in self.conns: + if conn.fd == fd: + return conn return None def TunnelServers(self): servers = {} - for tid in self.tunnels: - for tunnel in self.tunnels[tid]: - server = tunnel.server_info[tunnel.S_NAME] - if server is not None: - servers[server] = 1 + with self.lock: + for tid in self.tunnels: + for tunnel in self.tunnels[tid]: + server = tunnel.server_info[tunnel.S_NAME] + if server is not None: + servers[server] = 1 return servers.keys() def CloseTunnel(self, proto, domain, conn): - tid = '%s:%s' % (proto, domain) - if tid in self.tunnels: - if conn in self.tunnels[tid]: - self.tunnels[tid].remove(conn) - if not self.tunnels[tid]: - del self.tunnels[tid] + with self.lock: + tid = '%s:%s' % (proto, domain) + if tid in self.tunnels: + if conn in self.tunnels[tid]: + self.tunnels[tid].remove(conn) + if not self.tunnels[tid]: + del self.tunnels[tid] def CheckIdleConns(self, now): active = [] - for elc in self.idle: + with self.lock: + _idle = copy.copy(self.idle) + for elc in _idle: expire, last_activity, conn = elc if conn.last_activity > last_activity: active.append(elc) @@ -480,30 +500,33 @@ def CheckIdleConns(self, now): conn.Die(discard_buffer=True) elif conn.created < now - 1: conn.SayHello() - for pair in active: - self.idle.remove(pair) + with self.lock: + for pair in active: + if pair in self.idle: + self.idle.remove(pair) def 
Tunnel(self, proto, domain, conn=None): - tid = '%s:%s' % (proto, domain) - if conn is not None: - if tid not in self.tunnels: - self.tunnels[tid] = [] - self.tunnels[tid].append(conn) - - if tid in self.tunnels: - return self.tunnels[tid] - else: - try: - dparts = domain.split('.')[1:] - while len(dparts) > 1: - wild_tid = '%s:*.%s' % (proto, '.'.join(dparts)) - if wild_tid in self.tunnels: - return self.tunnels[wild_tid] - dparts = dparts[1:] - except: - pass + with self.lock: + tid = '%s:%s' % (proto, domain) + if conn is not None: + if tid not in self.tunnels: + self.tunnels[tid] = [] + self.tunnels[tid].append(conn) + + if tid in self.tunnels: + return self.tunnels[tid] + else: + try: + dparts = domain.split('.')[1:] + while len(dparts) > 1: + wild_tid = '%s:*.%s' % (proto, '.'.join(dparts)) + if wild_tid in self.tunnels: + return self.tunnels[wild_tid] + dparts = dparts[1:] + except: + pass - return [] + return [] class HttpUiThread(threading.Thread): @@ -3724,41 +3747,42 @@ def Epoll(self, epoll, waittime): broken = False try: bbc = 0 - for c in self.conns.conns: - fd, mask = c.fd, 0 - if not c.IsDead(): - if c.IsBlocked(): - bbc += len(c.write_blocked) - mask |= select.EPOLLOUT - if c.IsReadable(now): - mask |= select.EPOLLIN - - if mask: - try: - fdc[fd.fileno()] = fd - except socket.error: - # If this fails, then the socket has HUPed, however we need to - # bypass epoll to make sure that's reflected in iready below. - bid = 'dead-%d' % len(evs) - fdc[bid] = fd - evs.append((bid, select.EPOLLHUP)) - # Trigger removal of c.fd, if it was still in the epoll. - fd, mask = None, 0 - - if mask: - try: - epoll.modify(fd, mask) - except IOError: + with self.conns.lock: + for c in self.conns.conns: + fd, mask = c.fd, 0 + if not c.IsDead(): + if c.IsBlocked(): + bbc += len(c.write_blocked) + mask |= select.EPOLLOUT + if c.IsReadable(now): + mask |= select.EPOLLIN + + if mask: try: - epoll.register(fd, mask) + fdc[fd.fileno()] = fd + except socket.error: + # If this fails, then the socket has HUPed, however we need to + # bypass epoll to make sure that's reflected in iready below. + bid = 'dead-%d' % len(evs) + fdc[bid] = fd + evs.append((bid, select.EPOLLHUP)) + # Trigger removal of c.fd, if it was still in the epoll. + fd, mask = None, 0 + + if mask: + try: + epoll.modify(fd, mask) + except IOError: + try: + epoll.register(fd, mask) + except (IOError, TypeError): + evs.append((fd, select.EPOLLHUP)) # Error == HUP + else: + try: + epoll.unregister(c.fd) # Important: Use c.fd, not fd! except (IOError, TypeError): - evs.append((fd, select.EPOLLHUP)) # Error == HUP - else: - try: - epoll.unregister(c.fd) # Important: Use c.fd, not fd! - except (IOError, TypeError): - # Failing to unregister is OK, ignore - pass + # Failing to unregister is OK, ignore + pass common.buffered_bytes[0] = bbc evs.extend(epoll.poll(waittime)) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 7d54de71..a9ccdfd2 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -188,9 +188,12 @@ def __str__(self): def __html__(self): try: peer = self.fd.getpeername() - sock = self.fd.getsockname() except: peer = ('x.x.x.x', 'x') + + try: + sock = self.fd.getsockname() + except: sock = ('x.x.x.x', 'x') return ('Outgoing ZChunks: %s
' From 7c04593ec42b7e78d5cc3d36616ecc389487682c Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Mon, 6 Apr 2020 01:29:00 +0000 Subject: [PATCH 16/36] Create an efficient fast-path for PageKite ping traffic --- doc/HISTORY.txt | 6 ++ pagekite/compat.py | 5 ++ pagekite/pk.py | 2 + pagekite/proto/conns.py | 142 +++++++++++++++++++++++++++++++++----- pagekite/proto/parsers.py | 4 +- 5 files changed, 141 insertions(+), 18 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index fa349e79..706970b9 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -1,6 +1,12 @@ Version history - highlights ============================ + +v1.0.0.200406 +------------- + - Create ping.pagekite fast-path in dedicated thread + + v1.0.0.200327 ------------- - Allow loading frontend IP list from a file instead of using DNS diff --git a/pagekite/compat.py b/pagekite/compat.py index d0ebc22b..1aed90c9 100755 --- a/pagekite/compat.py +++ b/pagekite/compat.py @@ -100,6 +100,11 @@ def format_exc(): traceback.print_exc(file=sio) return sio.getvalue() +try: + from Queue import Queue +except ImportError: + from queue import Queue + # Old Pythons lack rsplit def rsplit(ch, data): parts = data.split(ch) diff --git a/pagekite/pk.py b/pagekite/pk.py index 6bf1cd9d..a4ebd81a 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -339,6 +339,8 @@ def __init__(self, config): self.conns_by_id = {} self.tunnels = {} self.auth_pool = [] + self.ping_helper = FastPingHelper(self) + self.ping_helper.start() def start(self, auth_threads=None, auth_thread_count=1): self.auth_pool = auth_threads or [] diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 2a175126..82bf45d3 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -25,7 +25,6 @@ import sys import threading import time -import traceback from pagekite.compat import * from pagekite.common import * @@ -37,6 +36,8 @@ from parsers import * from proto import * +SMTP_PORTS = (25, 465, 587, 2525) + class Tunnel(ChunkParser): """A Selectable representing a PageKite tunnel.""" @@ -1565,8 +1566,7 @@ def SayHello(self): return False else: self.said_hello = True - if self.on_port in (25, 125, 2525): - # FIXME: We don't actually support SMTP yet and 125 is bogus. 
+ if self.on_port in SMTP_PORTS: self.Send(['220 ready ESMTP PageKite Magic Proxy\n'], try_flush=True) return True @@ -1932,6 +1932,109 @@ def __init__(self, fd, address, on_port, conns): self.Cleanup() +class FastPingHelper(threading.Thread): + def __init__(self, conns): + threading.Thread.__init__(self) + self.daemon = True + self.lock = threading.Lock() + self.conns = conns + self.config = conns.config + self.clients = [] + self.rejection = None + self.overloaded = False + self.processing = 0 + self.sleeptime = 0.03 + self.fast_pinged = [] + self.next_pinglog = time.time() + 1 + self.wq = Queue() + self.up_rejection() + + def up_rejection(self): + self.overloaded = self.config.Overloaded() + self.rejection = HTTP_Unavailable('fe', 'http', 'ping.pagekite', + overloaded=self.overloaded, + advertise=False) + + def add_client(self, client, addr, handler): + with self.lock: + if self.processing < 1 and not self.clients: + ping_queue = True + else: + ping_queue = False + + client.setblocking(0) + self.clients.append((time.time(), client, addr, handler)) + if ping_queue: + self.wq.put(1) + + def run_once(self): + now = time.time() + self.processing = len(self.clients) + with self.lock: + _clients, self.clients = self.clients, [] + for ts, client, addr, handler in _clients: + try: + data = client.recv(64, socket.MSG_PEEK) + except: + data = None + try: + if data: + if '\nHost: ping.pagekite' in data: + client.send(self.rejection) + client.close() + self.fast_pinged.append(obfuIp(addr[0])) + else: + handler(client, addr) + elif ts > (now-5): + with self.lock: + self.clients.append((ts, client, addr, handler)) + else: + logging.LogDebug('Timeout, dropping ' + obfuIp(addr[0])) + client.close() + except IOError: + logging.LogDebug('IOError, dropping ' + obfuIp(addr[0])) + # No action: just let the client get garbage collected + except: + pass + self.processing -= 1 + + if now > self.next_pinglog: + if self.fast_pinged: + logging.LogDebug('Fast ping %s %d clients: %s' % ( + 'discouraged' if self.overloaded else 'welcomed', + len(self.fast_pinged), + ', '.join(self.fast_pinged))) + self.fast_pinged = [] + self.up_rejection() + self.next_pinglog = now + 1 + + self.sleeptime = max(0, (now + 0.015) - time.time()) + + def run_until(self, deadline): + try: + self.sleeptime = 0.03 + while (time.time() + self.sleeptime) < deadline and self.clients: + while not self.wq.empty(): + self.wq.get() + time.sleep(self.sleeptime) + self.run_once() + except: + logging.LogError('FastPingHelper crashed: ' + format_exc()) + + def run(self): + while True: + try: + while True: + while not self.clients or not self.wq.empty(): + self.wq.get() + self.sleeptime = 0.03 + time.sleep(self.sleeptime) + self.run_once() + except: + logging.LogError('FastPingHelper crashed: ' + format_exc()) + time.sleep(1) + + class Listener(Selectable): """This class listens for incoming connections and accepts them.""" @@ -1958,7 +2061,7 @@ def __html__(self): return '
Listening on port %s for %s
' % (self.port, self.connclass) def check_acl(self, ipaddr, default=True): - if self.acl: + if self.acl and os.path.exists(self.acl): try: ipaddr = '%s' % ipaddr lc = 0 @@ -1980,27 +2083,34 @@ def check_acl(self, ipaddr, default=True): except IndexError: self.LogDebug('Invalid line %d in ACL %s' % (lc, self.acl)) except: - self.LogDebug('Failed to read/parse %s' % self.acl) + self.LogDebug( + 'Failed to read/parse %s: %s' % (self.acl, format_exc())) self.acl_match = (0, '.*', default and 'allow' or 'reject', 'Default') return default + def HandleClient(self, client, address): + if self.check_acl(address[0]): + log_info = [('accept', '%s:%s' % (obfuIp(address[0]), address[1]))] + uc = self.connclass(client, address, self.port, self.conns) + else: + log_info = [('reject', '%s:%s' % (obfuIp(address[0]), address[1]))] + client.close() + if self.acl: + log_info.extend([('acl_line', '%s' % self.acl_match[0]), + ('reason', self.acl_match[3])]) + self.Log(log_info) + return True + def ReadData(self, maxread=None): try: self.last_activity = time.time() client, address = self.fd.accept() if client: - if self.check_acl(address[0]): - log_info = [('accept', '%s:%s' % (obfuIp(address[0]), address[1]))] - uc = self.connclass(client, address, self.port, self.conns) + if self.port not in SMTP_PORTS: + self.conns.ping_helper.add_client(client, address, self.HandleClient) else: - log_info = [('reject', '%s:%s' % (obfuIp(address[0]), address[1]))] - client.close() - if self.acl: - log_info.extend([('acl_line', '%s' % self.acl_match[0]), - ('reason', self.acl_match[3])]) - self.Log(log_info) - return True - + self.HandleClient(client, address) + return True except IOError, err: self.LogDebug('Listener::ReadData: error: %s (%s)' % (err, err.errno)) diff --git a/pagekite/proto/parsers.py b/pagekite/proto/parsers.py index 3c8799d0..2bbe741f 100755 --- a/pagekite/proto/parsers.py +++ b/pagekite/proto/parsers.py @@ -183,7 +183,7 @@ def Parse(self, line): return self.ParseBody(line) except ValueError, err: - logging.LogDebug('Parse failed: %s, %s, %s' % (self.state, err, self.lines)) + logging.LogDebug('HTTP parse failed: %s, %s, %s' % (self.state, err, self.lines)) self.state = BaseLineParser.PARSE_FAILED return False @@ -265,7 +265,7 @@ def Parse(self, line): else: self.state = BaseLineParser.PARSE_FAILED except Exception, err: - logging.LogDebug('Parse failed: %s, %s, %s' % (self.state, err, self.lines)) + logging.LogDebug('IRC parse failed: %s, %s, %s' % (self.state, err, self.lines)) self.state = BaseLineParser.PARSE_FAILED return (self.state != BaseLineParser.PARSE_FAILED) From eda3709636b1d2d60ca949de60f0936bab79f5e4 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Wed, 8 Apr 2020 01:47:26 +0000 Subject: [PATCH 17/36] Encourage batching in main loop, more logging --- pagekite/pk.py | 9 ++++++++- pagekite/proto/conns.py | 9 +++++---- 2 files changed, 13 insertions(+), 5 deletions(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index a4ebd81a..0b9e8137 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3831,7 +3831,7 @@ def Loop(self): logging.LogDebug('Entering main %s loop' % (epoll and 'epoll' or 'select')) loop_count = 0 while self.keep_looping: - epoll, iready, oready, eready = mypoll(epoll, 1.1) + epoll, iready, oready, eready = mypoll(epoll, 1.10) now = time.time() if oready: @@ -3851,12 +3851,19 @@ def Loop(self): self.last_loop = now loop_count += 1 + snooze = 0 if oready else max(0, (now + 0.010) - time.time()) + if snooze: + time.sleep(snooze) + if now - self.last_barf > (logging.DEBUG_IO and 15 or 600): self.last_barf = now if epoll: epoll.close() epoll, mypoll = self.CreatePollObject() logging.LogDebug('Loop #%d, selectable map: %s' % (loop_count, SELECTABLES)) + if 0 == (loop_count % (5 if logging.DEBUG_IO else 100)): + logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' + % (loop_count, len(iready), len(oready), len(eready), snooze, APPVER)) if epoll: epoll.close() diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 82bf45d3..d2b3c0b8 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -2089,15 +2089,16 @@ def check_acl(self, ipaddr, default=True): return default def HandleClient(self, client, address): + log_info = [('port', self.port)] if self.check_acl(address[0]): - log_info = [('accept', '%s:%s' % (obfuIp(address[0]), address[1]))] + log_info += [('accept', '%s:%s' % (obfuIp(address[0]), address[1]))] uc = self.connclass(client, address, self.port, self.conns) else: - log_info = [('reject', '%s:%s' % (obfuIp(address[0]), address[1]))] + log_info += [('reject', '%s:%s' % (obfuIp(address[0]), address[1]))] client.close() if self.acl: - log_info.extend([('acl_line', '%s' % self.acl_match[0]), - ('reason', self.acl_match[3])]) + log_info += [('acl_line', '%s' % self.acl_match[0]), + ('reason', self.acl_match[3])] self.Log(log_info) return True From cf7b6e73d8dd3ea47f31c504a5d5b81eca881796 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Wed, 8 Apr 2020 01:48:32 +0000 Subject: [PATCH 18/36] Avoid deadlocks in fast ping, disable fast ping for SMTP ports --- pagekite/proto/conns.py | 50 ++++++++++++++++++++--------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index d2b3c0b8..eb499261 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -1942,7 +1942,7 @@ def __init__(self, conns): self.clients = [] self.rejection = None self.overloaded = False - self.processing = 0 + self.waiting = True self.sleeptime = 0.03 self.fast_pinged = [] self.next_pinglog = time.time() + 1 @@ -1956,20 +1956,14 @@ def up_rejection(self): advertise=False) def add_client(self, client, addr, handler): + client.setblocking(0) with self.lock: - if self.processing < 1 and not self.clients: - ping_queue = True - else: - ping_queue = False - - client.setblocking(0) self.clients.append((time.time(), client, addr, handler)) - if ping_queue: - self.wq.put(1) + if self.waiting: + self.wq.put(1) def run_once(self): now = time.time() - self.processing = len(self.clients) with self.lock: _clients, self.clients = self.clients, [] for ts, client, addr, handler in _clients: @@ -1995,16 +1989,14 @@ def run_once(self): logging.LogDebug('IOError, dropping ' + obfuIp(addr[0])) # No action: just let the client get garbage collected except: - pass - self.processing -= 1 + logging.LogDebug('Error in FastPing: ' + format_exc()) if now > self.next_pinglog: - if self.fast_pinged: - logging.LogDebug('Fast ping %s %d clients: %s' % ( - 'discouraged' if self.overloaded else 'welcomed', - len(self.fast_pinged), - ', '.join(self.fast_pinged))) - self.fast_pinged = [] + logging.LogDebug('Fast ping %s %d clients: %s' % ( + 'discouraged' if self.overloaded else 'welcomed', + len(self.fast_pinged), + ', '.join(self.fast_pinged))) + self.fast_pinged = [] self.up_rejection() self.next_pinglog = now + 1 @@ -2012,10 +2004,12 @@ def run_once(self): def run_until(self, deadline): try: - self.sleeptime = 0.03 while (time.time() + self.sleeptime) < deadline and self.clients: + with self.lock: + self.waiting = True while not self.wq.empty(): self.wq.get() + self.waiting = False time.sleep(self.sleeptime) self.run_once() except: @@ -2025,9 +2019,11 @@ def run(self): while True: try: while True: + with self.lock: + self.waiting = True while not self.clients or not self.wq.empty(): self.wq.get() - self.sleeptime = 0.03 + self.waiting = False time.sleep(self.sleeptime) self.run_once() except: @@ -2106,11 +2102,15 @@ def ReadData(self, maxread=None): try: self.last_activity = time.time() client, address = self.fd.accept() - if client: - if self.port not in SMTP_PORTS: - self.conns.ping_helper.add_client(client, address, self.HandleClient) - else: - self.HandleClient(client, address) + if self.port not in SMTP_PORTS: + while client: + try: + self.conns.ping_helper.add_client(client, address, self.HandleClient) + client, address = self.fd.accept() + except IOError: + client = None + elif client: + self.HandleClient(client, address) return True except IOError, err: self.LogDebug('Listener::ReadData: error: %s (%s)' % (err, err.errno)) From f9b46d0c55404ce3d389bd89408ea34ba45b9930 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Wed, 8 Apr 2020 11:28:10 +0000 Subject: [PATCH 19/36] Comment the select-loop snooze effects, make snooze and maxread configurable. 
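The comment this patch adds to pk.py (below) spells out how the two new knobs interact: the main loop sleeps at least SELECT_LOOP_MIN_MS between passes, and each pass reads at most MAX_READ_BYTES (scaled up for tunnels) from any one socket, which together caps per-tunnel throughput. A back-of-the-envelope helper illustrating that arithmetic with the figures from the added comment (the function itself is illustrative and not part of the patch; the real settings are --max_read_bytes and --select_loop_min_ms):

    def max_tunnel_outgoing_bps(select_loop_min_ms=5, max_read_bytes=16 * 1024,
                                tunnel_multiplier=2.0):
        # The loop runs at most 1000/select_loop_min_ms times per second and
        # each pass reads at most max_read_bytes * tunnel_multiplier from a
        # single tunnel socket, bounding that tunnel's outgoing bandwidth.
        passes_per_second = 1000.0 / select_loop_min_ms
        return passes_per_second * max_read_bytes * tunnel_multiplier

    # Defaults: roughly 200 passes/s * 32kB =~ 6MB/s per tunnel, matching
    # the comment added in this patch.
    print('%.2f MB/s' % (max_tunnel_outgoing_bps() / (1024 * 1024)))
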
--- doc/HISTORY.txt | 1 + pagekite/common.py | 11 +++++++++- pagekite/pk.py | 39 ++++++++++++++++++++++++++++++----- pagekite/proto/conns.py | 5 +---- pagekite/proto/selectables.py | 4 ++-- 5 files changed, 48 insertions(+), 12 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 706970b9..a4eff905 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -5,6 +5,7 @@ Version history - highlights v1.0.0.200406 ------------- - Create ping.pagekite fast-path in dedicated thread + - Make select loop timing and read sizes configurable v1.0.0.200327 diff --git a/pagekite/common.py b/pagekite/common.py index e16a2879..d901aec2 100755 --- a/pagekite/common.py +++ b/pagekite/common.py @@ -72,8 +72,17 @@ LOOPBACK_BE = LOOPBACK_HN + ':2' LOOPBACK = {'FE': LOOPBACK_FE, 'BE': LOOPBACK_BE} +# This is how many bytes we are willing to read per cycle. +MAX_READ_BYTES = 16 * 1024 +MAX_READ_TUNNEL_X = 3.1 # 3x above, + fudge factor + +# Higher values save CPU and prevent individual tunnels +# from hogging all our resources, but hurt latency and +# reduce per-tunnel throughput. +SELECT_LOOP_MIN_MS = 5 + # Re-evaluate our choice of frontends every 45-60 minutes. -FE_PING_INTERVAL = (45 * 60) + random.randint(0, 900) +FE_PING_INTERVAL = (45 * 60) + random.randint(0, 900) # This is a global count of disconnect errors; we use this # to adjust the ping interval over time. diff --git a/pagekite/pk.py b/pagekite/pk.py index 0b9e8137..6f0465a3 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -80,7 +80,7 @@ 'fe_certname=', 'fe_nocertcheck', 'ca_certs=', 'kitename=', 'kitesecret=', 'fingerpath=', 'backend=', 'define_backend=', 'be_config=', - 'insecure', 'ratelimit_ips=', + 'insecure', 'ratelimit_ips=', 'max_read_bytes=', 'select_loop_min_ms=', 'service_on=', 'service_off=', 'service_cfg=', 'tunnel_acl=', 'client_acl=', 'accept_acl_file=', 'frontend=', 'nofrontend=', 'frontends=', 'keepalive=', @@ -1479,7 +1479,6 @@ def addManualFrontends(): com + ('overload_mem = %-5s # 0=fixed' % self.overload_cpu), com + ('overload_cpu = %-5s # 0=fixed' % self.overload_mem) ]) - config.extend([ '', '##[ Front-end access controls (default=deny, if configured) ]##', @@ -1511,8 +1510,13 @@ def addManualFrontends(): (self.no_probes and 'noprobes' or '# noprobes'), (self.crash_report_url and '# nocrashreport' or 'nocrashreport'), p('savefile = %s', safe and self.savefile, '/path/to/savefile'), - '', ]) + if common.MAX_READ_BYTES != 16*1024: + config.append('max_read_bytes = %sx%.3f' + % (common.MAX_READ_BYTES, common.MAX_READ_TUNNEL_X)) + if common.SELECT_LOOP_MIN_MS != 5: + config.append('select_loop_min_ms = %s' % common.SELECT_LOOP_MIN_MS) + config.append('') if self.daemonize or self.setuid or self.setgid or self.pidfile or new: config.extend([ @@ -2303,6 +2307,15 @@ def Configure(self, argv): which, limit = '*', arg self.GetDefaultIPsPerSecond(None, limit.strip()) # ValueErrors if bad self.ratelimit_ips[which.strip()] = limit.strip() + elif opt == '--max_read_bytes': + if 'x' in arg: + base, tmul = arg.split('x') + common.MAX_READ_BYTES = max(1024, int(base)) + common.MAX_READ_TUNNEL_X = max(1, float(tmul)) + else: + common.MAX_READ_BYTES = max(1024, int(arg)) + elif opt == '--select_loop_min_ms': + common.SELECT_LOOP_MIN_MS = max(0, min(int(arg), 100)) elif opt == '--accept_acl_file': self.accept_acl_file = arg elif opt == '--client_acl': @@ -3851,8 +3864,24 @@ def Loop(self): self.last_loop = now loop_count += 1 - snooze = 0 if oready else max(0, (now + 0.010) - time.time()) + # This delay does things! 
+ # Pro: + # - Reduce overhead by batching IO events together + # Mixed: + # - Along with Tunnel.maxread, this caps the per-stream/tunnel + # bandwidth. The default SELECT_LOOP_MIN_MS=5, combined with + # a MAX_READ_BYTES=16 (doubled for tunnels) lets us read from + # the socket 200x/second: 200 * 32kB =~ 6MB/s. This is the + # MAXIMUM outgoing bandwidth of any live tunnel, limiting + # how much load any single connection can generate. Total + # incoming bandwidth per-conn is half that. + # Con: + # - Adds latency + # + snooze = max(0, (now + common.SELECT_LOOP_MIN_MS/1000.0) - time.time()) if snooze: + if oready: + snooze /= 2 time.sleep(snooze) if now - self.last_barf > (logging.DEBUG_IO and 15 or 600): @@ -3861,7 +3890,7 @@ def Loop(self): epoll.close() epoll, mypoll = self.CreatePollObject() logging.LogDebug('Loop #%d, selectable map: %s' % (loop_count, SELECTABLES)) - if 0 == (loop_count % (5 if logging.DEBUG_IO else 100)): + if 0 == (loop_count % (5 if logging.DEBUG_IO else 250)): logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' % (loop_count, len(iready), len(oready), len(eready), snooze, APPVER)) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index eb499261..1f46d4fe 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -54,10 +54,6 @@ def __init__(self, conns): ChunkParser.__init__(self, ui=conns.config.ui) self.server_info = ['x.x.x.x:x', [], [], [], False, False, None] self.Init(conns) - # We want to be sure to read the entire chunk at once, including - # headers to save cycles, so we double the size we're willing to - # read here. - self.maxread *= 2 def Init(self, conns): self.conns = conns @@ -70,6 +66,7 @@ def Init(self, conns): self.using_tls = False self.filters = [] self.ip_limits = None + self.maxread = int(common.MAX_READ_BYTES * common.MAX_READ_TUNNEL_X) def Cleanup(self, close=True): if self.users: diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index a9ccdfd2..0e5947b1 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -69,7 +69,7 @@ class Selectable(object): errno.EDEADLK, errno.EWOULDBLOCK, errno.ENOBUFS, errno.EALREADY) - def __init__(self, fd=None, address=None, on_port=None, maxread=16*1024, + def __init__(self, fd=None, address=None, on_port=None, maxread=None, ui=None, tracked=True, bind=None, backlog=100): self.fd = None @@ -103,7 +103,7 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=16*1024, self.q_days = None # Read-related variables - self.maxread = maxread + self.maxread = maxread or common.MAX_READ_BYTES self.read_bytes = self.all_in = 0 self.read_eof = False self.peeking = False From cfc48fce17eccc4027b155063ce6c978c06ffca0 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Fri, 10 Apr 2020 17:37:50 +0000 Subject: [PATCH 20/36] Remove 0.4.x flow-control, fix bugs in current flow control code --- doc/HISTORY.txt | 1 + pagekite/proto/conns.py | 38 ++++++----------- pagekite/proto/selectables.py | 77 +++++------------------------------ 3 files changed, 24 insertions(+), 92 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index a4eff905..2cecc1c2 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -6,6 +6,7 @@ v1.0.0.200406 ------------- - Create ping.pagekite fast-path in dedicated thread - Make select loop timing and read sizes configurable + - Remove 0.4.x flow-control, fix bugs in current flow control code v1.0.0.200327 diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 1f46d4fe..13af7e42 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -704,11 +704,14 @@ def SendData(self, conn, data, sid=None, host=None, proto=None, port=None, sending.append(data) return self.SendChunked(sending, zhistory=self.zhistory.get(sid)) - # Larger amounts we break into fragments to work around bugs in - # some of our small-buffered embedded clients. We aim for roughly - # one fragment per packet, assuming an MTU of 1500 bytes. + # Larger amounts we break into fragments at the FE, to work around bugs + # in some of our small-buffered embedded clients. We aim for roughly + # one fragment per packet, assuming an MTU of 1500 bytes. We use + # much larger fragments at the back-end, relays can be assumed to + # be up-to-date and larger chunks saves CPU and improves throughput. + frag_size = self.conns.config.isfrontend and 1024 or (self.maxread+1024) sending.append('') - frag_size = max(1024, 1400-len(''.join(sending))) + frag_size = max(frag_size, 1400-len(''.join(sending))) first = True while data or first: sending[-1] = data[:frag_size] @@ -717,7 +720,7 @@ def SendData(self, conn, data, sid=None, host=None, proto=None, port=None, data = data[frag_size:] if first: sending = ['SID: %s\r\n' % sid, '\r\n', ''] - frag_size = max(1024, 1400-len(''.join(sending))) + frag_size = max(frag_size, 1400-len(''.join(sending))) first = False return True @@ -813,14 +816,11 @@ def SendQuota(self, pong=''): ) % (pong, self.quota[0]), compress=False, just_buffer=True) - def SendProgress(self, sid, conn, throttle=False): - # FIXME: Optimize this away unless meaningful progress has been made? + def SendProgress(self, sid, conn): msg = ('NOOP: 1\r\n' 'SID: %s\r\n' - 'SKB: %d\r\n') % (sid, (conn.all_out + conn.wrote_bytes)/1024) - throttle = throttle and ('SPD: %d\r\n' % conn.write_speed) or '' - return self.SendChunked('%s%s\r\n!' % (msg, throttle), - compress=False, just_buffer=True) + 'SKB: %d\r\n\r\n') % (sid, (conn.all_out + conn.wrote_bytes)/1024) + return self.SendChunked(msg, compress=False, just_buffer=True) def ProcessCorruptChunk(self, data): self.ResetRemoteZChunks() @@ -836,17 +836,12 @@ def Probe(self, host): return False return True - def AutoThrottle(self, max_speed=None, remote=False, delay=0.2): - # Never throttle tunnels. - return True - def ProgressTo(self, parse): try: sid = int(parse.Header('SID')[0]) - bps = int((parse.Header('SPD') or [-1])[0]) skb = int((parse.Header('SKB') or [-1])[0]) if sid in self.users: - self.users[sid].RecordProgress(skb, bps) + self.users[sid].RecordProgress(skb) except: logging.LogError(('Tunnel::ProgressTo: That made no sense! %s' ) % format_exc()) @@ -1089,9 +1084,6 @@ def ProcessChunk(self, data): # select/epoll loop catch and handle it. 
pass - if len(conn.write_blocked) > 0 and conn.created < time.time()-3: - return self.SendProgress(sid, conn, throttle=True) - else: # No connection? Close this stream. self.CloseStream(sid) @@ -1517,12 +1509,6 @@ def ProcessData(self, data): self.LogDebug('Send to tunnel failed') return False - # Back off if tunnel is stuffed. - if self.tunnel and len(self.tunnel.write_blocked) > 1024000: - # FIXME: think about this... - self.Throttle(delay=(len(self.tunnel.write_blocked)-204800)/max(50000, - self.tunnel.write_speed)) - if self.read_eof: return self.ProcessEofRead() return True diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 0e5947b1..fb856fa9 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -116,9 +116,6 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, self.write_eof = False self.write_retry = None - # Flow control v1 - self.throttle_until = (time.time() - 1) - self.max_read_speed = 96*1024 # Flow control v2 self.acked_kb_delta = 0 @@ -280,15 +277,14 @@ def LogTraffic(self, final=False): common.gYamon.vadd("bytes_all", self.wrote_bytes + self.read_bytes, wrap=1000000000) - if final: - self.Log([('wrote', '%d' % self.wrote_bytes), - ('wbps', '%d' % self.write_speed), - ('read', '%d' % self.read_bytes), - ('eof', '1')]) - else: - self.Log([('wrote', '%d' % self.wrote_bytes), + log_info = [('wrote', '%d' % self.wrote_bytes), ('wbps', '%d' % self.write_speed), - ('read', '%d' % self.read_bytes)]) + ('read', '%d' % self.read_bytes)] + if self.acked_kb_delta: + log_info.append(('delta', '%d' % self.acked_kb_delta)) + if final: + log_info.append(('eof', '1')) + self.Log(log_info) self.bytes_logged = now self.wrote_bytes = self.read_bytes = 0 @@ -347,19 +343,6 @@ def ReadData(self, maxread=None): now = time.time() maxread = maxread or self.maxread - flooded = self.Flooded(now) - if flooded > self.max_read_speed and not self.acked_kb_delta: - # FIXME: This is v1 flow control, kill it when 0.4.7 is "everywhere" - last = self.throttle_until - # Disable local throttling for really slow connections; remote - # throttles (trigged by blocked sockets) still work. 
- if self.max_read_speed > 1024: - self.AutoThrottle() - maxread = 1024 - if now > last and self.all_in > 2*self.max_read_speed: - self.max_read_speed *= 1.25 - self.max_read_speed += maxread - try: if self.peeking: data = self.fd.recv(maxread, socket.MSG_PEEK) @@ -369,8 +352,7 @@ def ReadData(self, maxread=None): else: data = self.fd.recv(maxread) if logging.DEBUG_IO: - print ('<== IN =[%s @ %dbps]==(\n%s)==' - ) % (self, self.max_read_speed, data[:160]) + print('<== IN =[%s]==(\n%s)==' % (self, data[:160])) except (SSL.WantReadError, SSL.WantWriteError), err: return True except IOError, err: @@ -406,46 +388,10 @@ def ReadData(self, maxread=None): if self.read_bytes > logging.LOG_THRESHOLD: self.LogTraffic() return self.ProcessData(data) - def Flooded(self, now=None): - delta = ((now or time.time()) - self.created) - if delta >= 1: - flooded = self.read_bytes + self.all_in - flooded -= self.max_read_speed * 0.95 * delta - return flooded - else: - return 0 - - def RecordProgress(self, skb, bps): + def RecordProgress(self, skb): if skb >= 0: all_read = (self.all_in + self.read_bytes) / 1024 - if self.acked_kb_delta: - self.acked_kb_delta = max(1, all_read - skb) - self.LogDebug('Delta is: %d' % self.acked_kb_delta) - elif bps >= 0: - self.Throttle(max_speed=bps, remote=True) - - def Throttle(self, max_speed=None, remote=False, delay=0.2): - if max_speed: - self.max_read_speed = max_speed - - flooded = max(-1, self.Flooded()) - if self.max_read_speed: - delay = min(10, max(0.1, flooded/self.max_read_speed)) - if flooded < 0: delay = 0 - - if delay: - ot = self.throttle_until - self.throttle_until = time.time() + delay - if ((self.throttle_until - ot) > 30 or - (int(ot) != int(self.throttle_until) and delay > 8)): - self.LogInfo('Throttled %.1fs until %x (flood=%d, bps=%s, %s)' % ( - delay, self.throttle_until, flooded, - self.max_read_speed, remote and 'remote' or 'local')) - - return True - - def AutoThrottle(self, max_speed=None, remote=False, delay=0.2): - return self.Throttle(max_speed, remote, delay) + self.acked_kb_delta = max(1, all_read - skb) def Send(self, data, try_flush=False, activity=False, just_buffer=False, allow_blocking=False): @@ -581,8 +527,7 @@ def Flush(self, loops=50, wait=False, allow_blocking=False): def IsReadable(s, now): return (s.fd and (not s.read_eof) - and (s.acked_kb_delta < 64) # FIXME - and (s.throttle_until <= now)) + and (s.acked_kb_delta < (3 * s.maxread/1024))) def IsBlocked(s): return (s.fd and (len(s.write_blocked) > 0)) From 60f2f54cb7c518ae4bb32c530f4bcf069d150fe3 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Fri, 10 Apr 2020 19:08:02 +0000 Subject: [PATCH 21/36] Avoid undefined variables in UnknownConn --- pagekite/proto/conns.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 13af7e42..dacc96e1 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -1520,6 +1520,12 @@ class UnknownConn(MagicProtocolParser): def __init__(self, fd, address, on_port, conns): MagicProtocolParser.__init__(self, fd, address, on_port, ui=conns.config.ui) self.peeking = True + self.sid = -1 + self.host = None + self.proto = None + self.said_hello = False + self.bad_loops = 0 + self.error_details = {} # Set up our parser chain. 
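Stepping back to the flow-control changes above: after this patch the only back-pressure left is the SKB bookkeeping, where the far end periodically reports how many kilobytes it has written out and the reader pauses once it gets too far ahead of those acks. A minimal sketch using the attribute names from the diff (the StreamState class itself is a stand-in, not part of the patch):

    class StreamState(object):
        # Toy stand-in for a Selectable, just enough to show the bookkeeping.
        def __init__(self, maxread=16 * 1024):
            self.maxread = maxread
            self.all_in = self.read_bytes = 0
            self.acked_kb_delta = 0

        def record_progress(self, skb):
            # skb: kilobytes the far end reports having written out, as sent
            # in the NOOP/SKB chunks by SendProgress above.
            if skb >= 0:
                all_read = (self.all_in + self.read_bytes) / 1024
                self.acked_kb_delta = max(1, all_read - skb)

        def is_readable(self):
            # Stop reading while more than ~3 reads' worth is unacknowledged.
            return self.acked_kb_delta < (3 * self.maxread / 1024)

    s = StreamState()
    s.all_in = 512 * 1024     # pretend 512kB has been read from this stream
    s.record_progress(256)    # far end has only written out 256kB so far
    print(s.is_readable())    # False: back off until more SKB acks arrive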
self.parsers = [HttpLineParser] @@ -1533,13 +1539,6 @@ def __init__(self, fd, address, on_port, conns): self.conns.Add(self) self.conns.SetIdle(self, 10) - self.sid = -1 - self.host = None - self.proto = None - self.said_hello = False - self.bad_loops = 0 - self.error_details = {} - def Cleanup(self, close=True): MagicProtocolParser.Cleanup(self, close=close) self.conns = self.parser = None From 90abc5d58311f4f753321c1d0822c153dc0fc4f9 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 11 Apr 2020 11:06:57 +0000 Subject: [PATCH 22/36] Narrow permissions changes in deb postinst --- deb/postinst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/deb/postinst b/deb/postinst index f7b2ef5d..98db2deb 100644 --- a/deb/postinst +++ b/deb/postinst @@ -35,8 +35,8 @@ case "$1" in [ -e /etc/pagekite/pagekite.rc.dpkg-bak ] \ && mv /etc/pagekite/pagekite.rc.dpkg-bak /etc/pagekite.d/89_old_pagekite.rc - chmod 644 /etc/pagekite.d/* || true - chmod 600 /etc/pagekite.d/[019]* || true + chmod 644 /etc/pagekite.d/*.rc* || true + chmod 600 /etc/pagekite.d/[019]*rc* || true [ -d /etc/pagekite ] && rmdir /etc/pagekite || true ;; From cf1a7e5ce781a1adcde8a091ee754d7f2bb98695 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 11 Apr 2020 18:46:35 +0000 Subject: [PATCH 23/36] Modernize locking code --- pagekite/httpd.py | 17 +-- pagekite/pk.py | 254 +++++++++++++++++----------------- pagekite/proto/conns.py | 15 +- pagekite/proto/selectables.py | 12 +- pagekite/ui/remote.py | 23 +-- pagekite/yamond.py | 60 +++----- 6 files changed, 160 insertions(+), 221 deletions(-) diff --git a/pagekite/httpd.py b/pagekite/httpd.py index 91348b5c..81aec3fc 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -417,10 +417,8 @@ def do_POST(self, command='POST'): "string (%s bytes).") % clength) posted = cgi.parse_qs(self.rfile.read(clength), 1) elif self.host_config.get('xmlrpc', False): - # We wrap the XMLRPC request handler in _BEGIN/_END in order to - # expose the request environment to the RPC functions. 
- RCI = self.server.RCI - return RCI._END(SimpleXMLRPCRequestHandler.do_POST(RCI._BEGIN(self))) + with self.server.RCI.lock: + return SimpleXMLRPCRequestHandler.do_POST(self) self.post_data.seek(0) except socket.error: @@ -972,17 +970,6 @@ def __init__(self, httpd, pkite, conns): 'tokens': self.auth_tokens, 'data': logging.LOG}} - def _BEGIN(self, request_object): - self.lock.acquire() - self.request = request_object - return request_object - - def _END(self, rv=None): - if self.request: - self.request = None - self.lock.release() - return rv - def connections(self, auth_token): if (not self.request.host_config.get('console', False) or self.ACL_READ not in self.auth_tokens.get(auth_token, self.ACL_OPEN)): diff --git a/pagekite/pk.py b/pagekite/pk.py index 6f0465a3..aff32cdf 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -139,13 +139,10 @@ def __init__(self, app_path): def _q(self, args): if self.server is not None: - try: - self.lock.acquire() + with self.lock: self.server.stdin.write(' '.join(args) + '\n') self.server.stdin.flush() return self.server.stdout.readline().strip() - finally: - self.lock.release() else: return subprocess.check_output([self.app_path] + args).strip() @@ -175,16 +172,14 @@ def __init__(self, conns): self.qtime = 0.250 # A decent initial estimate def check(self, requests, conn, callback): - self.qc.acquire() - self.jobs.append((requests, conn, callback)) - self.qc.notify() - self.qc.release() + with self.qc: + self.jobs.append((requests, conn, callback)) + self.qc.notify() def quit(self): - self.qc.acquire() - self.keep_running = False - self.qc.notify() - self.qc.release() + with self.qc: + self.keep_running = False + self.qc.notify() try: self.join() except RuntimeError: @@ -201,128 +196,127 @@ def run(self): logging.LogDebug('AuthThread: done') def _run(self): - self.qc.acquire() - while self.keep_running: - now = int(time.time()) - if not self.jobs: - (requests, conn, callback) = None, None, None - self.qc.wait() - else: - (requests, conn, callback) = self.jobs.pop(0) - if logging.DEBUG_IO: print '=== AUTH REQUESTS\n%s\n===' % requests - self.qc.release() - - quotas = [] - q_conns = [] - q_days = [] - ip_limits = [] - results = [] - log_info = [] - session = '%x:%s:' % (now, globalSecret()) - for request in requests: - try: - proto, domain, srand, token, sign, prefix = request - except: - logging.LogError('Invalid request: %s' % (request, )) - continue - - what = '%s:%s:%s' % (proto, domain, srand) - session += what - if not token or not sign: - # Send a challenge. Our challenges are time-stamped, so we can - # put stict bounds on possible replay attacks (20 minutes atm). - results.append(('%s-SignThis' % prefix, - '%s:%s' % (what, signToken(payload=what, - timestamp=now)))) - else: - # Note: These 15 seconds are a magic number which should be well - # below the timeout in proto.conns.Tunnel._Connect(). - if ((not self.conns.config.authfail_closed) - and len(self.jobs) >= (15 / self.qtime)): - logging.LogError('Quota lookup skipped, over 15s worth of jobs queued') - (quota, days, conns, ipc, ips, reason) = ( - -2, None, None, None, None, None) - else: - # This is a bit lame, but we only check the token if the quota - # for this connection has never been verified. 
- t0 = time.time() - (quota, days, conns, ipc, ips, reason) = ( - self.conns.config.GetDomainQuota( - proto, domain, srand, token, sign, - check_token=(conn.quota is None))) - elapsed = (time.time() - t0) - self.qtime = max(0.2, (0.9 * self.qtime) + (0.1 * elapsed)) - - duplicates = self.conns.Tunnel(proto, domain) - if not quota: - if not reason: reason = 'quota' - results.append(('%s-Invalid' % prefix, what)) - results.append(('%s-Invalid-Why' % prefix, - '%s;%s' % (what, reason))) - log_info.extend([('rejected', domain), - ('quota', quota), - ('reason', reason)]) - elif duplicates: - # Duplicates... is the old one dead? Trigger a ping. - for conn in duplicates: - conn.TriggerPing() - results.append(('%s-Duplicate' % prefix, what)) - log_info.extend([('rejected', domain), - ('duplicate', 'yes')]) - else: - results.append(('%s-OK' % prefix, what)) - quotas.append((quota, request)) - if conns: q_conns.append(conns) - if days: q_days.append(days) - if not ipc: - try: - ipc, ips = self.conns.config.GetDefaultIPsPerSecond(domain) - except ValueError: - pass - if ipc and ips: - ip_limits.append((float(ipc)/ips, ipc, ips)) - if (proto.startswith('http') and - self.conns.config.GetTlsEndpointCtx(domain)): - results.append(('%s-SSL-OK' % prefix, what)) - - results.append(('%s-SessionID' % prefix, - '%x:%s' % (now, sha1hex(session)))) - results.append(('%s-Misc' % prefix, urllib.urlencode({ - 'motd': (self.conns.config.motd_message or ''), - }))) - for upgrade in self.conns.config.upgrade_info: - results.append(('%s-Upgrade' % prefix, ';'.join(upgrade))) - - if quotas: - min_qconns = min(q_conns or [0]) - if q_conns and min_qconns: - results.append(('%s-QConns' % prefix, min_qconns)) - - min_qdays = min(q_days or [0]) - if q_days and min_qdays: - results.append(('%s-QDays' % prefix, min_qdays)) - - min_ip_limits = min(ip_limits or [(0, None, None)])[1:] - if ip_limits and min_ip_limits[0]: - results.append(('%s-IPsPerSec' % prefix, '%s/%s' % min_ip_limits)) - - nz_quotas = [qp for qp in quotas if qp[0] and qp[0] > 0] - if nz_quotas: - quota = min(nz_quotas)[0] - conn.quota = [quota, [qp[1] for qp in nz_quotas], time.time()] - results.append(('%s-Quota' % prefix, quota)) - elif requests: - if not conn.quota: - conn.quota = [None, requests, time.time()] + with self.qc: + while self.keep_running: + now = int(time.time()) + if not self.jobs: + (requests, conn, callback) = None, None, None + self.qc.wait() + else: + (requests, conn, callback) = self.jobs.pop(0) + if logging.DEBUG_IO: print '=== AUTH REQUESTS\n%s\n===' % requests + self.qc.release() + + quotas = [] + q_conns = [] + q_days = [] + ip_limits = [] + results = [] + log_info = [] + session = '%x:%s:' % (now, globalSecret()) + for request in requests: + try: + proto, domain, srand, token, sign, prefix = request + except: + logging.LogError('Invalid request: %s' % (request, )) + continue + + what = '%s:%s:%s' % (proto, domain, srand) + session += what + if not token or not sign: + # Send a challenge. Our challenges are time-stamped, so we can + # put stict bounds on possible replay attacks (20 minutes atm). + results.append(('%s-SignThis' % prefix, + '%s:%s' % (what, signToken(payload=what, + timestamp=now)))) else: - conn.quota[2] = time.time() + # Note: These 15 seconds are a magic number which should be well + # below the timeout in proto.conns.Tunnel._Connect(). 
+ if ((not self.conns.config.authfail_closed) + and len(self.jobs) >= (15 / self.qtime)): + logging.LogError('Quota lookup skipped, over 15s worth of jobs queued') + (quota, days, conns, ipc, ips, reason) = ( + -2, None, None, None, None, None) + else: + # This is a bit lame, but we only check the token if the quota + # for this connection has never been verified. + t0 = time.time() + (quota, days, conns, ipc, ips, reason) = ( + self.conns.config.GetDomainQuota( + proto, domain, srand, token, sign, + check_token=(conn.quota is None))) + elapsed = (time.time() - t0) + self.qtime = max(0.2, (0.9 * self.qtime) + (0.1 * elapsed)) + + duplicates = self.conns.Tunnel(proto, domain) + if not quota: + if not reason: reason = 'quota' + results.append(('%s-Invalid' % prefix, what)) + results.append(('%s-Invalid-Why' % prefix, + '%s;%s' % (what, reason))) + log_info.extend([('rejected', domain), + ('quota', quota), + ('reason', reason)]) + elif duplicates: + # Duplicates... is the old one dead? Trigger a ping. + for conn in duplicates: + conn.TriggerPing() + results.append(('%s-Duplicate' % prefix, what)) + log_info.extend([('rejected', domain), + ('duplicate', 'yes')]) + else: + results.append(('%s-OK' % prefix, what)) + quotas.append((quota, request)) + if conns: q_conns.append(conns) + if days: q_days.append(days) + if not ipc: + try: + ipc, ips = self.conns.config.GetDefaultIPsPerSecond(domain) + except ValueError: + pass + if ipc and ips: + ip_limits.append((float(ipc)/ips, ipc, ips)) + if (proto.startswith('http') and + self.conns.config.GetTlsEndpointCtx(domain)): + results.append(('%s-SSL-OK' % prefix, what)) + + results.append(('%s-SessionID' % prefix, + '%x:%s' % (now, sha1hex(session)))) + results.append(('%s-Misc' % prefix, urllib.urlencode({ + 'motd': (self.conns.config.motd_message or ''), + }))) + for upgrade in self.conns.config.upgrade_info: + results.append(('%s-Upgrade' % prefix, ';'.join(upgrade))) + + if quotas: + min_qconns = min(q_conns or [0]) + if q_conns and min_qconns: + results.append(('%s-QConns' % prefix, min_qconns)) + + min_qdays = min(q_days or [0]) + if q_days and min_qdays: + results.append(('%s-QDays' % prefix, min_qdays)) + + min_ip_limits = min(ip_limits or [(0, None, None)])[1:] + if ip_limits and min_ip_limits[0]: + results.append(('%s-IPsPerSec' % prefix, '%s/%s' % min_ip_limits)) + + nz_quotas = [qp for qp in quotas if qp[0] and qp[0] > 0] + if nz_quotas: + quota = min(nz_quotas)[0] + conn.quota = [quota, [qp[1] for qp in nz_quotas], time.time()] + results.append(('%s-Quota' % prefix, quota)) + elif requests: + if not conn.quota: + conn.quota = [None, requests, time.time()] + else: + conn.quota[2] = time.time() - if logging.DEBUG_IO: print '=== AUTH RESULTS\n%s\n===' % results - callback(results, log_info) - self.qc.acquire() + if logging.DEBUG_IO: print '=== AUTH RESULTS\n%s\n===' % results + callback(results, log_info) + self.qc.acquire() self.buffering = 0 - self.qc.release() ##[ Selectables ]############################################################## diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index dacc96e1..dbdde94c 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -1854,10 +1854,10 @@ def __init__(self, fd, address, on_port, conns): self.Send('PageKite? 
%s\r\n' % self.challenge) def readline(self): - self.qc.acquire() - while not self.lines: self.qc.wait() - line = self.lines.pop(0) - self.qc.release() + with self.qc: + while not self.lines: + self.qc.wait() + line = self.lines.pop(0) return line def write(self, data): @@ -1877,10 +1877,9 @@ def Disconnect(self): def ProcessLine(self, line, lines): if self.state == self.STATE_LIVE: - self.qc.acquire() - self.lines.append(line) - self.qc.notify() - self.qc.release() + with self.qc: + self.lines.append(line) + self.qc.notify() return True elif self.state == self.STATE_PASSWORD: if line.strip() == self.expect: diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index fb856fa9..87c64847 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -45,8 +45,7 @@ def obfuIp(ip): SELECTABLES = {} def getSelectableId(what): global SELECTABLES, SELECTABLE_ID, SELECTABLE_LOCK - try: - SELECTABLE_LOCK.acquire() + with SELECTABLE_LOCK: count = 0 while SELECTABLE_ID in SELECTABLES: SELECTABLE_ID += 1 @@ -58,8 +57,6 @@ def getSelectableId(what): raise ValueError('Too many conns!') SELECTABLES[SELECTABLE_ID] = what return SELECTABLE_ID - finally: - SELECTABLE_LOCK.release() class Selectable(object): @@ -484,10 +481,8 @@ def SendChunked(self, data, compress=True, zhistory=None, just_buffer=False): # Stop compressing streams that just get bigger. if zhistory and (zhistory[0] < zhistory[1]): compress = False - try: + with self.lock: try: - if self.lock: - self.lock.acquire() sdata = ''.join(data) if self.zw and compress and len(sdata) > 64: try: @@ -508,9 +503,6 @@ def SendChunked(self, data, compress=True, zhistory=None, just_buffer=False): except UnicodeDecodeError: logging.LogError('UnicodeDecodeError in SendChunked, wtf?') return False - finally: - if self.lock: - self.lock.release() def Flush(self, loops=50, wait=False, allow_blocking=False): while (loops != 0 and diff --git a/pagekite/ui/remote.py b/pagekite/ui/remote.py index 99bb9e20..5ce01706 100755 --- a/pagekite/ui/remote.py +++ b/pagekite/ui/remote.py @@ -270,9 +270,9 @@ def reset(self): # These routines are used by the PageKite UI, to communicate with us... def readline(self): - try: - self.pk_readlock.acquire() - while (not self.pk_incoming) and (not self.pk_eof): self.pk_readlock.wait() + with self.pk_readlock: + while (not self.pk_incoming) and (not self.pk_eof): + self.pk_readlock.wait() if self.pk_incoming: line = self.pk_incoming.pop(0) else: @@ -280,48 +280,37 @@ def readline(self): if self.debug: print '>>PK>> %s' % line.strip() return line - finally: - self.pk_readlock.release() def write(self, data): if self.debug: print '>>GUI>> %s' % data.strip() - try: - self.gui_readlock.acquire() + with self.gui_readlock: if data: self.gui_incoming += data else: self.gui_eof = True self.gui_readlock.notify() - finally: - self.gui_readlock.release() # And these are used by the GUI, to communicate with PageKite. 
def recv(self, bytecount): - try: - self.gui_readlock.acquire() + with self.gui_readlock: while (len(self.gui_incoming) < bytecount) and (not self.gui_eof): self.gui_readlock.wait() data = self.gui_incoming[0:bytecount] self.gui_incoming = self.gui_incoming[bytecount:] return data - finally: - self.gui_readlock.release() def send(self, data): if not data.endswith('\n') and data != '': raise ValueError('Please always send whole lines') if self.debug: print '< self.values[var]: self.values[var] = value - finally: - self.lock.release() def vscale(self, var, ratio, add=0): - try: - self.lock.acquire() + with self.lock: if var not in self.values: self.values[var] = 0 self.values[var] *= ratio self.values[var] += add - finally: - self.lock.release() def vset(self, var, value): - try: - self.lock.acquire() + with self.lock: self.values[var] = value - finally: - self.lock.release() def vadd(self, var, value, wrap=None): - try: - self.lock.acquire() + with self.lock: if var not in self.values: self.values[var] = 0 self.values[var] += value if wrap is not None and self.values[var] >= wrap: self.values[var] -= wrap - finally: - self.lock.release() def vmin(self, var, value): - try: - self.lock.acquire() + with self.lock: if value < self.values[var]: self.values[var] = value - finally: - self.lock.release() def vdel(self, var): - try: - self.lock.acquire() + with self.lock: if var in self.values: del self.values[var] - finally: - self.lock.release() def lcreate(self, listn, elems): - try: - self.lock.acquire() + with self.lock: self.lists[listn] = [elems, 0, ['' for x in xrange(0, elems)]] - finally: - self.lock.release() def ladd(self, listn, value): - try: - self.lock.acquire() + with self.lock: lst = self.lists[listn] lst[2][lst[1]] = value lst[1] += 1 lst[1] %= lst[0] - finally: - self.lock.release() def render_vars_text(self, view=None): if view: @@ -190,15 +166,16 @@ def render_vars_text(self, view=None): else: values, lists = self.values, self.lists - data = [] - for var in values: - data.append('%s: %s\n' % (var, values[var])) + with self.lock: + data = [] + for var in values: + data.append('%s: %s\n' % (var, values[var])) - for lname in lists: - (elems, offset, lst) = lists[lname] - l = lst[offset:] - l.extend(lst[:offset]) - data.append('%s: %s\n' % (lname, ' '.join(['%s' % (x, ) for x in l]))) + for lname in lists: + (elems, offset, lst) = lists[lname] + l = lst[offset:] + l.extend(lst[:offset]) + data.append('%s: %s\n' % (lname, ' '.join(['%s' % (x, ) for x in l]))) data.sort() return ''.join(data) @@ -213,7 +190,8 @@ def run(self): self.httpd = self.server(self, self.handler) self.sspec = self.httpd.server_address self.running = True - while self.running: self.httpd.handle_request() + while self.running: + self.httpd.handle_request() if __name__ == '__main__': From d34b8a3f4cc9f5662b1b4d0b51d1037412bb64c0 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sat, 11 Apr 2020 21:53:18 +0000 Subject: [PATCH 24/36] More locks! Switch to re-entrant locks (RLock), avoid pypy deadlocks. 
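The reason for switching to re-entrant locks in this patch is spelled out in the comments further down: __del__ and similar callbacks can run at unpredictable times (especially under pypy) while the same thread already holds a lock, and a plain threading.Lock then blocks forever, whereas an RLock simply nests. A small self-contained demonstration (not from the patch; it uses a non-blocking acquire so the Lock case returns False instead of hanging):

    import threading

    def reacquire_ok(lock):
        # Simulate a re-entrant path: the lock is already held by this thread
        # (say, inside SendChunked) when __del__ fires and grabs it again.
        lock.acquire()
        try:
            got_it = lock.acquire(False)   # non-blocking second acquire
            if got_it:
                lock.release()
            return bool(got_it)
        finally:
            lock.release()

    print(reacquire_ok(threading.Lock()))    # False: would deadlock if blocking
    print(reacquire_ok(threading.RLock()))   # True: re-entrant, no deadlock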
--- pagekite/httpd.py | 7 ++++--- pagekite/pk.py | 5 +++-- pagekite/proto/conns.py | 1 - pagekite/proto/selectables.py | 26 ++++++++++++++++---------- pagekite/yamond.py | 21 ++++++++++----------- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/pagekite/httpd.py b/pagekite/httpd.py index 81aec3fc..1e6c1967 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -857,8 +857,9 @@ def handleHttpRequest(self, scheme, netloc, path, params, query, frag, if path == self.host_config.get('yamon', False): if common.gYamon: - self.server.pkite.Overloaded(yamon=common.gYamon) - data['body'] = common.gYamon.render_vars_text(qs.get('view', [None])[0]) + with selectables.SELECTABLE_LOCK: + self.server.pkite.Overloaded(yamon=common.gYamon) + data['body'] = common.gYamon.render_vars_text(qs.get('view', [None])[0]) else: data['body'] = '' @@ -958,7 +959,7 @@ def __init__(self, httpd, pkite, conns): self.conns = conns self.modified = False - self.lock = threading.Lock() + self.lock = threading.RLock() self.request = None # For now, nobody gets ACL_WRITE diff --git a/pagekite/pk.py b/pagekite/pk.py index aff32cdf..4d272838 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -327,7 +327,7 @@ class Connections(object): def __init__(self, config): self.config = config self.ip_tracker = {} - self.lock = threading.Lock() + self.lock = threading.RLock() self.idle = [] self.conns = [] self.conns_by_id = {} @@ -3883,7 +3883,8 @@ def Loop(self): if epoll: epoll.close() epoll, mypoll = self.CreatePollObject() - logging.LogDebug('Loop #%d, selectable map: %s' % (loop_count, SELECTABLES)) + with SELECTABLE_LOCK: + logging.LogDebug('Loop #%d, selectable map: %s' % (loop_count, SELECTABLES)) if 0 == (loop_count % (5 if logging.DEBUG_IO else 250)): logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' % (loop_count, len(iready), len(oready), len(eready), snooze, APPVER)) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index dbdde94c..b21e56a1 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -1101,7 +1101,6 @@ def __init__(self, conns, which, backends): if self.fd: self.fd = None self.weighted_rtt = -1000 - self.lock = None self.backends = backends self.require_all = True self.server_info[self.S_NAME] = LOOPBACK[which] diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 87c64847..695a1a5a 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -40,7 +40,7 @@ def obfuIp(ip): return '~%s' % '.'.join([q for q in quads[-2:]]) -SELECTABLE_LOCK = threading.Lock() +SELECTABLE_LOCK = threading.RLock() # threading.Lock() will deadlock on pypy! 
SELECTABLE_ID = 0 SELECTABLES = {} def getSelectableId(what): @@ -117,7 +117,7 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, self.acked_kb_delta = 0 # Compression stuff - self.lock = threading.Lock() + self.lock = threading.RLock() self.zw = None self.zlevel = 1 self.zreset = False @@ -142,8 +142,9 @@ def CountAs(self, what): common.gYamon.vadd(self.countas, -1) common.gYamon.vadd(what, 1) self.countas = what - global SELECTABLES - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) + global SELECTABLES, SELECTABLE_LOCK + with SELECTABLE_LOCK: + SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) def Cleanup(self, close=True): self.peeked = self.zw = '' @@ -161,6 +162,8 @@ def Cleanup(self, close=True): self.LogTraffic(final=True) def __del__(self): + # Important: This can run at random times, especially under pypy, so all + # locks must be re-entrant (RLock), otherwise we deadlock. try: if common.gYamon: common.gYamon.vadd(self.countas, -1) @@ -168,8 +171,9 @@ def __del__(self): except AttributeError: pass try: - global SELECTABLES - del SELECTABLES[self.gsid] + global SELECTABLES, SELECTABLE_LOCK + with SELECTABLE_LOCK: + del SELECTABLES[self.gsid] except (KeyError, TypeError): pass @@ -288,8 +292,9 @@ def LogTraffic(self, final=False): elif final: self.Log([('eof', '1')]) - global SELECTABLES - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) + global SELECTABLES, SELECTABLE_LOCK + with SELECTABLE_LOCK: + SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) def SayHello(self): pass @@ -299,8 +304,9 @@ def ProcessData(self, data): return False def ProcessEof(self): - global SELECTABLES - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) + global SELECTABLES, SELECTABLE_LOCK + with SELECTABLE_LOCK: + SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) if self.read_eof and self.write_eof and not self.write_blocked: self.Cleanup() return False diff --git a/pagekite/yamond.py b/pagekite/yamond.py index 54e1fa9c..7693ac40 100755 --- a/pagekite/yamond.py +++ b/pagekite/yamond.py @@ -101,7 +101,7 @@ def __init__(self, sspec, server=YamonHttpServer, handler=YamonRequestHandler): threading.Thread.__init__(self) - self.lock = threading.Lock() + self.lock = threading.RLock() # threading.Lock() will deadlock pypy self.server = server self.handler = handler self.sspec = sspec @@ -166,16 +166,15 @@ def render_vars_text(self, view=None): else: values, lists = self.values, self.lists - with self.lock: - data = [] - for var in values: - data.append('%s: %s\n' % (var, values[var])) - - for lname in lists: - (elems, offset, lst) = lists[lname] - l = lst[offset:] - l.extend(lst[:offset]) - data.append('%s: %s\n' % (lname, ' '.join(['%s' % (x, ) for x in l]))) + data = [] + for var in values: + data.append('%s: %s\n' % (var, values[var])) + + for lname in lists: + (elems, offset, lst) = lists[lname] + l = lst[offset:] + l.extend(lst[:offset]) + data.append('%s: %s\n' % (lname, ' '.join(['%s' % (x, ) for x in l]))) data.sort() return ''.join(data) From 6be8a5cc10ebd05bbbf08508f9a8ea6c68ff6e6e Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Tue, 14 Apr 2020 11:27:45 +0000 Subject: [PATCH 25/36] Only use SELECTABLES for making SIDs unique, nothing else --- pagekite/httpd.py | 9 ++------- pagekite/pk.py | 9 +-------- pagekite/proto/selectables.py | 24 ++++++------------------ 3 files changed, 9 insertions(+), 33 deletions(-) diff --git a/pagekite/httpd.py b/pagekite/httpd.py index 1e6c1967..eee1d065 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -857,9 +857,8 @@ def handleHttpRequest(self, scheme, netloc, path, params, query, frag, if path == self.host_config.get('yamon', False): if common.gYamon: - with selectables.SELECTABLE_LOCK: - self.server.pkite.Overloaded(yamon=common.gYamon) - data['body'] = common.gYamon.render_vars_text(qs.get('view', [None])[0]) + self.server.pkite.Overloaded(yamon=common.gYamon) + data['body'] = common.gYamon.render_vars_text(qs.get('view', [None])[0]) else: data['body'] = '' @@ -1125,10 +1124,6 @@ def __init__(self, sspec, pkite, conns, gYamon.lcreate("tunnel_rtt", 100) gYamon.lcreate("tunnel_wrtt", 100) gYamon.lists['buffered_bytes'] = [1, 0, common.buffered_bytes] - gYamon.views['selectables'] = (selectables.SELECTABLES, { - 'idle': [0, 0, self.conns.idle], - 'conns': [0, 0, self.conns.conns] - }) except: pass diff --git a/pagekite/pk.py b/pagekite/pk.py index 4d272838..46c14006 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3833,7 +3833,7 @@ def Loop(self): if self.ui_comm: self.ui_comm.start() epoll, mypoll = self.CreatePollObject() - self.last_barf = self.last_loop = time.time() + self.last_loop = time.time() logging.LogDebug('Entering main %s loop' % (epoll and 'epoll' or 'select')) loop_count = 0 @@ -3878,13 +3878,6 @@ def Loop(self): snooze /= 2 time.sleep(snooze) - if now - self.last_barf > (logging.DEBUG_IO and 15 or 600): - self.last_barf = now - if epoll: - epoll.close() - epoll, mypoll = self.CreatePollObject() - with SELECTABLE_LOCK: - logging.LogDebug('Loop #%d, selectable map: %s' % (loop_count, SELECTABLES)) if 0 == (loop_count % (5 if logging.DEBUG_IO else 250)): logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' % (loop_count, len(iready), len(oready), len(eready), snooze, APPVER)) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 695a1a5a..a3f08323 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -42,20 +42,18 @@ def obfuIp(ip): SELECTABLE_LOCK = threading.RLock() # threading.Lock() will deadlock on pypy! 
SELECTABLE_ID = 0 -SELECTABLES = {} +SELECTABLES = set([]) def getSelectableId(what): global SELECTABLES, SELECTABLE_ID, SELECTABLE_LOCK with SELECTABLE_LOCK: count = 0 while SELECTABLE_ID in SELECTABLES: SELECTABLE_ID += 1 - SELECTABLE_ID %= 0x10000 - if (SELECTABLE_ID % 0x00800) == 0: - logging.LogDebug('Selectable map: %s' % (SELECTABLES, )) + SELECTABLE_ID %= 0x20000 count += 1 - if count > 0x10001: + if count > 0x20000: raise ValueError('Too many conns!') - SELECTABLES[SELECTABLE_ID] = what + SELECTABLES.add(SELECTABLE_ID) return SELECTABLE_ID @@ -142,9 +140,6 @@ def CountAs(self, what): common.gYamon.vadd(self.countas, -1) common.gYamon.vadd(what, 1) self.countas = what - global SELECTABLES, SELECTABLE_LOCK - with SELECTABLE_LOCK: - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) def Cleanup(self, close=True): self.peeked = self.zw = '' @@ -173,8 +168,8 @@ def __del__(self): try: global SELECTABLES, SELECTABLE_LOCK with SELECTABLE_LOCK: - del SELECTABLES[self.gsid] - except (KeyError, TypeError): + SELECTABLES.remove(self.gsid) + except KeyError: pass def __str__(self): @@ -292,10 +287,6 @@ def LogTraffic(self, final=False): elif final: self.Log([('eof', '1')]) - global SELECTABLES, SELECTABLE_LOCK - with SELECTABLE_LOCK: - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) - def SayHello(self): pass @@ -304,9 +295,6 @@ def ProcessData(self, data): return False def ProcessEof(self): - global SELECTABLES, SELECTABLE_LOCK - with SELECTABLE_LOCK: - SELECTABLES[self.gsid] = '%s %s' % (self.countas, self) if self.read_eof and self.write_eof and not self.write_blocked: self.Cleanup() return False From c015cd48715b8e42f057d87d8d5baa4865f64a59 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Tue, 14 Apr 2020 11:28:49 +0000 Subject: [PATCH 26/36] Reestimate overload sooner --- pagekite/pk.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index 46c14006..1fd5c518 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -1877,7 +1877,7 @@ def CalculateOverload(self, cload=None): try: # Check internal load, abort if load is low anyway. cload = cload or self._get_overload_factor() - if ((cload <= (self.overload // 2)) and + if ((cload <= (self.overload // 3)) and (self.overload == self.overload_current)): return From e12a3d7aeb61f701b9b1ad964aae386da5fbe30a Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Tue, 14 Apr 2020 11:28:19 +0000 Subject: [PATCH 27/36] Better document accept.acl --- etc/pagekite.d/accept.acl.sample | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/etc/pagekite.d/accept.acl.sample b/etc/pagekite.d/accept.acl.sample index 8f80b0ae..e00fda1b 100644 --- a/etc/pagekite.d/accept.acl.sample +++ b/etc/pagekite.d/accept.acl.sample @@ -2,7 +2,10 @@ # # This is a file for use on frontend relays to restrict access. Note # that this effects both tunnels and client connections and is really -# only intended for blacklisting abusive clients on a temporary basis. +# only intended for blocking abusive clients on a temporary basis. +# +# WARNING: This is inefficient and slow. Every line added to this file +# has a cost. # # To enable these rules, rename the file and add the following to one # of the `/etc/pagekite.d/*.rc` files: From 2fcff968b0556efbfcd0aaa8c392c5cdab438ea2 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Wed, 15 Apr 2020 08:21:13 +0000 Subject: [PATCH 28/36] Create --watchdog= --- doc/HISTORY.txt | 6 +++-- pagekite/pk.py | 72 ++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 75 insertions(+), 3 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 2cecc1c2..137407ee 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -5,8 +5,10 @@ Version history - highlights v1.0.0.200406 ------------- - Create ping.pagekite fast-path in dedicated thread - - Make select loop timing and read sizes configurable - - Remove 0.4.x flow-control, fix bugs in current flow control code + - Make select loop timing and read sizes configurable, tweak defaults + - Remove 0.4.x flow-control, fix major bugs in current flow control code + - Fix deadlocks under pypy + - Added --watchdog=N, to self-reap locked up processes v1.0.0.200327 diff --git a/pagekite/pk.py b/pagekite/pk.py index 1fd5c518..c8064358 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -86,7 +86,7 @@ 'frontend=', 'nofrontend=', 'frontends=', 'keepalive=', 'torify=', 'socksify=', 'proxy=', 'noproxy', 'new', 'all', 'noall', 'dyndns=', 'nozchunks', 'sslzlib', - 'buffers=', 'noprobes', 'debugio', 'watch=', + 'buffers=', 'noprobes', 'debugio', 'watch=', 'watchdog=', 'overload=', 'overload_cpu=', 'overload_mem=', 'overload_file=', # DEPRECATED: 'reloadfile=', 'autosave', 'noautosave', 'webroot=', @@ -160,6 +160,52 @@ def supports_auth(self): return ('AUTH' in self.capabilities) +class WatchdogThread(threading.Thread): + """Kill the app if it locks up.""" + daemon = True + + def __init__(self, timeout): + threading.Thread.__init__(self) + self.pid = os.getpid() + self.conns = [] + self.timeout = timeout + self.updated = time.time() + self.locks = {} + + def patpatpat(self): + self.updated = time.time() + + def run(self): + import signal + last_update = 0 + if common.gYamon and self.timeout: + common.gYamon.vset('watchdog', self.timeout) + + while self.timeout and (self.updated != last_update): + last_update = self.updated + logging.LogDebug('Watchdog is happy, snoozing %ds' % self.timeout) + time.sleep(self.timeout) + + if self.timeout: + try: + for lock_name, lock in self.locks.iteritems(): + logging.LogDebug('Lock %s %s' % ( + lock_name, + lock.acquire(blocking=False) and 'is free' or 'is LOCKED')) + for conn in copy.copy(self.conns): + try: + logging.LogError('Watchdog is sad: closing %s' % conn) + conn.fd.close() + except: + pass + finally: + logging.LogError('Watchdog is sad: kill -INT %s' % self.pid) + os.kill(self.pid, signal.SIGINT) + time.sleep(2) + logging.LogError('Watchdog is sad: kill -9 %s' % self.pid) + os.kill(self.pid, 9) + + class AuthThread(threading.Thread): """Handle authentication work in a separate thread.""" daemon = True @@ -1037,6 +1083,8 @@ def ResetConfiguration(self): self.main_loop = True self.watch_level = [None] + self.watchdog = None + self.overload = None self.overload_cpu = 0.75 self.overload_mem = 0.85 @@ -2425,6 +2473,8 @@ def Configure(self, argv): elif opt == '--sslzlib': self.enable_sslzlib = True elif opt == '--watch': self.watch_level[0] = int(arg) + elif opt == '--watchdog': + self.watchdog = WatchdogThread(int(arg)) elif opt == '--overload': self.overload_current = self.overload = int(arg) elif opt == '--overload_file': @@ -3832,6 +3882,23 @@ def Loop(self): if self.tunnel_manager: self.tunnel_manager.start() if self.ui_comm: self.ui_comm.start() + if self.watchdog: + self.watchdog.conns = self.conns.conns + try: + self.watchdog.locks['httpd.RCI.lock'] = 
self.ui_httpd.httpd.RCI.lock + except AttributeError: + pass + if common.gYamon: + self.watchdog.locks['YamonD.lock'] = common.gYamon.lock + # FIXME: Add the AuthApp locks? + for i in range(0, len(self.conns.auth_pool)): + lock_name = 'conns.auth_pool[%d].qc' % i + self.watchdog.locks[lock_name] = self.conns.auth_pool[i].qc + self.watchdog.locks.update({ + 'Connections.lock': self.conns.lock, + 'SELECTABLE_LOCK': SELECTABLE_LOCK}) + self.watchdog.start() + epoll, mypoll = self.CreatePollObject() self.last_loop = time.time() @@ -3882,6 +3949,9 @@ def Loop(self): logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' % (loop_count, len(iready), len(oready), len(eready), snooze, APPVER)) + if self.watchdog: + self.watchdog.patpatpat() + if epoll: epoll.close() From 18518a4e2a33c2416046bc1dcd6b38bce44d7364 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Wed, 15 Apr 2020 16:09:17 +0000 Subject: [PATCH 29/36] Avoid holding conns.lock, Yamon locks for too long --- pagekite/pk.py | 70 ++++++++++++++++++----------------- pagekite/proto/selectables.py | 31 +++++++++------- pagekite/yamond.py | 52 +++++++++++++++----------- 3 files changed, 83 insertions(+), 70 deletions(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index c8064358..af757a0a 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3805,43 +3805,45 @@ def Epoll(self, epoll, waittime): evs = [] broken = False try: - bbc = 0 with self.conns.lock: - for c in self.conns.conns: - fd, mask = c.fd, 0 - if not c.IsDead(): - if c.IsBlocked(): - bbc += len(c.write_blocked) - mask |= select.EPOLLOUT - if c.IsReadable(now): - mask |= select.EPOLLIN - - if mask: - try: - fdc[fd.fileno()] = fd - except socket.error: - # If this fails, then the socket has HUPed, however we need to - # bypass epoll to make sure that's reflected in iready below. - bid = 'dead-%d' % len(evs) - fdc[bid] = fd - evs.append((bid, select.EPOLLHUP)) - # Trigger removal of c.fd, if it was still in the epoll. - fd, mask = None, 0 - - if mask: - try: - epoll.modify(fd, mask) - except IOError: - try: - epoll.register(fd, mask) - except (IOError, TypeError): - evs.append((fd, select.EPOLLHUP)) # Error == HUP - else: + clist = copy.copy(self.conns.conns) + + bbc = 0 + for c in clist: + fd, mask = c.fd, 0 + if not c.IsDead(): + if c.IsBlocked(): + bbc += len(c.write_blocked) + mask |= select.EPOLLOUT + if c.IsReadable(now): + mask |= select.EPOLLIN + + if mask: + try: + fdc[fd.fileno()] = fd + except socket.error: + # If this fails, then the socket has HUPed, however we need to + # bypass epoll to make sure that's reflected in iready below. + bid = 'dead-%d' % len(evs) + fdc[bid] = fd + evs.append((bid, select.EPOLLHUP)) + # Trigger removal of c.fd, if it was still in the epoll. + fd, mask = None, 0 + + if mask: + try: + epoll.modify(fd, mask) + except IOError: try: - epoll.unregister(c.fd) # Important: Use c.fd, not fd! + epoll.register(fd, mask) except (IOError, TypeError): - # Failing to unregister is OK, ignore - pass + evs.append((fd, select.EPOLLHUP)) # Error == HUP + else: + try: + epoll.unregister(c.fd) # Important: Use c.fd, not fd! 
+ except (IOError, TypeError): + # Failing to unregister is OK, ignore + pass common.buffered_bytes[0] = bbc evs.extend(epoll.poll(waittime)) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index a3f08323..e248da20 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -88,6 +88,7 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, self.address = address self.on_port = on_port self.created = self.bytes_logged = time.time() + self.lock = threading.RLock() self.last_activity = 0 self.dead = False self.ui = ui @@ -115,7 +116,6 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, self.acked_kb_delta = 0 # Compression stuff - self.lock = threading.RLock() self.zw = None self.zlevel = 1 self.zreset = False @@ -136,10 +136,11 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, common.gYamon.vadd('selectables', 1) def CountAs(self, what): - if common.gYamon: - common.gYamon.vadd(self.countas, -1) - common.gYamon.vadd(what, 1) - self.countas = what + with self.lock: + if common.gYamon: + common.gYamon.vadd(self.countas, -1) + common.gYamon.vadd(what, 1) + self.countas = what def Cleanup(self, close=True): self.peeked = self.zw = '' @@ -155,22 +156,24 @@ def Cleanup(self, close=True): self.CountAs('selectables_dead') if close: self.LogTraffic(final=True) + try: + global SELECTABLES, SELECTABLE_LOCK + with SELECTABLE_LOCK: + SELECTABLES.remove(self.gsid) + except KeyError: + pass def __del__(self): # Important: This can run at random times, especially under pypy, so all # locks must be re-entrant (RLock), otherwise we deadlock. try: - if common.gYamon: - common.gYamon.vadd(self.countas, -1) - common.gYamon.vadd('selectables', -1) + with self.lock: + if common.gYamon and self.countas: + common.gYamon.vadd(self.countas, -1) + common.gYamon.vadd('selectables', -1) + self.countas = None except AttributeError: pass - try: - global SELECTABLES, SELECTABLE_LOCK - with SELECTABLE_LOCK: - SELECTABLES.remove(self.gsid) - except KeyError: - pass def __str__(self): return '%s: %s<%s%s%s>' % (self.log_id, self.__class__, diff --git a/pagekite/yamond.py b/pagekite/yamond.py index 7693ac40..c9db2075 100755 --- a/pagekite/yamond.py +++ b/pagekite/yamond.py @@ -101,7 +101,6 @@ def __init__(self, sspec, server=YamonHttpServer, handler=YamonRequestHandler): threading.Thread.__init__(self) - self.lock = threading.RLock() # threading.Lock() will deadlock pypy self.server = server self.handler = handler self.sspec = sspec @@ -111,38 +110,47 @@ def __init__(self, sspec, self.lists = {} self.views = {} + # Important: threading.Lock() will deadlock pypy and generally we want + # to avoid locking. The methods below only hold this lock + # if they are adding/removing elements from our dicts and + # lists. For mutating existing values we either just accept + # things getting overwritten or rely on the GIL. 
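The comment above describes the locking policy YamonD ends up with: hold the (re-entrant) lock only when a key might be added to or removed from the dict, and let in-place updates of existing keys rely on the GIL, accepting the occasional lost update. A toy sketch of that pattern (ToyCounters is illustrative, not part of the patch):

    import threading

    class ToyCounters(object):
        def __init__(self):
            self.lock = threading.RLock()
            self.values = {}

        def vadd(self, var, value):
            # Lock only for the resize; the += below never changes dict size.
            if var not in self.values:
                with self.lock:
                    self.values[var] = self.values.get(var, 0)
            self.values[var] += value

    c = ToyCounters()
    c.vadd('selectables_live', 1)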
+ self.lock = threading.RLock() + def vmax(self, var, value): - with self.lock: - if value > self.values[var]: - self.values[var] = value + # Unlocked, since we don't change the size of self.values + if value > self.values[var]: + self.values[var] = value + + def vmin(self, var, value): + # Unlocked, since we don't change the size of self.values + if value < self.values[var]: + self.values[var] = value def vscale(self, var, ratio, add=0): - with self.lock: - if var not in self.values: - self.values[var] = 0 - self.values[var] *= ratio - self.values[var] += add + if var not in self.values: + with self.lock: + self.values[var] = self.values.get(var, 0) + # Unlocked, since we don't change the size of self.values + self.values[var] *= ratio + self.values[var] += add def vset(self, var, value): with self.lock: self.values[var] = value def vadd(self, var, value, wrap=None): - with self.lock: - if var not in self.values: - self.values[var] = 0 - self.values[var] += value - if wrap is not None and self.values[var] >= wrap: - self.values[var] -= wrap - - def vmin(self, var, value): - with self.lock: - if value < self.values[var]: - self.values[var] = value + if var not in self.values: + with self.lock: + self.values[var] = self.values.get(var, 0) + # We assume the GIL will guarantee these do sane things + self.values[var] += value + if wrap: + self.values[var] %= wrap def vdel(self, var): - with self.lock: - if var in self.values: + if var in self.values: + with self.lock: del self.values[var] def lcreate(self, listn, elems): From 7e62ce6ec4ae171930579109c99dce0b4fdded19 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Wed, 15 Apr 2020 23:06:23 +0000 Subject: [PATCH 30/36] Improve overload calculations, skip snooze at backend, add yamon vars --- pagekite/httpd.py | 1 + pagekite/pk.py | 40 +++++++++++++++++++++++----------------- pagekite/yamond.py | 11 +++++++++++ 3 files changed, 35 insertions(+), 17 deletions(-) diff --git a/pagekite/httpd.py b/pagekite/httpd.py index eee1d065..a6dcdf90 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -1119,6 +1119,7 @@ def __init__(self, sspec, pkite, conns, gYamon = common.gYamon = yamond.YamonD(sspec) gYamon.vset('started', int(time.time())) gYamon.vset('version', APPVER) + gYamon.vset('version_python', sys.version.replace('\n', ' ')) gYamon.vset('httpd_ssl_enabled', self.enable_ssl) gYamon.vset('errors', 0) gYamon.lcreate("tunnel_rtt", 100) diff --git a/pagekite/pk.py b/pagekite/pk.py index af757a0a..bab4a693 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -811,7 +811,7 @@ def DoFrontendWork(self, loop_count): # Update our idea of what it means to be overloaded. if self.pkite.overload and (1 == loop_count % 20): - self.pkite.CalculateOverload() + self.pkite.CalculateOverload(yamon=common.gYamon) # FIXME: Front-ends should close dead back-end tunnels. for tid in self.conns.tunnels: @@ -1903,10 +1903,10 @@ def _get_overload_factor(self): if common.gYamon is not None: return ( common.gYamon.values.get('backends_live', 0) + - common.gYamon.values.get('selectables_live', 1)) + common.gYamon.values.get('selectables_live', 1)) or 1 return (len(self.conns.tunnels) or 1) - def CalculateOverload(self, cload=None): + def CalculateOverload(self, cload=None, yamon=None): # Check overload file first, it overrides everything if self.overload_file: try: @@ -1923,12 +1923,6 @@ def CalculateOverload(self, cload=None): # FIXME: This is almost certainly linux specific. # FIXME: There are too many magic numbers in here. 
try: - # Check internal load, abort if load is low anyway. - cload = cload or self._get_overload_factor() - if ((cload <= (self.overload // 3)) and - (self.overload == self.overload_current)): - return - # If both are disabled, just bail out. if not (self.overload_cpu or self.overload_mem): return @@ -1952,14 +1946,19 @@ def CalculateOverload(self, cload=None): if not self.overload_membase: self.overload_membase = float(meminfo['memtotal']) - memfree # Sanity checks... are these really necessary? - self.overload_membase = max(75000, self.overload_membase) + self.overload_membase = max(50000, self.overload_membase) self.overload_membase = min(self.overload_membase, 0.9 * meminfo['memtotal']) + # Check internal load, abort if load is low anyway. + cload = cload or self._get_overload_factor() + if cload < 50: + return + # Calculate the implied unit cost of every live connection memtotal = float(meminfo['memtotal'] - self.overload_membase) - munit = max(75, float(memtotal - memfree) / cload) # 75KB/conn=optimism! - lunit = loadavg / cload + munit = max(32, float(memtotal - memfree) / cload) # 32KB/conn=optimism! + lunit = max(0.10, loadavg) / cload # Calculate overload factors based on the unit costs moverload = int(self.overload_mem * float(memtotal) / munit) @@ -1985,6 +1984,9 @@ def CalculateOverload(self, cload=None): self.overload, moverload, loverload, cload, munit, lunit, memfree, memtotal, loadavg)) + if yamon is not None: + yamon.vset('overload_unit_mem', munit) + yamon.vset('overload_unit_cpu', lunit) except (IOError, OSError, ValueError, KeyError, TypeError): pass @@ -3941,11 +3943,14 @@ def Loop(self): # Con: # - Adds latency # - snooze = max(0, (now + common.SELECT_LOOP_MIN_MS/1000.0) - time.time()) - if snooze: - if oready: - snooze /= 2 - time.sleep(snooze) + if self.isfrontend: + snooze = max(0, (now + common.SELECT_LOOP_MIN_MS/1000.0) - time.time()) + if snooze: + if oready: + snooze /= 2 + time.sleep(snooze) + else: + snooze = 0 if 0 == (loop_count % (5 if logging.DEBUG_IO else 250)): logging.LogDebug('Loop #%d (i=%d, o=%d, e=%d, s=%.3fs) v%s' @@ -3974,6 +3979,7 @@ def Start(self, howtoquit='CTRL+C = Stop'): alignright='[%s]' % howtoquit) config_report = [('started', self.pyfile), ('version', APPVER), ('platform', sys.platform), + ('python', sys.version.replace('\n', ' ')), ('argv', ' '.join(sys.argv[1:])), ('ca_certs', self.ca_certs)] for optf in self.rcfiles_loaded: diff --git a/pagekite/yamond.py b/pagekite/yamond.py index c9db2075..5519dbe5 100755 --- a/pagekite/yamond.py +++ b/pagekite/yamond.py @@ -177,12 +177,23 @@ def render_vars_text(self, view=None): data = [] for var in values: data.append('%s: %s\n' % (var, values[var])) + if var == 'started': + data.append( + 'started_days_ago: %.3f\n' % ((time.time() - values[var]) / 86400)) for lname in lists: (elems, offset, lst) = lists[lname] l = lst[offset:] l.extend(lst[:offset]) data.append('%s: %s\n' % (lname, ' '.join(['%s' % (x, ) for x in l]))) + try: + slist = sorted([float(i) for i in l if i]) + if len(slist) >= 10: + data.append('%s_m50: %.2f\n' % (lname, slist[int(len(slist) * 0.5)])) + data.append('%s_m90: %.2f\n' % (lname, slist[int(len(slist) * 0.9)])) + data.append('%s_avg: %.2f\n' % (lname, sum(slist) / len(slist))) + except (ValueError, TypeError, IndexError, ZeroDivisionError): + pass data.sort() return ''.join(data) From d54ffffd47473f26cc11e3d0ebdb8a3eb84dceb7 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Thu, 16 Apr 2020 01:06:27 +0000 Subject: [PATCH 31/36] Fix file open() calls for better pypy compatibility --- pagekite/pk.py | 36 +++++++++++++++++++----------------- pagekite/proto/conns.py | 35 ++++++++++++++++++----------------- pagekite/proto/proto.py | 7 ++++--- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/pagekite/pk.py b/pagekite/pk.py index bab4a693..105a59e4 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -1910,7 +1910,8 @@ def CalculateOverload(self, cload=None, yamon=None): # Check overload file first, it overrides everything if self.overload_file: try: - new_overload = int(open(self.overload_file, 'r').read().strip()) + with open(self.overload_file, 'r') as fd: + new_overload = int(fd.read().strip()) if new_overload != self.overload_current: self.overload_current = new_overload logging.LogInfo( @@ -1928,14 +1929,16 @@ def CalculateOverload(self, cload=None, yamon=None): return # Check system load. - loadavg = float(open('/proc/loadavg', 'r').read().strip().split()[1]) + with open('/proc/loadavg', 'r') as fd: + loadavg = float(fd.read().strip().split()[1]) meminfo = {} - for line in open('/proc/meminfo', 'r'): - try: - key, val = line.lower().split(':') - meminfo[key] = int(val.strip().split()[0]) - except ValueError: - pass + with open('/proc/meminfo', 'r') as fd: + for line in fd: + try: + key, val = line.lower().split(':') + meminfo[key] = int(val.strip().split()[0]) + except ValueError: + pass # Figure out how much RAM is available memfree = meminfo.get('memavailable') @@ -2158,9 +2161,8 @@ def BindUiSspec(self, force=False): def LoadMOTD(self): if self.motd: try: - f = open(self.motd, 'r') - self.motd_message = ''.join(f.readlines()).strip()[:8192] - f.close() + with open(self.motd, 'r') as f: + self.motd_message = ''.join(f.readlines()).strip()[:8192] except (OSError, IOError): pass @@ -3252,9 +3254,10 @@ def GetHostIpAddrs(self, host): rv = [] if host[:1] == '@': try: - for line in (l.strip() for l in open(host[1:], 'r')): - if line and line[:1] not in ('#', ';'): - rv.append(line) + with open(host[1:], 'r') as fd: + for line in (l.strip() for l in fd): + if line and line[:1] not in ('#', ';'): + rv.append(line) logging.LogDebug('Loaded %d IPs from %s' % (len(rv), host[1:])) except: logging.LogDebug('Failed to load IPs from %s' % host[1:]) @@ -4062,9 +4065,8 @@ def reopen(x,y): # Create PID file if self.pidfile: - pf = open(self.pidfile, 'w') - pf.write('%s\n' % os.getpid()) - pf.close() + with open(self.pidfile, 'w') as pf: + pf.write('%s\n' % os.getpid()) # Do this after creating the PID and log-files. 
if self.daemonize: diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index b21e56a1..808858cf 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -2041,23 +2041,24 @@ def check_acl(self, ipaddr, default=True): try: ipaddr = '%s' % ipaddr lc = 0 - for line in open(self.acl, 'r'): - line = line.lower().strip() - lc += 1 - if line.startswith('#') or not line: - continue - try: - words = line.split() - pattern, rule = words[:2] - reason = ' '.join(words[2:]) - if ipaddr == pattern: - self.acl_match = (lc, pattern, rule, reason) - return bool('allow' in rule) - elif re.compile(pattern).match(ipaddr): - self.acl_match = (lc, pattern, rule, reason) - return bool('allow' in rule) - except IndexError: - self.LogDebug('Invalid line %d in ACL %s' % (lc, self.acl)) + with open(self.acl, 'r') as fd: + for line in fd: + line = line.lower().strip() + lc += 1 + if line.startswith('#') or not line: + continue + try: + words = line.split() + pattern, rule = words[:2] + reason = ' '.join(words[2:]) + if ipaddr == pattern: + self.acl_match = (lc, pattern, rule, reason) + return bool('allow' in rule) + elif re.compile(pattern).match(ipaddr): + self.acl_match = (lc, pattern, rule, reason) + return bool('allow' in rule) + except IndexError: + self.LogDebug('Invalid line %d in ACL %s' % (lc, self.acl)) except: self.LogDebug( 'Failed to read/parse %s: %s' % (self.acl, format_exc())) diff --git a/pagekite/proto/proto.py b/pagekite/proto/proto.py index f74e5601..da29b6f7 100755 --- a/pagekite/proto/proto.py +++ b/pagekite/proto/proto.py @@ -42,9 +42,10 @@ def globalSecret(): # Next, see if we can augment that with some real randomness. try: - newSecret = sha1hex(open('/dev/urandom').read(64) + gSecret) - gSecret = newSecret - logging.LogDebug('Seeded signatures using /dev/urandom, hooray!') + with open('/dev/urandom') as fd: + newSecret = sha1hex(fd.read(64) + gSecret) + gSecret = newSecret + logging.LogDebug('Seeded signatures using /dev/urandom, hooray!') except: try: newSecret = sha1hex(os.urandom(64) + gSecret) From 5fa2b54153bf9b7dffefc756faf484437ae46017 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Sun, 19 Apr 2020 19:01:07 +0000 Subject: [PATCH 32/36] Disabled old ssl workarounds on modern versions of Python --- doc/HISTORY.txt | 1 + pagekite/compat.py | 5 +++-- pagekite/pk.py | 4 +++- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 137407ee..19bba74f 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -9,6 +9,7 @@ v1.0.0.200406 - Remove 0.4.x flow-control, fix major bugs in current flow control code - Fix deadlocks under pypy - Added --watchdog=N, to self-reap locked up processes + - Disabled old ssl workarounds on modern versions of Python v1.0.0.200327 diff --git a/pagekite/compat.py b/pagekite/compat.py index 1aed90c9..239354e9 100755 --- a/pagekite/compat.py +++ b/pagekite/compat.py @@ -20,6 +20,7 @@ along with this program. 
If not, see: """ ############################################################################## +import sys import common from common import * @@ -121,7 +122,7 @@ def rsplit(ch, data): # import sockschain socks = sockschain -if socks.HAVE_PYOPENSSL: +if socks.HAVE_PYOPENSSL or tuple(sys.version_info) > (2, 7, 10): SSL = socks.SSL SEND_ALWAYS_BUFFERS = False SEND_MAX_BYTES = 16 * 1024 @@ -131,7 +132,7 @@ def rsplit(ch, data): SSL = socks.SSL SEND_ALWAYS_BUFFERS = True SEND_MAX_BYTES = 4 * 1024 - TUNNEL_SOCKET_BLOCKS = True # Workaround for http://bugs.python.org/issue8240 + TUNNEL_SOCKET_BLOCKS = True # Workaround for http://bugs.python.org/issue8240 else: SEND_ALWAYS_BUFFERS = False diff --git a/pagekite/pk.py b/pagekite/pk.py index 105a59e4..a7744c3a 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -3984,7 +3984,9 @@ def Start(self, howtoquit='CTRL+C = Stop'): ('platform', sys.platform), ('python', sys.version.replace('\n', ' ')), ('argv', ' '.join(sys.argv[1:])), - ('ca_certs', self.ca_certs)] + ('ca_certs', self.ca_certs), + ('send_always_buffers', SEND_ALWAYS_BUFFERS), + ('tunnel_socket_blocks', TUNNEL_SOCKET_BLOCKS)] for optf in self.rcfiles_loaded: config_report.append(('optfile_%s' % optf, 'ok')) logging.Log(config_report) From f9c941c46bb3542ebb52f1f6e196af8c05e142f9 Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Wed, 22 Apr 2020 11:51:54 +0000 Subject: [PATCH 33/36] Better report the state of a selectable in the __str__ method --- pagekite/proto/conns.py | 6 +++ pagekite/proto/selectables.py | 71 +++++++++++++++++++++++------------ 2 files changed, 54 insertions(+), 23 deletions(-) diff --git a/pagekite/proto/conns.py b/pagekite/proto/conns.py index 808858cf..93e9edc8 100755 --- a/pagekite/proto/conns.py +++ b/pagekite/proto/conns.py @@ -2081,6 +2081,7 @@ def HandleClient(self, client, address): def ReadData(self, maxread=None): try: + self.sstate = 'accept' self.last_activity = time.time() client, address = self.fd.accept() if self.port not in SMTP_PORTS: @@ -2091,15 +2092,20 @@ def ReadData(self, maxread=None): except IOError: client = None elif client: + self.sstate = 'client' self.HandleClient(client, address) + self.sstate = (self.dead and 'dead' or 'idle') return True except IOError, err: + self.sstate += '/ioerr=%s' % (err.errno,) self.LogDebug('Listener::ReadData: error: %s (%s)' % (err, err.errno)) except socket.error, (errno, msg): + self.sstate += '/sockerr=%s' % (errno,) self.LogInfo('Listener::ReadData: error: %s (errno=%s)' % (msg, errno)) except Exception, e: + self.sstate += '/exc' self.LogDebug('Listener::ReadData: %s' % e) return True diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index e248da20..965e9362 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -121,6 +121,7 @@ def __init__(self, fd=None, address=None, on_port=None, maxread=None, self.zreset = False # Logging + self.sstate = 'new' self.alt_id = None self.countas = 'selectables_live' self.sid = self.gsid = getSelectableId(self.countas) @@ -153,6 +154,7 @@ def Cleanup(self, close=True): self.fd = None if not self.dead: self.dead = True + self.sstate = 'dead' self.CountAs('selectables_dead') if close: self.LogTraffic(final=True) @@ -176,10 +178,10 @@ def __del__(self): pass def __str__(self): - return '%s: %s<%s%s%s>' % (self.log_id, self.__class__, - self.read_eof and '-' or 'r', - self.write_eof and '-' or 'w', - len(self.write_blocked)) + return '%s: %s<%s|%s%s%s>' % (self.log_id, self.__class__, self.sstate, + self.read_eof and 
'-' or 'r', + self.write_eof and '-' or 'w', + len(self.write_blocked)) def __html__(self): try: @@ -207,7 +209,7 @@ def __html__(self): self.all_out + self.wrote_bytes, time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(self.created)), - self.dead and 'dead' or 'alive') + self.sstate) def ResetZChunks(self): if self.zw: @@ -242,22 +244,22 @@ def SetConn(self, conn): self.wrote_bytes = conn.wrote_bytes def Log(self, values): - if self.log_id: values.append(('id', self.log_id)) + if self.log_id: values.extend([('id', self.log_id)]) logging.Log(values) def LogError(self, error, params=None): values = params or [] - if self.log_id: values.append(('id', self.log_id)) + if self.log_id: values.extend([('id', self.log_id), ('s', self.sstate)]) logging.LogError(error, values) def LogDebug(self, message, params=None): values = params or [] - if self.log_id: values.append(('id', self.log_id)) + if self.log_id: values.extend([('id', self.log_id), ('s', self.sstate)]) logging.LogDebug(message, values) def LogInfo(self, message, params=None): values = params or [] - if self.log_id: values.append(('id', self.log_id)) + if self.log_id: values.extend([('id', self.log_id), ('s', self.sstate)]) logging.LogInfo(message, values) def LogTrafficStatus(self, final=False): @@ -319,7 +321,9 @@ def EatPeeked(self, eat_bytes=None, keep_peeking=False): discard = '' while len(discard) < eat_bytes: try: - discard += self.fd.recv(eat_bytes - len(discard)) + bytecount = eat_bytes - len(discard) + self.sstate = 'eat(%d)' % bytecount + discard += self.fd.recv(bytecount) except socket.error, (errno, msg): self.LogInfo('Error reading (%d/%d) socket: %s (errno=%s)' % ( eat_bytes, self.peeked, msg, errno)) @@ -327,6 +331,7 @@ def EatPeeked(self, eat_bytes=None, keep_peeking=False): if logging.DEBUG_IO: print '===[ ATE %d PEEKED BYTES ]===\n' % eat_bytes + self.sstate = 'ate(%d)' % eat_bytes self.peeked -= eat_bytes self.peeking = keep_peeking return @@ -339,17 +344,22 @@ def ReadData(self, maxread=None): maxread = maxread or self.maxread try: if self.peeking: + self.sstate = 'peek(%d)' % maxread data = self.fd.recv(maxread, socket.MSG_PEEK) self.peeked = len(data) if logging.DEBUG_IO: print '<== PEEK =[%s]==(\n%s)==' % (self, data[:160]) else: + self.sstate = 'read(%d)' % maxread data = self.fd.recv(maxread) if logging.DEBUG_IO: print('<== IN =[%s]==(\n%s)==' % (self, data[:160])) + self.sstate = 'data(%d)' % len(data) except (SSL.WantReadError, SSL.WantWriteError), err: + self.sstate += '/SSL.WRE' return True except IOError, err: + self.sstate += '/ioerr=%s' % (err.errno,) if err.errno not in self.HARMLESS_ERRNOS: self.LogDebug('Error reading socket: %s (%s)' % (err, err.errno)) common.DISCONNECT_COUNT += 1 @@ -357,10 +367,12 @@ def ReadData(self, maxread=None): else: return True except (SSL.Error, SSL.ZeroReturnError, SSL.SysCallError), err: + self.sstate += '/SSL.Error' self.LogDebug('Error reading socket (SSL): %s' % err) common.DISCONNECT_COUNT += 1 return False except socket.error, (errno, msg): + self.sstate += '/sockerr=%s' % (err.errno,) if errno in self.HARMLESS_ERRNOS: return True else: @@ -368,19 +380,23 @@ def ReadData(self, maxread=None): common.DISCONNECT_COUNT += 1 return False - self.last_activity = now - if data is None or data == '': - self.read_eof = True - if logging.DEBUG_IO: - print '<== IN =[%s]==(EOF)==' % self - return self.ProcessData('') - else: - if not self.peeking: - self.read_bytes += len(data) - if self.acked_kb_delta: - self.acked_kb_delta += (len(data)/1024) - if self.read_bytes > 
logging.LOG_THRESHOLD: self.LogTraffic() - return self.ProcessData(data) + try: + self.last_activity = now + if data is None or data == '': + self.sstate += '/EOF' + self.read_eof = True + if logging.DEBUG_IO: + print '<== IN =[%s]==(EOF)==' % self + return self.ProcessData('') + else: + if not self.peeking: + self.read_bytes += len(data) + if self.acked_kb_delta: + self.acked_kb_delta += (len(data)/1024) + if self.read_bytes > logging.LOG_THRESHOLD: self.LogTraffic() + return self.ProcessData(data) + finally: + self.sstate = (self.dead and 'dead' or 'idle') def RecordProgress(self, skb): if skb >= 0: @@ -406,6 +422,7 @@ def Send(self, data, try_flush=False, activity=False, try: want_send = self.write_retry or min(len(sending), SEND_MAX_BYTES) sent_bytes = None + self.sstate = 'send(%d)' % (want_send) # Try to write for up to 5 seconds before giving up for try_wait in (0, 0, 0.1, 0.2, 0.2, 0.2, 0.3, 0.5, 0.5, 1, 1, 1, 0): try: @@ -420,13 +437,16 @@ def Send(self, data, try_flush=False, activity=False, if logging.DEBUG_IO: print '=== WRITE SSL RETRY: =[%s: %s bytes]==' % (self, want_send) if try_wait: + self.sstate = 'send/SSL.WRE(%d,%.1f)' % (want_send, try_wait) time.sleep(try_wait) if sent_bytes is None: + self.sstate += '/retries' self.LogInfo('Error sending: Too many SSL write retries') self.ProcessEofWrite() common.DISCONNECT_COUNT += 1 return False except IOError, err: + self.sstate += '/ioerr=%s' % (err.errno,) if err.errno not in self.HARMLESS_ERRNOS: self.LogInfo('Error sending: %s' % err) self.ProcessEofWrite() @@ -437,6 +457,7 @@ def Send(self, data, try_flush=False, activity=False, print '=== WRITE HICCUP: =[%s: %s bytes]==' % (self, want_send) self.write_retry = want_send except socket.error, (errno, msg): + self.sstate += '/sockerr=%s' % (errno,) if errno not in self.HARMLESS_ERRNOS: self.LogInfo('Error sending: %s (errno=%s)' % (msg, errno)) self.ProcessEofWrite() @@ -447,11 +468,13 @@ def Send(self, data, try_flush=False, activity=False, print '=== WRITE HICCUP: =[%s: %s bytes]==' % (self, want_send) self.write_retry = want_send except (SSL.Error, SSL.ZeroReturnError, SSL.SysCallError), err: + self.sstate += '/SSL.Error' self.LogInfo('Error sending (SSL): %s' % err) self.ProcessEofWrite() common.DISCONNECT_COUNT += 1 return False except AttributeError: + self.sstate += '/AttrError' # This has been seen in the wild, is most likely some sort of # race during shutdown. :-( self.LogInfo('AttributeError, self.fd=%s' % self.fd) @@ -468,6 +491,8 @@ def Send(self, data, try_flush=False, activity=False, if self.write_eof and not self.write_blocked: self.ProcessEofWrite() + + self.sstate = (self.dead and 'dead' or 'idle') return True def SendChunked(self, data, compress=True, zhistory=None, just_buffer=False): From a0545d079586604033b0e3c4872551ed8a4ea7a1 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Wed, 22 Apr 2020 16:34:24 +0000 Subject: [PATCH 34/36] Update watchdog to add a SIGUSR1 handler and a yamonish endpoint --- pagekite/httpd.py | 7 ++++- pagekite/pk.py | 71 +++++++++++++++++++++++++++++++++++++---------- 2 files changed, 63 insertions(+), 15 deletions(-) diff --git a/pagekite/httpd.py b/pagekite/httpd.py index a6dcdf90..503d6f23 100755 --- a/pagekite/httpd.py +++ b/pagekite/httpd.py @@ -856,7 +856,12 @@ def handleHttpRequest(self, scheme, netloc, path, params, query, frag, photobackup = self.host_config.get('photobackup', False) if path == self.host_config.get('yamon', False): - if common.gYamon: + if qs.get('view', [None])[0] == 'conns': + from pagekite.pk import Watchdog + llines = [] + Watchdog.DumpConnState(self.server.pkite.conns, logfunc=llines.append) + data['body'] = '\n'.join(llines) + '\n' + elif common.gYamon: self.server.pkite.Overloaded(yamon=common.gYamon) data['body'] = common.gYamon.render_vars_text(qs.get('view', [None])[0]) else: diff --git a/pagekite/pk.py b/pagekite/pk.py index a7744c3a..47d2909b 100755 --- a/pagekite/pk.py +++ b/pagekite/pk.py @@ -160,7 +160,7 @@ def supports_auth(self): return ('AUTH' in self.capabilities) -class WatchdogThread(threading.Thread): +class Watchdog(threading.Thread): """Kill the app if it locks up.""" daemon = True @@ -172,19 +172,57 @@ def __init__(self, timeout): self.updated = time.time() self.locks = {} + @classmethod + def DumpConnState(cls, conns, close=False, logfunc=None): + for fpc in copy.copy(conns.ping_helper.clients): + try: + if close: + (logfunc or logging.LogError)('Closing FPC %s' % (fpc,)) + fpc[1].close() + else: + (logfunc or logging.LogInfo)('FastPing: %s' % (fpc,)) + except: + pass + for conn in copy.copy(conns.conns): + try: + if close: + (logfunc or logging.LogError)('Closing %s' % conn) + conn.fd.close() + else: + (logfunc or logging.LogInfo)('Conn %s' % conn) + except: + pass + def patpatpat(self): self.updated = time.time() def run(self): import signal - last_update = 0 + if self.timeout: + self.timeout = max(15, self.timeout) # Lower than this won't work! 
if common.gYamon and self.timeout: common.gYamon.vset('watchdog', self.timeout) - while self.timeout and (self.updated != last_update): - last_update = self.updated - logging.LogDebug('Watchdog is happy, snoozing %ds' % self.timeout) - time.sleep(self.timeout) + failed = 5 # Log happy message after first sleep + worries = 0 + last_update = self.updated + while self.timeout and (failed < 10) and (worries < self.timeout): + time.sleep(self.timeout / 10.0) + if self.updated == last_update: + failed += 1 + worries += 1 + logging.LogInfo('Watchdog is worried (timeout=%ds, failures=%d/10, worries=%.1f/%d)' + % (self.timeout, failed, worries, self.timeout)) + if common.gYamon: + common.gYamon.vadd('watchdog_worried', 1) + if failed in (1, 6): + os.kill(self.pid, signal.SIGUSR1) + else: + if failed: + logging.LogInfo('Watchdog is happy (timeout=%ds)' % self.timeout) + failed = 0 + worries *= 0.9 + last_update = self.updated if self.timeout: try: @@ -192,12 +230,7 @@ def run(self): logging.LogDebug('Lock %s %s' % ( lock_name, lock.acquire(blocking=False) and 'is free' or 'is LOCKED')) - for conn in copy.copy(self.conns): - try: - logging.LogError('Watchdog is sad: closing %s' % conn) - conn.fd.close() - except: - pass + self.DumpConnState(self.conns, close=True) finally: logging.LogError('Watchdog is sad: kill -INT %s' % self.pid) os.kill(self.pid, signal.SIGINT) @@ -2478,7 +2511,7 @@ def Configure(self, argv): elif opt == '--watch': self.watch_level[0] = int(arg) elif opt == '--watchdog': - self.watchdog = WatchdogThread(int(arg)) + self.watchdog = Watchdog(int(arg)) elif opt == '--overload': self.overload_current = self.overload = int(arg) elif opt == '--overload_file': @@ -4052,11 +4085,21 @@ def Start(self, howtoquit='CTRL+C = Stop'): def reopen(x,y): if self.logfile: self.LogTo(self.logfile, close_all=False) - logging.LogDebug('SIGHUP received, reopening: %s' % self.logfile) + logging.LogInfo('SIGHUP received, reopening: %s' % self.logfile) signal.signal(signal.SIGHUP, reopen) except Exception: logging.LogError('Warning: signal handler unavailable, logrotate will not work.') + # Set up SIGUSR1 handler. + try: + import signal + def dumpconns(x,y): + logging.LogInfo('SIGUSR1 received, dumping conn state') + Watchdog.DumpConnState(self.conns) + signal.signal(signal.SIGUSR1, dumpconns) + except Exception: + logging.LogError('Warning: signal handler unavailable, kill -USR1 will not work.') + # Disable compression in OpenSSL if socks.HAVE_SSL and not self.enable_sslzlib: socks.DisableSSLCompression() From 662c914b5e0edc46069c0ebec70af6f7b4e39dca Mon Sep 17 00:00:00 2001 From: "Bjarni R. Einarsson" Date: Wed, 22 Apr 2020 16:35:41 +0000 Subject: [PATCH 35/36] Avoid premature reuse of selectable IDs --- pagekite/proto/selectables.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pagekite/proto/selectables.py b/pagekite/proto/selectables.py index 965e9362..5e8bcefb 100755 --- a/pagekite/proto/selectables.py +++ b/pagekite/proto/selectables.py @@ -47,6 +47,8 @@ def getSelectableId(what): global SELECTABLES, SELECTABLE_ID, SELECTABLE_LOCK with SELECTABLE_LOCK: count = 0 + SELECTABLE_ID += 1 + SELECTABLE_ID %= 0x20000 while SELECTABLE_ID in SELECTABLES: SELECTABLE_ID += 1 SELECTABLE_ID %= 0x20000 From 57ec526317aed1c67e684787c8812a5b14a7e810 Mon Sep 17 00:00:00 2001 From: "Bjarni R. 
Einarsson" Date: Fri, 24 Apr 2020 23:35:40 +0000 Subject: [PATCH 36/36] Update history --- doc/HISTORY.txt | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/HISTORY.txt b/doc/HISTORY.txt index 19bba74f..966ee3e0 100644 --- a/doc/HISTORY.txt +++ b/doc/HISTORY.txt @@ -2,14 +2,15 @@ Version history - highlights ============================ -v1.0.0.200406 +v1.0.1.200424 ------------- + - This release (v1.0.1) is all about performance and efficiency! - Create ping.pagekite fast-path in dedicated thread - Make select loop timing and read sizes configurable, tweak defaults - Remove 0.4.x flow-control, fix major bugs in current flow control code - - Fix deadlocks under pypy + - Fix locking-related deadlocks under PyPy - Added --watchdog=N, to self-reap locked up processes - - Disabled old ssl workarounds on modern versions of Python + - Disabled old ssl workarounds on modern versions of Python (broke PyPy) v1.0.0.200327