96btool: pull: Add a proper rate limiting framework

Rather than having "random" sleeps in the code, let's introduce a proper rate limiting framework that can keep (just) within the limits imposed by the server.
author: Daniel Thompson <daniel.thompson@linaro.org> 2019-01-15 16:27:09 +0000
committer: Daniel Thompson <daniel.thompson@linaro.org> 2019-01-15 16:27:09 +0000
commit: 1eb902aba53ca316e4a617d38434b9be8ef91f53 (patch)
tree: 16ed2148074b4bae592acbb513b076b90f843881
parent: e809847ed32fe77828e5a4a561a6463b0248a7e3 (diff)
1 files changed, 41 insertions, 7 deletions
diff --git a/bin/96btool b/bin/96btool
index cf78dd5..6f3cba3 100755
--- a/bin/96btool
+++ b/bin/96btool
@@ -35,6 +35,7 @@ import os
 import textwrap
 import time
 import traceback
+import types
 
 import toys.chart as chart
 import toys.collect as collect
@@ -99,6 +100,33 @@ def load_json(obj):
 
 	return ujson.load(obj)
 
+def _get(self, path, **kwargs):
+	# Time remaining until the earliest point the next request may start
+	zzz = self._sleep_until - time.perf_counter()
+
+	# Only nap when that point is still in the future
+	if zzz > 0:
+		time.sleep(zzz)
+
+	# Calculate the new sleep-until point. Normally we could advance
+	# the previous one, but discourse is pretty sensitive (there must
+	# be no window in the last minute where we exceed the rate limit),
+	# so we restart the limiter from "now" whenever we have dropped
+	# below our maximum rate.
+	if zzz < self._intra_request_delay:
+		self._sleep_until = time.perf_counter() + self._intra_request_delay
+	else:
+		self._sleep_until += self._intra_request_delay
+
+	return self._wrapped_get(path, **kwargs)
+
+def set_rate_limit(client, requests_per_minute):
+	# Space requests for one fewer per minute than the true limit, then wrap client._get
+	client._intra_request_delay = 60 / (requests_per_minute - 1)
+	client._sleep_until = time.perf_counter() - client._intra_request_delay
+	client._wrapped_get = client._get
+	client._get = types.MethodType(_get, client)
+
 class Post(dict):
 	post_db = {}
 	topic_db = {}
@@ -110,7 +138,6 @@ class Post(dict):
 
 	def get_user(self, client):
 		if self['username'] not in Post.user_db:
-			time.sleep(0.1)
 			Post.user_db[self['username']] = client.user(self['username'])
 		self['user'] = Post.user_db[self['username']]
 		return self['user']
@@ -143,7 +170,6 @@ class Post(dict):
 	def fetch(client, from_id, to_id, verbose=False):
 		for post_id in range(from_id, to_id):
 			try:
-				time.sleep(0.5)
 				post = client._get('/posts/{}.json'.format(post_id))
 				post = Post(post)
 				if post.lint():
@@ -161,7 +187,6 @@ class Post(dict):
 			result = '.'
 
 			if post['topic_id'] not in Post.topic_db:
-				time.sleep(0.1)
 				try:
 					topic = client._get(
 						'/t/{}.json'.format(
@@ -604,10 +629,17 @@ def do_piechart(args):
 def do_pull(args):
 	'''Update the local cache of the database'''
 	cfg = config.get_config()
-	client = pydiscourse.DiscourseClient( cfg['96btool']['server'],
-			api_username=cfg['96btool']['username'],
-			api_key=config.get_password(cfg, '96btool'),
-			timeout=5)
+	if args.login:
+		client = pydiscourse.DiscourseClient( cfg['96btool']['server'],
+				api_username=cfg['96btool']['username'],
+				api_key=config.get_password(cfg, '96btool'),
+				timeout=5)
+		set_rate_limit(client, 60)
+	else:
+		client = pydiscourse.DiscourseClient(
+				cfg['96btool']['server'],
+				api_username=None, api_key=None, timeout=5)
+		set_rate_limit(client, 200)
 
 	if args.verbose:
 		sys.stdout.write('Reading existing post cache .')
@@ -924,6 +956,8 @@ def main(argv):
 		       help="Extra diagnostics")
 	s.add_argument('--db', default=defaultdb,
 		       help="File to update")
+	s.add_argument('--login', action='store_true',
+		       help="Do not fetch information anonymously")
 	s.add_argument('--pipe', action='store_true',
 		       help="Duplicate output on stdout")
 	s.add_argument('--refresh', action='store_true',
author	Daniel Thompson <daniel.thompson@linaro.org>	2019-01-15 16:27:09 +0000
committer	Daniel Thompson <daniel.thompson@linaro.org>	2019-01-15 16:27:09 +0000
commit	1eb902aba53ca316e4a617d38434b9be8ef91f53 (patch)
tree	16ed2148074b4bae592acbb513b076b90f843881
parent	e809847ed32fe77828e5a4a561a6463b0248a7e3 (diff)