aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorDaniel Thompson <daniel.thompson@linaro.org>2019-01-15 16:27:09 +0000
committerDaniel Thompson <daniel.thompson@linaro.org>2019-01-15 16:27:09 +0000
commit1eb902aba53ca316e4a617d38434b9be8ef91f53 (patch)
tree16ed2148074b4bae592acbb513b076b90f843881
parente809847ed32fe77828e5a4a561a6463b0248a7e3 (diff)
96btool: pull: Add a proper rate limiting framework
Rather than having "random" sleeps in the code, let's introduce a proper rate limiting framework that can keep (just) within the limits imposed by the server.
-rwxr-xr-xbin/96btool48
1 files changed, 41 insertions, 7 deletions
diff --git a/bin/96btool b/bin/96btool
index cf78dd5..6f3cba3 100755
--- a/bin/96btool
+++ b/bin/96btool
@@ -35,6 +35,7 @@ import os
import textwrap
import time
import traceback
+import types
import toys.chart as chart
import toys.collect as collect
@@ -99,6 +100,33 @@ def load_json(obj):
return ujson.load(obj)
+def _get(self, path, **kwargs):
+ # Calculate how long to nap
+ zzz = self._sleep_until - time.perf_counter()
+
+ # Nap if required
+ if zzz > 0:
+ time.sleep(zzz)
+
+ # Calculate new sleep until. Normally we can use the last sleep
+ # point but discourse is pretty sensitive (there must be no
+ # window in the last minute where we exceed the rate limit) so
+ # we reset the rate limiter whenever we have dropped below our
+ # maximum rate.
+ if zzz < self._intra_request_delay:
+ self._sleep_until = time.perf_counter() + self._intra_request_delay
+ else:
+ self._sleep_until += self._intra_request_delay
+
+ return self._wrapped_get(path, **kwargs)
+
+def set_rate_limit(client, requests_per_minute):
+ # Aim our request rate to be one below the true rate limit
+ client._intra_request_delay = 60 / (requests_per_minute - 1)
+ client._sleep_until = time.perf_counter() - client._intra_request_delay
+ client._wrapped_get = client._get
+ client._get = types.MethodType(_get, client)
+
class Post(dict):
post_db = {}
topic_db = {}
@@ -110,7 +138,6 @@ class Post(dict):
def get_user(self, client):
if self['username'] not in Post.user_db:
- time.sleep(0.1)
Post.user_db[self['username']] = client.user(self['username'])
self['user'] = Post.user_db[self['username']]
return self['user']
@@ -143,7 +170,6 @@ class Post(dict):
def fetch(client, from_id, to_id, verbose=False):
for post_id in range(from_id, to_id):
try:
- time.sleep(0.5)
post = client._get('/posts/{}.json'.format(post_id))
post = Post(post)
if post.lint():
@@ -161,7 +187,6 @@ class Post(dict):
result = '.'
if post['topic_id'] not in Post.topic_db:
- time.sleep(0.1)
try:
topic = client._get(
'/t/{}.json'.format(
@@ -604,10 +629,17 @@ def do_piechart(args):
def do_pull(args):
'''Update the local cache of the database'''
cfg = config.get_config()
- client = pydiscourse.DiscourseClient( cfg['96btool']['server'],
- api_username=cfg['96btool']['username'],
- api_key=config.get_password(cfg, '96btool'),
- timeout=5)
+ if args.login:
+ client = pydiscourse.DiscourseClient( cfg['96btool']['server'],
+ api_username=cfg['96btool']['username'],
+ api_key=config.get_password(cfg, '96btool'),
+ timeout=5)
+ set_rate_limit(client, 60)
+ else:
+ client = pydiscourse.DiscourseClient(
+ cfg['96btool']['server'],
+ api_username=None, api_key=None, timeout=5)
+ set_rate_limit(client, 200)
if args.verbose:
sys.stdout.write('Reading existing post cache .')
@@ -924,6 +956,8 @@ def main(argv):
help="Extra diagnostics")
s.add_argument('--db', default=defaultdb,
help="File to update")
+ s.add_argument('--login', action='store_true',
+ help="Do not fetch information anonymously")
s.add_argument('--pipe', action='store_true',
help="Duplicate output on stdout")
s.add_argument('--refresh', action='store_true',