diff options
author | Daniel Thompson <daniel.thompson@linaro.org> | 2019-01-15 16:27:09 +0000 |
---|---|---|
committer | Daniel Thompson <daniel.thompson@linaro.org> | 2019-01-15 16:27:09 +0000 |
commit | 1eb902aba53ca316e4a617d38434b9be8ef91f53 (patch) | |
tree | 16ed2148074b4bae592acbb513b076b90f843881 | |
parent | e809847ed32fe77828e5a4a561a6463b0248a7e3 (diff) |
96btool: pull: Add a proper rate limiting framework
Rather than having "random" sleeps in the code, let's introduce a proper
rate limiting framework that can keep (just) within the limits imposed
by the server.
-rwxr-xr-x | bin/96btool | 48 |
1 files changed, 41 insertions, 7 deletions
diff --git a/bin/96btool b/bin/96btool index cf78dd5..6f3cba3 100755 --- a/bin/96btool +++ b/bin/96btool @@ -35,6 +35,7 @@ import os import textwrap import time import traceback +import types import toys.chart as chart import toys.collect as collect @@ -99,6 +100,33 @@ def load_json(obj): return ujson.load(obj) +def _get(self, path, **kwargs): + # Calculate how long to nap + zzz = self._sleep_until - time.perf_counter() + + # Nap if required + if zzz > 0: + time.sleep(zzz) + + # Calculate new sleep until. Normally we can use the last sleep + # point but discourse is pretty sensitive (there must be no + # window in the last minute where we exceed the rate limit) so + # we reset the rate limiter whenever have dropped below our + # maximum rate. + if zzz < self._intra_request_delay: + self._sleep_until = time.perf_counter() + self._intra_request_delay + else: + self._sleep_until += self._intra_request_delay + + return self._wrapped_get(path, **kwargs) + +def set_rate_limit(client, requests_per_minute): + # Aim our request rate to be one below the true rate limit + client._intra_request_delay = 60 / (requests_per_minute - 1) + client._sleep_until = time.perf_counter() - client._intra_request_delay + client._wrapped_get = client._get + client._get = types.MethodType(_get, client) + class Post(dict): post_db = {} topic_db = {} @@ -110,7 +138,6 @@ class Post(dict): def get_user(self, client): if self['username'] not in Post.user_db: - time.sleep(0.1) Post.user_db[self['username']] = client.user(self['username']) self['user'] = Post.user_db[self['username']] return self['user'] @@ -143,7 +170,6 @@ class Post(dict): def fetch(client, from_id, to_id, verbose=False): for post_id in range(from_id, to_id): try: - time.sleep(0.5) post = client._get('/posts/{}.json'.format(post_id)) post = Post(post) if post.lint(): @@ -161,7 +187,6 @@ class Post(dict): result = '.' 
if post['topic_id'] not in Post.topic_db: - time.sleep(0.1) try: topic = client._get( '/t/{}.json'.format( @@ -604,10 +629,17 @@ def do_piechart(args): def do_pull(args): '''Update the local cache of the database''' cfg = config.get_config() - client = pydiscourse.DiscourseClient( cfg['96btool']['server'], - api_username=cfg['96btool']['username'], - api_key=config.get_password(cfg, '96btool'), - timeout=5) + if args.login: + client = pydiscourse.DiscourseClient( cfg['96btool']['server'], + api_username=cfg['96btool']['username'], + api_key=config.get_password(cfg, '96btool'), + timeout=5) + set_rate_limit(client, 60) + else: + client = pydiscourse.DiscourseClient( + cfg['96btool']['server'], + api_username=None, api_key=None, timeout=5) + set_rate_limit(client, 200) if args.verbose: sys.stdout.write('Reading existing post cache .') @@ -924,6 +956,8 @@ def main(argv): help="Extra diagnostics") s.add_argument('--db', default=defaultdb, help="File to update") + s.add_argument('--login', action='store_true', + help="Do not fetch information anonymously") s.add_argument('--pipe', action='store_true', help="Duplicate output on stdout") s.add_argument('--refresh', action='store_true', |