summary refs log tree commit diff
path: root/download_content_yes_to_lic.py
diff options
context:
space:
mode:
author Deepti B. Kalakeri <deepti.kalakeri@linaro.org> 2012-01-06 11:09:01 +0000
committer Deepti B. Kalakeri <deepti.kalakeri@linaro.org> 2012-01-06 11:09:01 +0000
commit 25ffb0f293fcb99b353a79721d4f3924dcbbbb4c (patch)
tree 2fed81853fae1f158f45e7387a976257f8c8f411 /download_content_yes_to_lic.py
parent bdff58b9571dfdbda7c7a36b436d5868bf7d59b0 (diff)
Fixes the hwpack download failure because of EULA
Diffstat (limited to 'download_content_yes_to_lic.py')
-rw-r--r-- download_content_yes_to_lic.py 118
1 files changed, 118 insertions, 0 deletions
diff --git a/download_content_yes_to_lic.py b/download_content_yes_to_lic.py
new file mode 100644
index 0000000..4a31941
--- /dev/null
+++ b/download_content_yes_to_lic.py
@@ -0,0 +1,118 @@
+#!/usr/bin/env python
+
+# Changes required to address EULA for the origen hwpacks
+
+import argparse
+import os
+import pycurl
+import re
+import urlparse
+
+class LicenseProtectedFileFetcher(object):
+    """Fetch a file from the web that may be protected by a license redirect.
+
+    This is designed to run on snapshots.linaro.org. License HTML files are
+    in the form:
+
+    <vendor>.html has a link to <vendor>-accepted.html
+
+    If self.get is pointed at a file that has to go through one of these
+    licenses, it should be able to automatically accept the license and
+    download the file.
+
+    cURL is configured to read and write cookies.txt in the current
+    directory, so an accepted license is reused by following downloads.
+    Call self.close() when finished: closing the handle is what lets cURL
+    write the license accept cookie out to cookies.txt.
+
+    """
+    # Finds an href whose target ends in -accepted.html. The capture may not
+    # contain a quote, so it cannot run on from an earlier href on the line.
+    _ACCEPT_LINK = re.compile(r"""href=.*?["']([^"']*?-accepted\.html)""")
+
+    def __init__(self):
+        """Set up cURL to follow redirects and persist cookies."""
+        # Initialised here so the header helpers work before the first fetch.
+        self.body = ""
+        self.header = ""
+
+        self.curl = pycurl.Curl()
+        self.curl.setopt(pycurl.FOLLOWLOCATION, 1)
+        self.curl.setopt(pycurl.WRITEFUNCTION, self._write_body)
+        self.curl.setopt(pycurl.HEADERFUNCTION, self._write_header)
+        self.curl.setopt(pycurl.COOKIEFILE, "cookies.txt")
+        self.curl.setopt(pycurl.COOKIEJAR, "cookies.txt")
+
+    def _get(self, url):
+        """Clear out header and body storage, fetch URL, filling them in."""
+        self.curl.setopt(pycurl.URL, url)
+
+        self.body = ""
+        self.header = ""
+
+        self.curl.perform()
+
+    def get(self, url):
+        """Fetch the requested URL, accepting licenses; return the file body.
+
+        Fetches the file at url. If a redirect is encountered, it is
+        expected to be to a license page that has an accept link. Follow
+        that link, then download the original file.
+
+        Returns the body of the last response received.
+
+        """
+        self._get(url)
+
+        location = self._get_location()
+        if not location:
+            # No redirect was reported, so the body is the requested file.
+            return self.body
+
+        # We have been redirected. Expect the page to link to its own name
+        # with -accepted inserted before .html (ste.html -> ste-accepted.html).
+        new_file = self._find_accept_link(self.body)
+        if not new_file:
+            return self.body
+
+        # Accept the license. Location may be relative, so resolve it against
+        # the requested URL; the accept link is then resolved relative to the
+        # license page, which handles relative and absolute hrefs alike.
+        license_url = urlparse.urljoin(url, location)
+        self._get(urlparse.urljoin(license_url, new_file))
+
+        # Accepting *should* take us to the requested file via a redirect.
+        # When a redirect was seen, fetch the original URL again now that the
+        # license has been accepted.
+        if self._get_location():
+            self._get(url)
+
+        return self.body
+
+    def _find_accept_link(self, body):
+        """Return the last license accept link found in body, or None."""
+        found = None
+        for line in body.splitlines():
+            link_search = self._ACCEPT_LINK.search(line)
+            if link_search:
+                found = link_search.group(1)
+        return found
+
+    def _search_header(self, field):
+        """Search header for the supplied field, return its value / None.
+
+        Field names are matched case-insensitively and only at the start of
+        a line, so "Location" does not match "Content-Location". The first
+        matching line wins; surrounding whitespace is stripped.
+        """
+        pattern = re.compile(r"^%s:\s*(.*?)\s*$" % re.escape(field), re.I)
+        for line in self.header.splitlines():
+            search = pattern.search(line)
+            if search:
+                return search.group(1)
+        return None
+
+    def _get_location(self):
+        """Return content of Location field in header / None"""
+        return self._search_header("Location")
+
+    def _write_body(self, buf):
+        """Used by curl as a sink for body content"""
+        self.body += buf
+
+    def _write_header(self, buf):
+        """Used by curl as a sink for header content"""
+        self.header += buf
+
+    def close(self):
+        """Wrapper to close curl - this will allow curl to write out cookies"""
+        self.curl.close()