download.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. #!/usr/bin/env python
  2. #
  3. # Copyright (c) 2018 Yousong Zhou <[email protected]>
  4. #
  5. # This is free software, licensed under the GNU General Public License v2.
  6. # See /LICENSE for more information.
import argparse
import calendar
import datetime
import errno
import fcntl
import json
import os
import os.path
import re
import shutil
import ssl
import subprocess
import sys
import time
try:
    import urllib2
except ImportError:
    # Python 3: Request and urlopen live in urllib.request; keep the old name
    # so existing call sites continue to work unchanged.
    import urllib.request as urllib2
  22. TMPDIR = os.environ.get('TMP_DIR') or '/tmp'
  23. TMPDIR_DL = os.path.join(TMPDIR, 'dl')
  24. DOWNLOAD_METHODS = []
  25. class PathException(Exception): pass
  26. class DownloadException(Exception): pass
  27. class Path(object):
  28. """Context class for preparing and cleaning up directories.
  29. If ``path`` ``isdir``, then it will be created on context enter.
  30. If ``keep`` is True, then ``path`` will NOT be removed on context exit
  31. """
  32. def __init__(self, path, isdir=True, keep=False):
  33. self.path = path
  34. self.isdir = isdir
  35. self.keep = keep
  36. def __enter__(self):
  37. if self.isdir:
  38. self.mkdir_all(self.path)
  39. return self
  40. def __exit__(self, exc_type, exc_value, traceback):
  41. if not self.keep:
  42. self.rm_all(self.path)
  43. @staticmethod
  44. def mkdir_all(path):
  45. """Same as mkdir -p."""
  46. names = os.path.split(path)
  47. p = ''
  48. for name in names:
  49. p = os.path.join(p, name)
  50. Path._mkdir(p)
  51. @staticmethod
  52. def _rmdir_all(dir_):
  53. names = Path._listdir(dir_)
  54. for name in names:
  55. p = os.path.join(dir_, name)
  56. if os.path.isdir(p):
  57. Path._rmdir_all(p)
  58. else:
  59. Path._remove(p)
  60. Path._rmdir(dir_)
  61. @staticmethod
  62. def _mkdir(path):
  63. Path._os_func(os.mkdir, path, errno.EEXIST)
  64. @staticmethod
  65. def _rmdir(path):
  66. Path._os_func(os.rmdir, path, errno.ENOENT)
  67. @staticmethod
  68. def _remove(path):
  69. Path._os_func(os.remove, path, errno.ENOENT)
  70. @staticmethod
  71. def _listdir(path):
  72. return Path._os_func(os.listdir, path, errno.ENOENT, default=[])
  73. @staticmethod
  74. def _os_func(func, path, errno, default=None):
  75. """Call func(path) in an idempotent way.
  76. On exception ``ex``, if the type is OSError and ``ex.errno == errno``,
  77. return ``default``, otherwise, re-raise
  78. """
  79. try:
  80. return func(path)
  81. except OSError as e:
  82. if e.errno == errno:
  83. return default
  84. else:
  85. raise
  86. @staticmethod
  87. def rm_all(path):
  88. """Same as rm -r."""
  89. if os.path.isdir(path):
  90. Path._rmdir_all(path)
  91. else:
  92. Path._remove(path)
  93. @staticmethod
  94. def untar(path, into=None):
  95. """Extract tarball at ``path`` into subdir ``into``.
  96. return subdir name if and only if there exists one, otherwise raise PathException
  97. """
  98. args = ('tar', '-C', into, '-xzf', path, '--no-same-permissions')
  99. subprocess.check_call(args, preexec_fn=lambda: os.umask(0o22))
  100. dirs = os.listdir(into)
  101. if len(dirs) == 1:
  102. return dirs[0]
  103. else:
  104. raise PathException('untar %s: expecting a single subdir, got %s' % (path, dirs))
  105. @staticmethod
  106. def tar(path, subdir, into=None, ts=None):
  107. """Pack ``path`` into tarball ``into``."""
  108. # --sort=name requires a recent build of GNU tar
  109. args = ['tar', '--numeric-owner', '--owner=0', '--group=0', '--sort=name']
  110. args += ['-C', path, '-cf', into, subdir]
  111. envs = os.environ.copy()
  112. if ts is not None:
  113. args.append('--mtime=@%d' % ts)
  114. if into.endswith('.xz'):
  115. envs['XZ_OPT'] = '-7e'
  116. args.append('-J')
  117. elif into.endswith('.bz2'):
  118. args.append('-j')
  119. elif into.endswith('.gz'):
  120. args.append('-z')
  121. envs['GZIP'] = '-n'
  122. else:
  123. raise PathException('unknown compression type %s' % into)
  124. subprocess.check_call(args, env=envs)
  125. class GitHubCommitTsCache(object):
  126. __cachef = 'github.commit.ts.cache'
  127. __cachen = 2048
  128. def __init__(self):
  129. Path.mkdir_all(TMPDIR_DL)
  130. self.cachef = os.path.join(TMPDIR_DL, self.__cachef)
  131. self.cache = {}
  132. def get(self, k):
  133. """Get timestamp with key ``k``."""
  134. fileno = os.open(self.cachef, os.O_RDONLY | os.O_CREAT)
  135. with os.fdopen(fileno) as fin:
  136. try:
  137. fcntl.lockf(fileno, fcntl.LOCK_SH)
  138. self._cache_init(fin)
  139. if k in self.cache:
  140. ts = self.cache[k][0]
  141. return ts
  142. finally:
  143. fcntl.lockf(fileno, fcntl.LOCK_UN)
  144. return None
  145. def set(self, k, v):
  146. """Update timestamp with ``k``."""
  147. fileno = os.open(self.cachef, os.O_RDWR | os.O_CREAT)
  148. with os.fdopen(fileno, 'wb+') as f:
  149. try:
  150. fcntl.lockf(fileno, fcntl.LOCK_EX)
  151. self._cache_init(f)
  152. self.cache[k] = (v, int(time.time()))
  153. self._cache_flush(f)
  154. finally:
  155. fcntl.lockf(fileno, fcntl.LOCK_UN)
  156. def _cache_init(self, fin):
  157. for line in fin:
  158. k, ts, updated = line.split()
  159. ts = int(ts)
  160. updated = int(updated)
  161. self.cache[k] = (ts, updated)
  162. def _cache_flush(self, fout):
  163. cache = sorted(self.cache.iteritems(), cmp=lambda a, b: b[1][1] - a[1][1])
  164. cache = cache[:self.__cachen]
  165. self.cache = {}
  166. os.ftruncate(fout.fileno(), 0)
  167. fout.seek(0, os.SEEK_SET)
  168. for k, ent in cache:
  169. ts = ent[0]
  170. updated = ent[1]
  171. line = '{0} {1} {2}\n'.format(k, ts, updated)
  172. fout.write(line)
  173. class DownloadMethod(object):
  174. """Base class of all download method."""
  175. def __init__(self, args):
  176. self.args = args
  177. self.urls = args.urls
  178. self.url = self.urls[0]
  179. self.dl_dir = args.dl_dir
  180. @classmethod
  181. def resolve(cls, args):
  182. """Resolve download method to use.
  183. return instance of subclass of DownloadMethod
  184. """
  185. for c in DOWNLOAD_METHODS:
  186. if c.match(args):
  187. return c(args)
  188. @staticmethod
  189. def match(args):
  190. """Return True if it can do the download."""
  191. return NotImplemented
  192. def download(self):
  193. """Do the download and put it into the download dir."""
  194. return NotImplemented
  195. class DownloadMethodGitHubTarball(DownloadMethod):
  196. """Download and repack archive tarabll from GitHub."""
  197. __repo_url_regex = re.compile(r'^(?:https|git)://github.com/(?P<owner>[^/]+)/(?P<repo>[^/]+)')
  198. def __init__(self, args):
  199. super(DownloadMethodGitHubTarball, self).__init__(args)
  200. self._init_owner_repo()
  201. self.version = args.version
  202. self.subdir = args.subdir
  203. self.source = args.source
  204. self.commit_ts = None # lazy load commit timestamp
  205. self.commit_ts_cache = GitHubCommitTsCache()
  206. self.name = 'github-tarball'
  207. @staticmethod
  208. def match(args):
  209. """Match if it's a GitHub clone url."""
  210. url = args.urls[0]
  211. proto = args.proto
  212. if proto == 'git' and isinstance(url, basestring) \
  213. and (url.startswith('https://github.com/') or url.startswith('git://github.com/')):
  214. return True
  215. return False
  216. def download(self):
  217. """Download and repack GitHub archive tarball."""
  218. self._init_commit_ts()
  219. with Path(TMPDIR_DL, keep=True) as dir_dl:
  220. # fetch tarball from GitHub
  221. tarball_path = os.path.join(dir_dl.path, self.subdir + '.tar.gz.dl')
  222. with Path(tarball_path, isdir=False):
  223. self._fetch(tarball_path)
  224. # unpack
  225. d = os.path.join(dir_dl.path, self.subdir + '.untar')
  226. with Path(d) as dir_untar:
  227. tarball_prefix = Path.untar(tarball_path, into=dir_untar.path)
  228. dir0 = os.path.join(dir_untar.path, tarball_prefix)
  229. dir1 = os.path.join(dir_untar.path, self.subdir)
  230. # submodules check
  231. if self._has_submodule(dir0):
  232. raise DownloadException('unable to fetch submodules\' source code')
  233. # rename subdir
  234. os.rename(dir0, dir1)
  235. # repack
  236. into=os.path.join(TMPDIR_DL, self.source)
  237. Path.tar(dir_untar.path, self.subdir, into=into, ts=self.commit_ts)
  238. # move to target location
  239. file1 = os.path.join(self.dl_dir, self.source)
  240. if into != file1:
  241. shutil.move(into, file1)
  242. def _has_submodule(self, dir_):
  243. m = os.path.join(dir_, '.gitmodules')
  244. try:
  245. st = os.stat(m)
  246. return st.st_size > 0
  247. except OSError as e:
  248. return e.errno != errno.ENOENT
  249. def _init_owner_repo(self):
  250. url = self.url
  251. m = self.__repo_url_regex.search(url)
  252. if m is None:
  253. raise DownloadException('invalid github url: %s' % url)
  254. owner = m.group('owner')
  255. repo = m.group('repo')
  256. if repo.endswith('.git'):
  257. repo = repo[:-4]
  258. self.owner = owner
  259. self.repo = repo
  260. def _init_commit_ts(self):
  261. if self.commit_ts is not None:
  262. return
  263. url = self._make_repo_url_path('git', 'commits', self.version)
  264. ct = self.commit_ts_cache.get(url)
  265. if ct is not None:
  266. self.commit_ts = ct
  267. return
  268. resp = self._make_request(url)
  269. data = resp.read()
  270. data = json.loads(data)
  271. date = data['committer']['date']
  272. date = datetime.datetime.strptime(date, '%Y-%m-%dT%H:%M:%SZ')
  273. date = date.timetuple()
  274. ct = calendar.timegm(date)
  275. self.commit_ts = ct
  276. self.commit_ts_cache.set(url, ct)
  277. def _fetch(self, path):
  278. """Fetch tarball of the specified version ref."""
  279. ref = self.version
  280. url = self._make_repo_url_path('tarball', ref)
  281. resp = self._make_request(url)
  282. with open(path, 'wb') as fout:
  283. while True:
  284. d = resp.read(4096)
  285. if not d:
  286. break
  287. fout.write(d)
  288. def _make_repo_url_path(self, *args):
  289. url = '/repos/{0}/{1}'.format(self.owner, self.repo)
  290. if args:
  291. url += '/' + '/'.join(args)
  292. return url
  293. def _make_request(self, path):
  294. """Request GitHub API endpoint on ``path``."""
  295. url = 'https://api.github.com' + path
  296. headers = {
  297. 'Accept': 'application/vnd.github.v3+json',
  298. 'User-Agent': 'OpenWrt',
  299. }
  300. req = urllib2.Request(url, headers=headers)
  301. sslcontext = ssl._create_unverified_context()
  302. fileobj = urllib2.urlopen(req, context=sslcontext)
  303. return fileobj
  304. class DownloadMethodCatchall(DownloadMethod):
  305. """Dummy method that knows names but not ways of download."""
  306. def __init__(self, args):
  307. super(DownloadMethodCatchall, self).__init__(args)
  308. self.args = args
  309. self.proto = args.proto
  310. self.name = self._resolve_name()
  311. def _resolve_name(self):
  312. if self.proto:
  313. return self.proto
  314. methods_map = (
  315. ('default', ('@APACHE/', '@GITHUB/', '@GNOME/', '@GNU/',
  316. '@KERNEL/', '@SF/', '@SAVANNAH/', 'ftp://', 'http://',
  317. 'https://', 'file://')),
  318. ('git', ('git://', )),
  319. ('svn', ('svn://', )),
  320. ('cvs', ('cvs://', )),
  321. ('bzr', ('sftp://', )),
  322. ('bzr', ('sftp://', )),
  323. ('unknown', ('', )),
  324. )
  325. for name, prefixes in methods_map:
  326. if any(url.startswith(prefix) for prefix in prefixes for url in self.urls):
  327. return name
  328. @staticmethod
  329. def match(args):
  330. """Return True."""
  331. return True
  332. def download(self):
  333. """Not implemented.
  334. raise DownloadException
  335. """
  336. raise DownloadException('download method for %s is not yet implemented' % self.name)
  337. # order matters
  338. DOWNLOAD_METHODS = [
  339. DownloadMethodGitHubTarball,
  340. DownloadMethodCatchall,
  341. ]
  342. def main():
  343. parser = argparse.ArgumentParser()
  344. parser.add_argument('action', choices=('dl_method', 'dl'), help='Action to take')
  345. parser.add_argument('--urls', nargs='+', metavar='URL', help='Download URLs')
  346. parser.add_argument('--proto', help='Download proto')
  347. parser.add_argument('--subdir', help='Source code subdir name')
  348. parser.add_argument('--version', help='Source code version')
  349. parser.add_argument('--source', help='Source tarball filename')
  350. parser.add_argument('--dl-dir', default=os.getcwd(), help='Download dir')
  351. args = parser.parse_args()
  352. if args.action == 'dl_method':
  353. method = DownloadMethod.resolve(args)
  354. sys.stdout.write(method.name + '\n')
  355. elif args.action == 'dl':
  356. method = DownloadMethod.resolve(args)
  357. try:
  358. method.download()
  359. except Exception:
  360. raise
  361. if __name__ == '__main__':
  362. main()