web.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720
  1. # encoding: utf-8
  2. #
  3. # Copyright (c) 2014 Dean Jackson <[email protected]>
  4. #
  5. # MIT Licence. See http://opensource.org/licenses/MIT
  6. #
  7. # Created on 2014-02-15
  8. #
  9. """Lightweight HTTP library with a requests-like interface."""
  10. from __future__ import absolute_import, print_function
  11. import codecs
  12. import json
  13. import mimetypes
  14. import os
  15. import random
  16. import re
  17. import socket
  18. import string
  19. import unicodedata
  20. import urllib
  21. import urllib2
  22. import urlparse
  23. import zlib
  24. __version__ = open(os.path.join(os.path.dirname(__file__), 'version')).read()
  25. USER_AGENT = (u'Alfred-Workflow/' + __version__ +
  26. ' (+http://www.deanishe.net/alfred-workflow)')
  27. # Valid characters for multipart form data boundaries
  28. BOUNDARY_CHARS = string.digits + string.ascii_letters
  29. # HTTP response codes
  30. RESPONSES = {
  31. 100: 'Continue',
  32. 101: 'Switching Protocols',
  33. 200: 'OK',
  34. 201: 'Created',
  35. 202: 'Accepted',
  36. 203: 'Non-Authoritative Information',
  37. 204: 'No Content',
  38. 205: 'Reset Content',
  39. 206: 'Partial Content',
  40. 300: 'Multiple Choices',
  41. 301: 'Moved Permanently',
  42. 302: 'Found',
  43. 303: 'See Other',
  44. 304: 'Not Modified',
  45. 305: 'Use Proxy',
  46. 307: 'Temporary Redirect',
  47. 400: 'Bad Request',
  48. 401: 'Unauthorized',
  49. 402: 'Payment Required',
  50. 403: 'Forbidden',
  51. 404: 'Not Found',
  52. 405: 'Method Not Allowed',
  53. 406: 'Not Acceptable',
  54. 407: 'Proxy Authentication Required',
  55. 408: 'Request Timeout',
  56. 409: 'Conflict',
  57. 410: 'Gone',
  58. 411: 'Length Required',
  59. 412: 'Precondition Failed',
  60. 413: 'Request Entity Too Large',
  61. 414: 'Request-URI Too Long',
  62. 415: 'Unsupported Media Type',
  63. 416: 'Requested Range Not Satisfiable',
  64. 417: 'Expectation Failed',
  65. 500: 'Internal Server Error',
  66. 501: 'Not Implemented',
  67. 502: 'Bad Gateway',
  68. 503: 'Service Unavailable',
  69. 504: 'Gateway Timeout',
  70. 505: 'HTTP Version Not Supported'
  71. }
  72. def str_dict(dic):
  73. """Convert keys and values in ``dic`` into UTF-8-encoded :class:`str`.
  74. :param dic: Mapping of Unicode strings
  75. :type dic: dict
  76. :returns: Dictionary containing only UTF-8 strings
  77. :rtype: dict
  78. """
  79. if isinstance(dic, CaseInsensitiveDictionary):
  80. dic2 = CaseInsensitiveDictionary()
  81. else:
  82. dic2 = {}
  83. for k, v in dic.items():
  84. if isinstance(k, unicode):
  85. k = k.encode('utf-8')
  86. if isinstance(v, unicode):
  87. v = v.encode('utf-8')
  88. dic2[k] = v
  89. return dic2
  90. class NoRedirectHandler(urllib2.HTTPRedirectHandler):
  91. """Prevent redirections."""
  92. def redirect_request(self, *args):
  93. """Ignore redirect."""
  94. return None
  95. # Adapted from https://gist.github.com/babakness/3901174
  96. class CaseInsensitiveDictionary(dict):
  97. """Dictionary with caseless key search.
  98. Enables case insensitive searching while preserving case sensitivity
  99. when keys are listed, ie, via keys() or items() methods.
  100. Works by storing a lowercase version of the key as the new key and
  101. stores the original key-value pair as the key's value
  102. (values become dictionaries).
  103. """
  104. def __init__(self, initval=None):
  105. """Create new case-insensitive dictionary."""
  106. if isinstance(initval, dict):
  107. for key, value in initval.iteritems():
  108. self.__setitem__(key, value)
  109. elif isinstance(initval, list):
  110. for (key, value) in initval:
  111. self.__setitem__(key, value)
  112. def __contains__(self, key):
  113. return dict.__contains__(self, key.lower())
  114. def __getitem__(self, key):
  115. return dict.__getitem__(self, key.lower())['val']
  116. def __setitem__(self, key, value):
  117. return dict.__setitem__(self, key.lower(), {'key': key, 'val': value})
  118. def get(self, key, default=None):
  119. """Return value for case-insensitive key or default."""
  120. try:
  121. v = dict.__getitem__(self, key.lower())
  122. except KeyError:
  123. return default
  124. else:
  125. return v['val']
  126. def update(self, other):
  127. """Update values from other ``dict``."""
  128. for k, v in other.items():
  129. self[k] = v
  130. def items(self):
  131. """Return ``(key, value)`` pairs."""
  132. return [(v['key'], v['val']) for v in dict.itervalues(self)]
  133. def keys(self):
  134. """Return original keys."""
  135. return [v['key'] for v in dict.itervalues(self)]
  136. def values(self):
  137. """Return all values."""
  138. return [v['val'] for v in dict.itervalues(self)]
  139. def iteritems(self):
  140. """Iterate over ``(key, value)`` pairs."""
  141. for v in dict.itervalues(self):
  142. yield v['key'], v['val']
  143. def iterkeys(self):
  144. """Iterate over original keys."""
  145. for v in dict.itervalues(self):
  146. yield v['key']
  147. def itervalues(self):
  148. """Interate over values."""
  149. for v in dict.itervalues(self):
  150. yield v['val']
  151. class Request(urllib2.Request):
  152. """Subclass of :class:`urllib2.Request` that supports custom methods."""
  153. def __init__(self, *args, **kwargs):
  154. """Create a new :class:`Request`."""
  155. self._method = kwargs.pop('method', None)
  156. urllib2.Request.__init__(self, *args, **kwargs)
  157. def get_method(self):
  158. return self._method.upper()
  159. class Response(object):
  160. """
  161. Returned by :func:`request` / :func:`get` / :func:`post` functions.
  162. Simplified version of the ``Response`` object in the ``requests`` library.
  163. >>> r = request('http://www.google.com')
  164. >>> r.status_code
  165. 200
  166. >>> r.encoding
  167. ISO-8859-1
  168. >>> r.content # bytes
  169. <html> ...
  170. >>> r.text # unicode, decoded according to charset in HTTP header/meta tag
  171. u'<html> ...'
  172. >>> r.json() # content parsed as JSON
  173. """
  174. def __init__(self, request, stream=False):
  175. """Call `request` with :mod:`urllib2` and process results.
  176. :param request: :class:`Request` instance
  177. :param stream: Whether to stream response or retrieve it all at once
  178. :type stream: bool
  179. """
  180. self.request = request
  181. self._stream = stream
  182. self.url = None
  183. self.raw = None
  184. self._encoding = None
  185. self.error = None
  186. self.status_code = None
  187. self.reason = None
  188. self.headers = CaseInsensitiveDictionary()
  189. self._content = None
  190. self._content_loaded = False
  191. self._gzipped = False
  192. # Execute query
  193. try:
  194. self.raw = urllib2.urlopen(request)
  195. except urllib2.HTTPError as err:
  196. self.error = err
  197. try:
  198. self.url = err.geturl()
  199. # sometimes (e.g. when authentication fails)
  200. # urllib can't get a URL from an HTTPError
  201. # This behaviour changes across Python versions,
  202. # so no test cover (it isn't important).
  203. except AttributeError: # pragma: no cover
  204. pass
  205. self.status_code = err.code
  206. else:
  207. self.status_code = self.raw.getcode()
  208. self.url = self.raw.geturl()
  209. self.reason = RESPONSES.get(self.status_code)
  210. # Parse additional info if request succeeded
  211. if not self.error:
  212. headers = self.raw.info()
  213. self.transfer_encoding = headers.getencoding()
  214. self.mimetype = headers.gettype()
  215. for key in headers.keys():
  216. self.headers[key.lower()] = headers.get(key)
  217. # Is content gzipped?
  218. # Transfer-Encoding appears to not be used in the wild
  219. # (contrary to the HTTP standard), but no harm in testing
  220. # for it
  221. if 'gzip' in headers.get('content-encoding', '') or \
  222. 'gzip' in headers.get('transfer-encoding', ''):
  223. self._gzipped = True
  224. @property
  225. def stream(self):
  226. """Whether response is streamed.
  227. Returns:
  228. bool: `True` if response is streamed.
  229. """
  230. return self._stream
  231. @stream.setter
  232. def stream(self, value):
  233. if self._content_loaded:
  234. raise RuntimeError("`content` has already been read from "
  235. "this Response.")
  236. self._stream = value
  237. def json(self):
  238. """Decode response contents as JSON.
  239. :returns: object decoded from JSON
  240. :rtype: list, dict or unicode
  241. """
  242. return json.loads(self.content, self.encoding or 'utf-8')
  243. @property
  244. def encoding(self):
  245. """Text encoding of document or ``None``.
  246. :returns: Text encoding if found.
  247. :rtype: str or ``None``
  248. """
  249. if not self._encoding:
  250. self._encoding = self._get_encoding()
  251. return self._encoding
  252. @property
  253. def content(self):
  254. """Raw content of response (i.e. bytes).
  255. :returns: Body of HTTP response
  256. :rtype: str
  257. """
  258. if not self._content:
  259. # Decompress gzipped content
  260. if self._gzipped:
  261. decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
  262. self._content = decoder.decompress(self.raw.read())
  263. else:
  264. self._content = self.raw.read()
  265. self._content_loaded = True
  266. return self._content
  267. @property
  268. def text(self):
  269. """Unicode-decoded content of response body.
  270. If no encoding can be determined from HTTP headers or the content
  271. itself, the encoded response body will be returned instead.
  272. :returns: Body of HTTP response
  273. :rtype: unicode or str
  274. """
  275. if self.encoding:
  276. return unicodedata.normalize('NFC', unicode(self.content,
  277. self.encoding))
  278. return self.content
  279. def iter_content(self, chunk_size=4096, decode_unicode=False):
  280. """Iterate over response data.
  281. .. versionadded:: 1.6
  282. :param chunk_size: Number of bytes to read into memory
  283. :type chunk_size: int
  284. :param decode_unicode: Decode to Unicode using detected encoding
  285. :type decode_unicode: bool
  286. :returns: iterator
  287. """
  288. if not self.stream:
  289. raise RuntimeError("You cannot call `iter_content` on a "
  290. "Response unless you passed `stream=True`"
  291. " to `get()`/`post()`/`request()`.")
  292. if self._content_loaded:
  293. raise RuntimeError(
  294. "`content` has already been read from this Response.")
  295. def decode_stream(iterator, r):
  296. dec = codecs.getincrementaldecoder(r.encoding)(errors='replace')
  297. for chunk in iterator:
  298. data = dec.decode(chunk)
  299. if data:
  300. yield data
  301. data = dec.decode(b'', final=True)
  302. if data: # pragma: no cover
  303. yield data
  304. def generate():
  305. if self._gzipped:
  306. decoder = zlib.decompressobj(16 + zlib.MAX_WBITS)
  307. while True:
  308. chunk = self.raw.read(chunk_size)
  309. if not chunk:
  310. break
  311. if self._gzipped:
  312. chunk = decoder.decompress(chunk)
  313. yield chunk
  314. chunks = generate()
  315. if decode_unicode and self.encoding:
  316. chunks = decode_stream(chunks, self)
  317. return chunks
  318. def save_to_path(self, filepath):
  319. """Save retrieved data to file at ``filepath``.
  320. .. versionadded: 1.9.6
  321. :param filepath: Path to save retrieved data.
  322. """
  323. filepath = os.path.abspath(filepath)
  324. dirname = os.path.dirname(filepath)
  325. if not os.path.exists(dirname):
  326. os.makedirs(dirname)
  327. self.stream = True
  328. with open(filepath, 'wb') as fileobj:
  329. for data in self.iter_content():
  330. fileobj.write(data)
  331. def raise_for_status(self):
  332. """Raise stored error if one occurred.
  333. error will be instance of :class:`urllib2.HTTPError`
  334. """
  335. if self.error is not None:
  336. raise self.error
  337. return
  338. def _get_encoding(self):
  339. """Get encoding from HTTP headers or content.
  340. :returns: encoding or `None`
  341. :rtype: unicode or ``None``
  342. """
  343. headers = self.raw.info()
  344. encoding = None
  345. if headers.getparam('charset'):
  346. encoding = headers.getparam('charset')
  347. # HTTP Content-Type header
  348. for param in headers.getplist():
  349. if param.startswith('charset='):
  350. encoding = param[8:]
  351. break
  352. if not self.stream: # Try sniffing response content
  353. # Encoding declared in document should override HTTP headers
  354. if self.mimetype == 'text/html': # sniff HTML headers
  355. m = re.search(r"""<meta.+charset=["']{0,1}(.+?)["'].*>""",
  356. self.content)
  357. if m:
  358. encoding = m.group(1)
  359. elif ((self.mimetype.startswith('application/')
  360. or self.mimetype.startswith('text/'))
  361. and 'xml' in self.mimetype):
  362. m = re.search(r"""<?xml.+encoding=["'](.+?)["'][^>]*\?>""",
  363. self.content)
  364. if m:
  365. encoding = m.group(1)
  366. # Format defaults
  367. if self.mimetype == 'application/json' and not encoding:
  368. # The default encoding for JSON
  369. encoding = 'utf-8'
  370. elif self.mimetype == 'application/xml' and not encoding:
  371. # The default for 'application/xml'
  372. encoding = 'utf-8'
  373. if encoding:
  374. encoding = encoding.lower()
  375. return encoding
  376. def request(method, url, params=None, data=None, headers=None, cookies=None,
  377. files=None, auth=None, timeout=60, allow_redirects=False,
  378. stream=False):
  379. """Initiate an HTTP(S) request. Returns :class:`Response` object.
  380. :param method: 'GET' or 'POST'
  381. :type method: unicode
  382. :param url: URL to open
  383. :type url: unicode
  384. :param params: mapping of URL parameters
  385. :type params: dict
  386. :param data: mapping of form data ``{'field_name': 'value'}`` or
  387. :class:`str`
  388. :type data: dict or str
  389. :param headers: HTTP headers
  390. :type headers: dict
  391. :param cookies: cookies to send to server
  392. :type cookies: dict
  393. :param files: files to upload (see below).
  394. :type files: dict
  395. :param auth: username, password
  396. :type auth: tuple
  397. :param timeout: connection timeout limit in seconds
  398. :type timeout: int
  399. :param allow_redirects: follow redirections
  400. :type allow_redirects: bool
  401. :param stream: Stream content instead of fetching it all at once.
  402. :type stream: bool
  403. :returns: Response object
  404. :rtype: :class:`Response`
  405. The ``files`` argument is a dictionary::
  406. {'fieldname' : { 'filename': 'blah.txt',
  407. 'content': '<binary data>',
  408. 'mimetype': 'text/plain'}
  409. }
  410. * ``fieldname`` is the name of the field in the HTML form.
  411. * ``mimetype`` is optional. If not provided, :mod:`mimetypes` will
  412. be used to guess the mimetype, or ``application/octet-stream``
  413. will be used.
  414. """
  415. # TODO: cookies
  416. socket.setdefaulttimeout(timeout)
  417. # Default handlers
  418. openers = []
  419. if not allow_redirects:
  420. openers.append(NoRedirectHandler())
  421. if auth is not None: # Add authorisation handler
  422. username, password = auth
  423. password_manager = urllib2.HTTPPasswordMgrWithDefaultRealm()
  424. password_manager.add_password(None, url, username, password)
  425. auth_manager = urllib2.HTTPBasicAuthHandler(password_manager)
  426. openers.append(auth_manager)
  427. # Install our custom chain of openers
  428. opener = urllib2.build_opener(*openers)
  429. urllib2.install_opener(opener)
  430. if not headers:
  431. headers = CaseInsensitiveDictionary()
  432. else:
  433. headers = CaseInsensitiveDictionary(headers)
  434. if 'user-agent' not in headers:
  435. headers['user-agent'] = USER_AGENT
  436. # Accept gzip-encoded content
  437. encodings = [s.strip() for s in
  438. headers.get('accept-encoding', '').split(',')]
  439. if 'gzip' not in encodings:
  440. encodings.append('gzip')
  441. headers['accept-encoding'] = ', '.join(encodings)
  442. if files:
  443. if not data:
  444. data = {}
  445. new_headers, data = encode_multipart_formdata(data, files)
  446. headers.update(new_headers)
  447. elif data and isinstance(data, dict):
  448. data = urllib.urlencode(str_dict(data))
  449. # Make sure everything is encoded text
  450. headers = str_dict(headers)
  451. if isinstance(url, unicode):
  452. url = url.encode('utf-8')
  453. if params: # GET args (POST args are handled in encode_multipart_formdata)
  454. scheme, netloc, path, query, fragment = urlparse.urlsplit(url)
  455. if query: # Combine query string and `params`
  456. url_params = urlparse.parse_qs(query)
  457. # `params` take precedence over URL query string
  458. url_params.update(params)
  459. params = url_params
  460. query = urllib.urlencode(str_dict(params), doseq=True)
  461. url = urlparse.urlunsplit((scheme, netloc, path, query, fragment))
  462. req = Request(url, data, headers, method=method)
  463. return Response(req, stream)
  464. def get(url, params=None, headers=None, cookies=None, auth=None,
  465. timeout=60, allow_redirects=True, stream=False):
  466. """Initiate a GET request. Arguments as for :func:`request`.
  467. :returns: :class:`Response` instance
  468. """
  469. return request('GET', url, params, headers=headers, cookies=cookies,
  470. auth=auth, timeout=timeout, allow_redirects=allow_redirects,
  471. stream=stream)
  472. def delete(url, params=None, data=None, headers=None, cookies=None, auth=None,
  473. timeout=60, allow_redirects=True, stream=False):
  474. """Initiate a DELETE request. Arguments as for :func:`request`.
  475. :returns: :class:`Response` instance
  476. """
  477. return request('DELETE', url, params, data, headers=headers,
  478. cookies=cookies, auth=auth, timeout=timeout,
  479. allow_redirects=allow_redirects, stream=stream)
  480. def post(url, params=None, data=None, headers=None, cookies=None, files=None,
  481. auth=None, timeout=60, allow_redirects=False, stream=False):
  482. """Initiate a POST request. Arguments as for :func:`request`.
  483. :returns: :class:`Response` instance
  484. """
  485. return request('POST', url, params, data, headers, cookies, files, auth,
  486. timeout, allow_redirects, stream)
  487. def put(url, params=None, data=None, headers=None, cookies=None, files=None,
  488. auth=None, timeout=60, allow_redirects=False, stream=False):
  489. """Initiate a PUT request. Arguments as for :func:`request`.
  490. :returns: :class:`Response` instance
  491. """
  492. return request('PUT', url, params, data, headers, cookies, files, auth,
  493. timeout, allow_redirects, stream)
  494. def encode_multipart_formdata(fields, files):
  495. """Encode form data (``fields``) and ``files`` for POST request.
  496. :param fields: mapping of ``{name : value}`` pairs for normal form fields.
  497. :type fields: dict
  498. :param files: dictionary of fieldnames/files elements for file data.
  499. See below for details.
  500. :type files: dict of :class:`dict`
  501. :returns: ``(headers, body)`` ``headers`` is a
  502. :class:`dict` of HTTP headers
  503. :rtype: 2-tuple ``(dict, str)``
  504. The ``files`` argument is a dictionary::
  505. {'fieldname' : { 'filename': 'blah.txt',
  506. 'content': '<binary data>',
  507. 'mimetype': 'text/plain'}
  508. }
  509. - ``fieldname`` is the name of the field in the HTML form.
  510. - ``mimetype`` is optional. If not provided, :mod:`mimetypes` will
  511. be used to guess the mimetype, or ``application/octet-stream``
  512. will be used.
  513. """
  514. def get_content_type(filename):
  515. """Return or guess mimetype of ``filename``.
  516. :param filename: filename of file
  517. :type filename: unicode/str
  518. :returns: mime-type, e.g. ``text/html``
  519. :rtype: str
  520. """
  521. return mimetypes.guess_type(filename)[0] or 'application/octet-stream'
  522. boundary = '-----' + ''.join(random.choice(BOUNDARY_CHARS)
  523. for i in range(30))
  524. CRLF = '\r\n'
  525. output = []
  526. # Normal form fields
  527. for (name, value) in fields.items():
  528. if isinstance(name, unicode):
  529. name = name.encode('utf-8')
  530. if isinstance(value, unicode):
  531. value = value.encode('utf-8')
  532. output.append('--' + boundary)
  533. output.append('Content-Disposition: form-data; name="%s"' % name)
  534. output.append('')
  535. output.append(value)
  536. # Files to upload
  537. for name, d in files.items():
  538. filename = d[u'filename']
  539. content = d[u'content']
  540. if u'mimetype' in d:
  541. mimetype = d[u'mimetype']
  542. else:
  543. mimetype = get_content_type(filename)
  544. if isinstance(name, unicode):
  545. name = name.encode('utf-8')
  546. if isinstance(filename, unicode):
  547. filename = filename.encode('utf-8')
  548. if isinstance(mimetype, unicode):
  549. mimetype = mimetype.encode('utf-8')
  550. output.append('--' + boundary)
  551. output.append('Content-Disposition: form-data; '
  552. 'name="%s"; filename="%s"' % (name, filename))
  553. output.append('Content-Type: %s' % mimetype)
  554. output.append('')
  555. output.append(content)
  556. output.append('--' + boundary + '--')
  557. output.append('')
  558. body = CRLF.join(output)
  559. headers = {
  560. 'Content-Type': 'multipart/form-data; boundary=%s' % boundary,
  561. 'Content-Length': str(len(body)),
  562. }
  563. return (headers, body)