# Source code for ubelt.util_download

# -*- coding: utf-8 -*-
"""
Helpers for downloading data
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from os.path import basename, join, exists
import os
import shutil
import tempfile
from ubelt import util_platform

# try:  # nocover
# from requests import get as urlopen
# _have_requests = True
# except ImportError:  # nocover
# _have_requests = False
import sys
if sys.version_info[0] == 2:  # nocover
    from urlparse import urlparse  # NOQA
    from urllib2 import urlopen  # NOQA
    from urllib2 import URLError  # NOQA
else:
    from urllib.request import urlopen  # NOQA
    from urllib.parse import urlparse  # NOQA
    from urllib.error import URLError  # NOQA


# Select the progress-bar implementation bound to ``_tqdm``.
try:  # nocover
    # NOTE: the unconditional raise below forces the ImportError branch, so
    # the tqdm import that follows it is never reached and ProgIter is always
    # the implementation that gets used.
    raise ImportError()
    from tqdm import tqdm as _tqdm
except ImportError:  # nocover
    # fake tqdm if it's not installed
    from ubelt import progiter
    _tqdm = progiter.ProgIter

__all__ = ['download', 'grabdata']


def download(url, fpath=None, hash_prefix=None, chunksize=8192, verbose=1):
    """
    Download a url to a local file path.

    The response body is streamed in chunks into a temporary file, which is
    moved to ``fpath`` only after the whole body has been read. The temporary
    file is removed if it is still present when the function exits.

    Args:
        url (str): url to download
        fpath (str): path to download to. Defaults to the basename of the url
            inside ubelt's application cache directory.
        hash_prefix (str): accepted for API compatibility but currently
            unused; no hash verification is performed.
        chunksize (int): number of bytes requested per read
        verbose (bool): if truthy, print the url / destination and enable the
            progress display

    Returns:
        str: fpath - the path the file was written to

    Notes:
        Original code taken from pytorch in torch/utils/model_zoo.py and
        slightly modified.

    References:
        http://blog.moleculea.com/2012/10/04/urlretrieve-progres-indicator/
        http://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads
        http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py

    Example:
        >>> from ubelt.util_download import *  # NOQA
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> fpath = download(url)
        >>> print(basename(fpath))
        rqwaDag.png
    """
    if fpath is None:
        dpath = util_platform.ensure_app_cache_dir('ubelt')
        fname = basename(url)
        fpath = join(dpath, fname)

    if verbose:
        print('Downloading url=%r to fpath=%r' % (url, fpath))

    urldata = urlopen(url)
    try:
        meta = urldata.info()
        try:
            if hasattr(meta, 'getheaders'):  # nocover
                # Python 2 style message object
                file_size = int(meta.getheaders("Content-Length")[0])
            else:
                file_size = int(meta.get_all("Content-Length")[0])
        except (TypeError, IndexError, ValueError):
            # The header is absent (get_all returns None, getheaders returns
            # an empty list) or is not an integer. The size is only used as
            # the progress total, so continue without one.
            file_size = None

        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with _tqdm(total=file_size, disable=not verbose) as pbar:
                while True:
                    chunk = urldata.read(chunksize)
                    if len(chunk) == 0:
                        break
                    tmp.write(chunk)
                    pbar.update(len(chunk))
            # Close (and thereby flush) before moving the file into place
            tmp.close()
            shutil.move(tmp.name, fpath)
        finally:
            tmp.close()
            # If for some reason the move failed, delete the temporary file
            if exists(tmp.name):  # nocover
                os.remove(tmp.name)
    finally:
        # Always release the connection, even when reading or writing fails
        urldata.close()
    return fpath
def grabdata(url, fpath=None, dpath=None, fname=None, redo=False,
             verbose=1, appname=None, **download_kw):
    """
    Downloads a file, caches it, and returns its local path.

    If the resolved path already exists and ``redo`` is False, nothing is
    downloaded and the existing path is returned.

    Args:
        url (str): url to the file to download
        fpath (str): The full path to download the file to. If unspecified,
            the arguments `dpath` and `fname` are used to determine this.
        dpath (str): where to download the file. If unspecified `appname`
            is used to determine this. Mutually exclusive with fpath.
        fname (str): What to name the downloaded file. Defaults to the url
            basename. Mutually exclusive with fpath.
        redo (bool): if True forces redownload of the file (default = False)
        verbose (int): verbosity level (default = 1). It is forwarded to
            `download`; a value of 2 or more also reports when the file is
            already present.
        appname (str): set dpath to `ub.get_app_cache_dir(appname)`.
            Mutually exclusive with dpath and fpath.
        **download_kw: additional kwargs to pass to ub.download

    Returns:
        str: fpath - file path string

    Raises:
        ValueError: if mutually exclusive arguments are combined

    Example:
        >>> import ubelt as ub
        >>> file_url = 'http://i.imgur.com/rqwaDag.png'
        >>> lena_fpath = ub.grabdata(file_url, fname='mario.png')
        >>> result = basename(lena_fpath)
        >>> print(result)
        mario.png
    """
    if appname and dpath:
        raise ValueError('Cannot specify appname with dpath')
    if fpath and (dpath or fname or appname):
        # The message names every argument this check rejects
        raise ValueError('Cannot specify fpath with dpath, fname, or appname')

    if fpath is None:
        if dpath is None:
            # Fall back to the application cache directory
            appname = appname or 'ubelt'
            dpath = util_platform.ensure_app_cache_dir(appname)
        if fname is None:
            fname = basename(url)
        fpath = join(dpath, fname)

    if redo or not exists(fpath):
        fpath = download(url, fpath, verbose=verbose, **download_kw)
    else:
        if verbose >= 2:
            print('Already have file %s' % fpath)
    return fpath
if __name__ == '__main__':
    r""" CommandLine: python -m ubelt.util_download all """
    # When executed as a script, hand this file to xdoctest's module runner.
    import xdoctest
    xdoctest.doctest_module(__file__)