# Source code for ubelt.util_download

# -*- coding: utf-8 -*-
"""
Helpers for downloading data
"""
from __future__ import (absolute_import, division, print_function,
                        unicode_literals)
from os.path import basename, join, exists
import os
import shutil
import tempfile
from ubelt import util_platform

# try:  # nocover
# from requests import get as urlopen
# _have_requests = True
# except ImportError:  # nocover
# _have_requests = False
import sys
if sys.version_info[0] == 2:  # nocover
    from urlparse import urlparse  # NOQA
    from urllib2 import urlopen  # NOQA
    from urllib2 import URLError  # NOQA
else:
    from urllib.request import urlopen  # NOQA
    from urllib.parse import urlparse  # NOQA
    from urllib.error import URLError  # NOQA


# Progress-bar backend selection: ``_tqdm`` ends up bound to the class that
# ``download`` uses to report progress.
try:  # nocover
    # NOTE: this unconditional raise jumps straight to the ``except`` branch,
    # so the tqdm import on the next line is never reached and the ProgIter
    # fallback is always the one in use.
    raise ImportError()
    from tqdm import tqdm as _tqdm
except ImportError:  # nocover
    # fake tqdm if it's not installed
    from ubelt import progiter
    _tqdm = progiter.ProgIter


__all__ = ['download', 'grabdata']


def download(url, fpath=None, hash_prefix=None, chunksize=8192, verbose=1):
    """
    Downloads a url to a fpath.

    The data is streamed in chunks into a temporary file, which is moved to
    ``fpath`` only after the whole transfer (and the optional hash check) has
    succeeded. The temporary file is removed if anything goes wrong.

    Args:
        url (str): url to download
        fpath (str): path to download to. Defaults to basename of url and
            ubelt's application cache.
        hash_prefix (str): if given (and non-empty), the sha256 hex digest of
            the downloaded bytes must start with this string, otherwise the
            download is rejected. Defaults to None (no check).
        chunksize (int): download chunksize
        verbose (int): verbosity. Any truthy value prints the download
            message and enables the progress bar.

    Returns:
        str: fpath - the path the data was written to

    Raises:
        RuntimeError: if ``hash_prefix`` is given and does not match the
            sha256 digest of the downloaded data.

    Notes:
        Original code taken from pytorch in torch/utils/model_zoo.py and
        slightly modified.

    References:
        http://blog.moleculea.com/2012/10/04/urlretrieve-progres-indicator/
        http://stackoverflow.com/questions/15644964/python-progress-bar-and-downloads
        http://stackoverflow.com/questions/16694907/how-to-download-large-file-in-python-with-requests-py

    Example:
        >>> from ubelt.util_download import *  # NOQA
        >>> url = 'http://i.imgur.com/rqwaDag.png'
        >>> fpath = download(url)
        >>> print(basename(fpath))
        rqwaDag.png
    """
    if fpath is None:
        dpath = util_platform.ensure_app_cache_dir('ubelt')
        fname = basename(url)
        fpath = join(dpath, fname)

    if verbose:
        print('Downloading url=%r to fpath=%r' % (url, fpath))

    hasher = None
    if hash_prefix:
        # Imported locally so the check adds no module-level dependency.
        import hashlib
        hasher = hashlib.sha256()

    urldata = urlopen(url)
    try:
        meta = urldata.info()
        try:
            if hasattr(meta, 'getheaders'):  # nocover
                file_size = int(meta.getheaders("Content-Length")[0])
            else:
                file_size = int(meta.get_all("Content-Length")[0])
        except (TypeError, IndexError, ValueError):  # nocover
            # The server did not send a usable Content-Length (e.g. chunked
            # transfer encoding), so the total size is unknown.
            # NOTE(review): assumes the progress bar accepts total=None for
            # an unknown length - confirm against ProgIter / tqdm.
            file_size = None

        tmp = tempfile.NamedTemporaryFile(delete=False)
        try:
            with _tqdm(total=file_size, disable=not verbose) as pbar:
                while True:
                    chunk = urldata.read(chunksize)
                    if not chunk:
                        break
                    tmp.write(chunk)
                    if hasher is not None:
                        hasher.update(chunk)
                    pbar.update(len(chunk))

            # Flush and release the handle before the file is moved.
            tmp.close()

            if hasher is not None:
                # Verify before moving so a bad download never reaches fpath.
                digest = hasher.hexdigest()
                if digest[:len(hash_prefix)] != hash_prefix:
                    raise RuntimeError(
                        'invalid hash value (expected "{}", got "{}")'
                        .format(hash_prefix, digest))

            shutil.move(tmp.name, fpath)
        finally:
            tmp.close()
            # If the download, hash check, or move failed, the temporary file
            # is still present; delete it.
            if exists(tmp.name):  # nocover
                os.remove(tmp.name)
    finally:
        # Always release the network connection.
        urldata.close()
    return fpath


def grabdata(url, fpath=None, dpath=None, fname=None, redo=False,
             verbose=1, appname=None, **download_kw):
    """
    Downloads a file, caches it, and returns its local path.

    The download is skipped when the target file already exists, unless
    ``redo`` is set.

    Args:
        url (str): url to the file to download
        fpath (str): The full path to download the file to. If unspecified, the
            arguments `dpath` and `fname` are used to determine this.
        dpath (str): where to download the file. If unspecified `appname`
            is used to determine this. Mutually exclusive with fpath.
        fname (str): What to name the downloaded file. Defaults to the url
            basename. Mutually exclusive with fpath.
        redo (bool): if True forces redownload of the file (default = False)
        verbose (int): verbosity level (default = 1). It is forwarded to
            `download`; a value of 2 or more also prints a message when the
            file is already present.
        appname (str): set dpath to `ub.get_app_cache_dir(appname)`.
            Mutually exclusive with dpath and fpath. Defaults to 'ubelt'
            when neither dpath nor fpath is given.
        **download_kw: additional kwargs to pass to ub.download

    Returns:
        str: fpath - file path string

    Raises:
        ValueError: if mutually exclusive arguments are combined.

    Example:
        >>> import ubelt as ub
        >>> file_url = 'http://i.imgur.com/rqwaDag.png'
        >>> fpath = ub.grabdata(file_url, fname='mario.png')
        >>> result = basename(fpath)
        >>> print(result)
        mario.png
    """
    # Reject conflicting ways of specifying the destination up front.
    if appname and dpath:
        raise ValueError('Cannot specify appname with dpath')
    if fpath and (dpath or fname or appname):
        raise ValueError('Cannot specify fpath with dpath, fname, or appname')

    if fpath is None:
        if dpath is None:
            appname = appname or 'ubelt'
            dpath = util_platform.ensure_app_cache_dir(appname)
        if fname is None:
            fname = basename(url)
        fpath = join(dpath, fname)

    if redo or not exists(fpath):
        fpath = download(url, fpath, verbose=verbose, **download_kw)
    elif verbose >= 2:
        print('Already have file %s' % fpath)
    return fpath


if __name__ == '__main__':
    # CommandLine:
    #     python -m ubelt.util_download all
    #
    # Hand this file to xdoctest's module-level doctest runner.
    from xdoctest import doctest_module
    doctest_module(__file__)