Module pydload.download

Expand source code
import logging
import time
import uuid

import progressbar
import requests

mb = 1024 * 1024

def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
    '''
    Download a file from a URL, optionally aborting after max_time seconds.

    Parameters:

    url (str): URL of the file to be downloaded. 'http://' is assumed if
        no scheme is present.

    save_to_path (str): Save as. If not provided, will be saved in the working
        directory with file_name auto identified from url.

    timeout (int): timeout (seconds) for the initial handshake for requests.

    max_time (int): Kill the download if it takes more than max_time seconds.

        # Useful when you don't know the size of files before hand and don't want to download very large files.

    verbose (bool default:True): print progress bar and status messages.


    Returns:

    False if downloading failed or stopped based on max_time. file_path if download is successful.

    '''
    url = url.rstrip('/')
    # requests needs an absolute URL; default to plain http when no scheme given.
    if not url.startswith(('http://', 'https://')):
        if verbose:
            logging.warning('Assuming http://')
        url = 'http://' + url

    if not save_to_path:
        # Derive a file name: last path segment (minus any query string),
        # then the second-to-last segment, then a random UUID as last resort.
        save_to_path = url.split('/')[-1].split('?')[0]
        if not save_to_path.strip():
            save_to_path = url.split('/')[-2]

        if not save_to_path.strip():
            save_to_path = str(uuid.uuid4())
            print('Saving file as', save_to_path)

        if verbose:
            print('Saving the file at', save_to_path)

    if max_time and verbose:
        print("The download will be auto-terminated in", max_time, "if not completed.")

    try:
        request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
    except requests.exceptions.SSLError:
        # Certificate verification failed; retry once without verification.
        if verbose:
            print('SSL certificate not verified...')
        request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)

    # Size in MB (rounded up) for the progress bar; None when the server
    # sends no usable Content-length header.
    file_size = None
    try:
        file_size = int(request.headers['Content-length']) // mb + 1
    except (KeyError, ValueError, TypeError):
        if verbose:
            print('Content-length not found, file size cannot be estimated.')

    is_stopped = False

    with open(save_to_path, 'wb') as f:
        start_time = time.time()
        chunks = request.iter_content(mb)
        if verbose:
            chunks = progressbar.progressbar(chunks, max_value=file_size, prefix='MB')
        for chunk in chunks:
            f.write(chunk)
            # Enforce the wall-clock budget between chunks.
            if max_time and time.time() - start_time >= max_time:
                is_stopped = True
                break

    if is_stopped:
        if verbose:
            print('Stopped due to excess time')
        return False

    else:
        if verbose:
            print('Successfully Downloaded to:', save_to_path)
        return save_to_path


def cli():
    """Command-line entry point: parse arguments and run dload()."""
    import argparse
    parser = argparse.ArgumentParser(description='CLI for pydload')

    parser.add_argument('url', type=str, help='URL of the file to be downloaded.')

    # Optional positional: when omitted, dload() derives the name from the URL.
    parser.add_argument('save_to_path', type=str, nargs='?', help='save as file path/name')

    parser.add_argument('--max_time', type=int, help='Maximum time to be spent on download')
    parser.add_argument('--timeout', type=int, help='Request timeout')

    args = parser.parse_args()

    # Falsy timeout (unset or 0) falls back to the 10-second default.
    timeout = args.timeout or 10

    dload(args.url, save_to_path=args.save_to_path, timeout=timeout, max_time=args.max_time, verbose=True)

Functions

def cli()
Expand source code
def cli():
    """Command-line entry point: parse arguments and run dload()."""
    import argparse
    parser = argparse.ArgumentParser(description='CLI for pydload')

    parser.add_argument('url', type=str, help='URL of the file to be downloaded.')

    # Optional positional: when omitted, dload() derives the name from the URL.
    parser.add_argument('save_to_path', type=str, nargs='?', help='save as file path/name')

    parser.add_argument('--max_time', type=int, help='Maximum time to be spent on download')
    parser.add_argument('--timeout', type=int, help='Request timeout')

    args = parser.parse_args()

    # Falsy timeout (unset or 0) falls back to the 10-second default.
    timeout = args.timeout or 10

    dload(args.url, save_to_path=args.save_to_path, timeout=timeout, max_time=args.max_time, verbose=True)
def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True)

Parameters:

url (str): URL of the file to be downloaded.

save_to_path (str): Save as. If not provided, will be saved in the working directory with file_name auto identified from url.

timeout (int): timeout for the initial handshake for requests.

max_time (int): Kill the download if it takes more than max_time seconds.

# Useful when you don't know the size of files before hand and don't want to download very large files.

verbose (bool default:True): self explanatory

Returns:

False if downloading failed or stopped based on max_time. file_path if download is successful.

Expand source code
def dload(url, save_to_path=None, timeout=10, max_time=30, verbose=True):
    '''
    Download a file from a URL, optionally aborting after max_time seconds.

    Parameters:

    url (str): URL of the file to be downloaded. 'http://' is assumed if
        no scheme is present.

    save_to_path (str): Save as. If not provided, will be saved in the working
        directory with file_name auto identified from url.

    timeout (int): timeout (seconds) for the initial handshake for requests.

    max_time (int): Kill the download if it takes more than max_time seconds.

        # Useful when you don't know the size of files before hand and don't want to download very large files.

    verbose (bool default:True): print progress bar and status messages.


    Returns:

    False if downloading failed or stopped based on max_time. file_path if download is successful.

    '''
    url = url.rstrip('/')
    # requests needs an absolute URL; default to plain http when no scheme given.
    if not url.startswith(('http://', 'https://')):
        if verbose:
            logging.warning('Assuming http://')
        url = 'http://' + url

    if not save_to_path:
        # Derive a file name: last path segment (minus any query string),
        # then the second-to-last segment, then a random UUID as last resort.
        save_to_path = url.split('/')[-1].split('?')[0]
        if not save_to_path.strip():
            save_to_path = url.split('/')[-2]

        if not save_to_path.strip():
            save_to_path = str(uuid.uuid4())
            print('Saving file as', save_to_path)

        if verbose:
            print('Saving the file at', save_to_path)

    if max_time and verbose:
        print("The download will be auto-terminated in", max_time, "if not completed.")

    try:
        request = requests.get(url, timeout=timeout, stream=True, verify=True, allow_redirects=True)
    except requests.exceptions.SSLError:
        # Certificate verification failed; retry once without verification.
        if verbose:
            print('SSL certificate not verified...')
        request = requests.get(url, timeout=timeout, stream=True, verify=False, allow_redirects=True)

    # Size in MB (rounded up) for the progress bar; None when the server
    # sends no usable Content-length header.
    file_size = None
    try:
        file_size = int(request.headers['Content-length']) // mb + 1
    except (KeyError, ValueError, TypeError):
        if verbose:
            print('Content-length not found, file size cannot be estimated.')

    is_stopped = False

    with open(save_to_path, 'wb') as f:
        start_time = time.time()
        chunks = request.iter_content(mb)
        if verbose:
            chunks = progressbar.progressbar(chunks, max_value=file_size, prefix='MB')
        for chunk in chunks:
            f.write(chunk)
            # Enforce the wall-clock budget between chunks.
            if max_time and time.time() - start_time >= max_time:
                is_stopped = True
                break

    if is_stopped:
        if verbose:
            print('Stopped due to excess time')
        return False

    else:
        if verbose:
            print('Successfully Downloaded to:', save_to_path)
        return save_to_path