
Build a batch downloader from URLs

Python is well suited to building a batch downloader from URLs: the requests library handles the file downloads, while concurrent.futures (or asyncio) runs multiple downloads concurrently.

Below is a complete Python script that:

  • Reads a list of URLs (can be from a file or a Python list).

  • Downloads each file concurrently.

  • Saves each file under a unique name (derived from the URL path; a Content-Disposition variant is sketched after the script).

  • Handles errors gracefully.


```python
import os
import requests
from concurrent.futures import ThreadPoolExecutor, as_completed
from urllib.parse import urlparse, unquote


def get_filename_from_url(url):
    """Extract a filename from the URL path or fallback to a default name."""
    parsed_url = urlparse(url)
    filename = os.path.basename(parsed_url.path)
    if not filename:
        filename = "downloaded_file"
    return unquote(filename)


def download_file(url, save_folder):
    try:
        response = requests.get(url, stream=True, timeout=15)
        response.raise_for_status()
        filename = get_filename_from_url(url)
        filepath = os.path.join(save_folder, filename)
        # If filename exists, append a number to avoid overwrite
        base, ext = os.path.splitext(filename)
        counter = 1
        while os.path.exists(filepath):
            filepath = os.path.join(save_folder, f"{base}({counter}){ext}")
            counter += 1
        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                if chunk:
                    f.write(chunk)
        print(f"Downloaded: {url} -> {filepath}")
        return filepath
    except requests.RequestException as e:
        print(f"Failed to download {url}: {e}")
        return None


def batch_download(urls, save_folder="downloads", max_workers=5):
    if not os.path.exists(save_folder):
        os.makedirs(save_folder)
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = {executor.submit(download_file, url, save_folder): url for url in urls}
        for future in as_completed(futures):
            url = futures[future]
            try:
                future.result()
            except Exception as e:
                print(f"Error downloading {url}: {e}")


if __name__ == "__main__":
    # Example list of URLs to download
    urls = [
        "https://example.com/file1.jpg",
        "https://example.com/file2.pdf",
        "https://example.com/file3.zip",
    ]
    batch_download(urls)
```
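The script above derives filenames from the URL path only. If you also want to honor a server-supplied Content-Disposition header, one possible extension looks like this (a sketch; the helper name and regex are illustrative and handle only the common quoted/unquoted filename forms):

```python
import re


def get_filename_from_response(response, fallback):
    """Prefer a filename from the Content-Disposition header, else the fallback."""
    content_disposition = response.headers.get("Content-Disposition", "")
    # Matches filename="name.ext" or filename=name.ext (common cases only)
    match = re.search(r'filename="?([^";]+)"?', content_disposition)
    return match.group(1) if match else fallback
```

Inside download_file you would then call `filename = get_filename_from_response(response, get_filename_from_url(url))` instead of using the URL-based helper alone.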

How it works:

  • get_filename_from_url: Extracts a filename from the URL.

  • download_file: Downloads a single file with chunked writing to save memory.

  • batch_download: Uses a thread pool to download multiple files concurrently.

  • Prevents overwriting by adding suffix numbers to filenames if duplicates exist.

  • Handles connection timeouts and HTTP errors gracefully.

  • Saves files into a specified folder (downloads by default).
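The introduction mentions asyncio as an alternative to the thread pool. Below is a minimal async variant, assuming the third-party aiohttp library is installed (`pip install aiohttp`) and reusing get_filename_from_url from the script above; for brevity it omits the duplicate-name handling:

```python
import asyncio
import os

import aiohttp  # third-party; assumed installed


async def download_file_async(session, url, save_folder):
    try:
        async with session.get(url, timeout=aiohttp.ClientTimeout(total=15)) as response:
            response.raise_for_status()
            filepath = os.path.join(save_folder, get_filename_from_url(url))
            with open(filepath, "wb") as f:
                # Stream the body in chunks to keep memory use low
                async for chunk in response.content.iter_chunked(8192):
                    f.write(chunk)
            print(f"Downloaded: {url} -> {filepath}")
    except (aiohttp.ClientError, asyncio.TimeoutError) as e:
        print(f"Failed to download {url}: {e}")


async def batch_download_async(urls, save_folder="downloads"):
    os.makedirs(save_folder, exist_ok=True)
    async with aiohttp.ClientSession() as session:
        await asyncio.gather(
            *(download_file_async(session, url, save_folder) for url in urls)
        )

# Usage: asyncio.run(batch_download_async(urls))
```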

You can customize the urls list directly, or modify the script to read URLs from a text file, as in the sketch below.
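A minimal helper for that (the file name urls.txt is illustrative):

```python
def load_urls(path="urls.txt"):
    """Read one URL per line, skipping blank lines."""
    with open(path) as f:
        return [line.strip() for line in f if line.strip()]

# Usage: batch_download(load_urls())
```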
