
Solution: Parallel prime number calculation

September 2025

Exercise: Parallel prime number calculation

  • Sequential (old-fashioned) → simple loop, no dependencies, uses only built-in Python.
  • Parallel version (multiprocessing) → same logic, but split into chunks and run in parallel.

      Sequential version

      # primes_seq.py
      # Sequential prime number calculation
      # No pip install required (only built-in Python)
      
      import sys
      
      def is_prime(n):
          if n < 2:
              return False
          for i in range(2, int(n**0.5) + 1):
              if n % i == 0:
                  return False
          return True
      
      def primes_up_to(n):
          return [x for x in range(2, n + 1) if is_prime(x)]
      
      if __name__ == "__main__":
          if len(sys.argv) != 2:
              print("Usage: python primes_seq.py <n>")
              sys.exit(1)
      
          n = int(sys.argv[1])
          primes = primes_up_to(n)
          print(f"Primes up to {n}: {primes}")
      

      👉 Run it with:

      python primes_seq.py 50

      Parallel version

      # primes_parallel.py
      # Parallel prime number calculation using multiprocessing
      # No pip install required (only built-in Python)
      
      import sys
      from multiprocessing import Process, Queue, cpu_count
      
      def is_prime(n):
          if n < 2:
              return False
          for i in range(2, int(n**0.5) + 1):
              if n % i == 0:
                  return False
          return True
      
      def primes_in_range(start, end, q):
          result = [x for x in range(start, end) if is_prime(x)]
          q.put(result)
      
      if __name__ == "__main__":
          if len(sys.argv) != 2:
              print("Usage: python primes_parallel.py <n>")
              sys.exit(1)
      
          n = int(sys.argv[1])
          num_procs = cpu_count()  # use available cores
          chunk_size = n // num_procs + 1
      
          q = Queue()
          procs = []
      
          # Divide the range into chunks
          for i in range(num_procs):
              start = i * chunk_size + 2
              end = min((i + 1) * chunk_size + 2, n + 1)
              p = Process(target=primes_in_range, args=(start, end, q))
              p.start()
              procs.append(p)
      
          # Collect results
          all_primes = []
          for _ in procs:
              all_primes.extend(q.get())
      
          for p in procs:
              p.join()
      
          all_primes.sort()
          print(f"Primes up to {n}: {all_primes}")
      

      👉 Run it with:

      python primes_parallel.py 50
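
      For comparison, the same work can also be written with multiprocessing.Pool (covered in 1.3), which handles worker startup, result collection, and joining for you. This is a minimal alternative sketch, not part of the original exercise, reusing the same is_prime() helper:

      # primes_pool.py
      # Alternative sketch: Pool.map instead of manual Process/Queue
      # No pip install required (only built-in Python)

      import sys
      from multiprocessing import Pool, cpu_count

      def is_prime(n):
          if n < 2:
              return False
          for i in range(2, int(n**0.5) + 1):
              if n % i == 0:
                  return False
          return True

      if __name__ == "__main__":
          n = int(sys.argv[1]) if len(sys.argv) == 2 else 50
          with Pool(cpu_count()) as pool:
              # chunksize batches numbers into larger tasks to keep scheduling overhead low
              flags = pool.map(is_prime, range(2, n + 1), chunksize=1000)
          primes = [x for x, ok in zip(range(2, n + 1), flags) if ok]
          print(f"Primes up to {n}: {primes}")

      👉 Run it with:

      python primes_pool.py 50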

      Let's build a benchmarking helper script that:

      • Runs the prime calculation with different values of n (10k, 100k, 1M, 10M).
      • Varies the number of processes (1, 2, 4, 8).
      • Records elapsed time for each run.
      • Displays a bar chart comparing times (requires only matplotlib, so pip install matplotlib if students don't already have it).

              # benchmark_primes.py
              # Compare sequential vs parallel prime calculation
              # Requires matplotlib: pip install matplotlib
              
              import time
              import matplotlib.pyplot as plt
              from multiprocessing import Process, Queue
              
              # --- Prime helpers (same as before) ---
              def is_prime(n):
                  if n < 2:
                      return False
                  for i in range(2, int(n**0.5) + 1):
                      if n % i == 0:
                          return False
                  return True
              
              def primes_in_range(start, end, q):
                  result = [x for x in range(start, end) if is_prime(x)]
                  q.put(result)
              
              def primes_parallel(n, num_procs):
                  chunk_size = n // num_procs + 1
                  q = Queue()
                  procs = []
              
                  for i in range(num_procs):
                      start = i * chunk_size + 2
                      end = min((i + 1) * chunk_size + 2, n + 1)
                      p = Process(target=primes_in_range, args=(start, end, q))
                      p.start()
                      procs.append(p)
              
                  all_primes = []
                  for _ in procs:
                      all_primes.extend(q.get())
              
                  for p in procs:
                      p.join()
              
                  all_primes.sort()
                  return all_primes
              
              # --- Benchmarking logic ---
              def benchmark(n, num_procs):
                  t0 = time.time()
                  primes_parallel(n, num_procs)
                  return time.time() - t0
              
              if __name__ == "__main__":
                  ns = [10_000, 100_000, 1_000_000, 10_000_000]
                  proc_counts = [1, 2, 4, 8]
              
                  results = {}
              
                  for n in ns:
                      results[n] = []
                      for p in proc_counts:
                          print(f"Running n={n:,} with {p} process(es)...")
                          elapsed = benchmark(n, p)
                          results[n].append(elapsed)
              
                  # --- Plot results ---
                  fig, ax = plt.subplots(figsize=(10, 6))
              
                  bar_width = 0.2
                  x = range(len(ns))
                  for i, p in enumerate(proc_counts):
                      times = [results[n][i] for n in ns]
                      ax.bar([xi + i * bar_width for xi in x], times,
                             width=bar_width, label=f"{p} proc(s)")
              
                  ax.set_xticks([xi + 1.5 * bar_width for xi in x])
                  ax.set_xticklabels([f"{n:,}" for n in ns])
                  ax.set_ylabel("Time (s)")
                  ax.set_xlabel("n (max number checked)")
                  ax.set_title("Prime calculation benchmark")
                  ax.legend()
                  plt.tight_layout()
                  plt.show()
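
      If you run the benchmark on a machine without a display (for example, a remote node), matplotlib can write the chart to a file instead of opening a window; this is an optional tweak, not part of the exercise:

      # instead of plt.show(), save the chart to disk
      plt.savefig("prime_benchmark.png", dpi=150)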
              
      At 100k and below, the plot shows the parallelization overhead: starting processes and passing results through the Queue costs more than the computation it saves, so adding processes does not speed things up yet.
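
      To see where that overhead comes from, here is a small sketch, not part of the exercise, that measures only the fixed cost of starting workers and doing one Queue round trip, with no prime computation at all:

      # overhead_demo.py
      # Fixed cost of process startup + Queue round trip (no real work)
      # No pip install required (only built-in Python)

      import time
      from multiprocessing import Process, Queue, cpu_count

      def noop(q):
          q.put([])  # send back an empty result, do no real work

      if __name__ == "__main__":
          t0 = time.time()
          q = Queue()
          procs = [Process(target=noop, args=(q,)) for _ in range(cpu_count())]
          for p in procs:
              p.start()
          for _ in procs:
              q.get()
          for p in procs:
              p.join()
          print(f"Fixed cost for {cpu_count()} processes: {time.time() - t0:.3f} s")

      Whatever this prints is roughly the floor the parallel version has to beat; for small n the sequential loop often finishes in less time than that.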
