Skip to content

Conversation

@Eclips4
Copy link

@Eclips4 Eclips4 commented Nov 15, 2025

Benchmark code:

import argparse
import base64
import os
import statistics
import timeit

# The benchmark compares against the _base64 extension module, so there is
# nothing useful to do without it: exit with an explanatory message instead of
# letting the ImportError traceback propagate.
try:
    import _base64 as rust_base64
except ImportError:
    raise SystemExit(
        "The Rust _base64 module is unavailable. Reconfigure with --with-rust-base64."
    )


# Layout of the results table: one (header title, column width) pair per
# column, in output order. print_row() right-justifies each cell to the width.
COLUMNS: tuple[tuple[str, int], ...] = (
    ("size", 8),
    ("loops", 8),
    ("Python base64.standard_b64encode (µs)", 40),
    ("Rust _base64.standard_b64encode (µs)", 40),
    ("speedup", 9),
)
# Payload sizes, in bytes, benchmarked when --sizes is not given.
DEFAULT_SIZES = (32, 256, 4096, 16384)


def parse_args() -> argparse.Namespace:
    """Parse the benchmark's command-line options.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``, carrying
        the attributes ``sizes``, ``repeat``, ``target_bytes_per_run`` and
        ``minimum_iterations``.
    """
    summary = (
        "Benchmark the CPython base64.standard_b64encode implementation against "
        "the experimental Rust port shipped as _base64."
    )
    cli = argparse.ArgumentParser(description=summary)
    add_option = cli.add_argument

    # One or more payload sizes; a fresh list so the default is never shared.
    add_option(
        "--sizes",
        nargs="+",
        type=int,
        metavar="N",
        default=[*DEFAULT_SIZES],
        help="Payload sizes (in bytes) to benchmark. Defaults to %(default)s.",
    )

    add_option(
        "--repeat",
        default=5,
        type=int,
        help="How many timer repeats to run for each size (default: %(default)s).",
    )

    target_bytes_help = (
        "Approximate amount of data processed per timing run. "
        "This value is turned into a loop count based on the payload size."
    )
    add_option(
        "--target-bytes-per-run",
        default=1_000_000,
        type=int,
        help=target_bytes_help,
    )

    add_option(
        "--minimum-iterations",
        default=200,
        type=int,
        help="Lower bound for how many calls are performed in each timing run.",
    )

    return cli.parse_args()


def iterations_for_size(size: int, *, target_bytes: int, minimum: int) -> int:
    """Return how many calls to perform in one timing run for a payload size.

    The count is the number of whole payloads that fit in ``target_bytes``,
    raised to at least ``minimum`` and never less than one.

    Args:
        size: Payload size in bytes; must be positive.
        target_bytes: Approximate number of bytes to process per timing run.
        minimum: Lower bound on the returned loop count.

    Returns:
        The loop count for one timing run.

    Raises:
        ValueError: If ``size`` is zero or negative.
    """
    if not size > 0:
        raise ValueError("Payload size must be a positive integer.")
    # The literal 1 keeps the count positive even when size > target_bytes.
    return max(target_bytes // size, 1, minimum)


def benchmark(func, payload: bytes, *, number: int, repeat: int) -> tuple[float, float]:
    """Time ``func(payload)`` and summarise the per-call cost.

    Args:
        func: Callable invoked with ``payload`` as its only argument.
        payload: Bytes handed to ``func`` on every call.
        number: How many calls make up one timing run; must be at least 1.
        repeat: How many timing runs to perform; must be at least 1.

    Returns:
        A ``(best, mean)`` pair: the minimum and the arithmetic mean of the
        per-call time, in seconds, across the timing runs.

    Raises:
        ValueError: If ``number`` or ``repeat`` is less than 1.
    """
    # Validate up front: number == 0 would otherwise divide by zero, and
    # repeat <= 0 yields no runs, making min() fail with an opaque message.
    if number < 1:
        raise ValueError(f"number must be at least 1, got {number}.")
    if repeat < 1:
        raise ValueError(f"repeat must be at least 1, got {repeat}.")

    timer = timeit.Timer(lambda: func(payload))
    runs = timer.repeat(repeat=repeat, number=number)
    # Each run reports the total time for `number` calls; normalise to one call.
    per_call = [elapsed / number for elapsed in runs]
    return min(per_call), statistics.mean(per_call)


def encode_and_validate(payload: bytes) -> None:
    """Check that both implementations encode ``payload`` identically.

    Args:
        payload: Bytes to encode with ``base64.standard_b64encode`` and with
            ``_base64.standard_b64encode``.

    Raises:
        RuntimeError: If the two encoded results differ.
    """
    expected = base64.standard_b64encode(payload)
    candidate = rust_base64.standard_b64encode(payload)
    if expected != candidate:
        # Name the functions actually compared so the failure is traceable.
        raise RuntimeError(
            "Mismatch detected between base64.standard_b64encode and "
            "_base64.standard_b64encode."
        )


def format_us(value: float) -> str:
    """Render ``value`` scaled by one million with three decimal places.

    The caller passes per-call timings, so the result reads as microseconds.
    """
    micros = value * 1e6
    return format(micros, ".3f")


def print_row(values: list[str]) -> None:
    """Print one table row, right-justifying each cell to its column width.

    Args:
        values: Cell texts, one per entry in ``COLUMNS`` and in the same order.

    Raises:
        ValueError: If ``values`` and ``COLUMNS`` differ in length (the pairing
            uses ``zip(..., strict=True)``).
    """
    cells = []
    for cell, column in zip(values, COLUMNS, strict=True):
        _title, width = column
        cells.append(cell.rjust(width))
    line = " ".join(cells)
    print(line)


def main() -> None:
    """Benchmark every requested payload size and print a results table.

    For each size a random payload is generated, both implementations are
    checked to agree on it, and each is then timed. The row reports the best
    per-call time of each implementation and their ratio.
    """
    options = parse_args()

    headers = [title for title, _ in COLUMNS]
    print_row(headers)

    for size in options.sizes:
        payload = os.urandom(size)
        loops = iterations_for_size(
            size,
            target_bytes=options.target_bytes_per_run,
            minimum=options.minimum_iterations,
        )

        # Verify agreement before timing so a mismatch aborts the run early.
        encode_and_validate(payload)

        # Only the best (minimum) timing is reported; the mean is discarded.
        py_best, _ = benchmark(
            base64.standard_b64encode, payload, number=loops, repeat=options.repeat
        )
        rust_best, _ = benchmark(
            rust_base64.standard_b64encode, payload, number=loops, repeat=options.repeat
        )

        # Guard against a zero timing, which would otherwise divide by zero.
        if rust_best:
            speedup = py_best / rust_best
        else:
            speedup = float("inf")

        row = [
            str(size),
            str(loops),
            format_us(py_best),
            format_us(rust_best),
            f"{speedup:.3f}",
        ]
        print_row(row)


# Run the benchmark only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

Results on my MacBook Pro (M3 Pro):

    size    loops    Python base64.standard_b64encode (µs)     Rust _base64.standard_b64encode (µs)   speedup
      32    31250                                    0.101                                    0.043     2.341
     256     3906                                    0.285                                    0.144     1.978
    4096      244                                    2.750                                    1.899     1.448
   16384      200                                   11.669                                    7.642     1.527

CPython was built with --enable-optimizations; _base64 was built in release mode (cargo build --release -p _base64).

@Eclips4 Eclips4 requested a review from emmatyping as a code owner November 15, 2025 21:00
@emmatyping emmatyping merged commit b8d6ea3 into emmatyping:rust-in-cpython Nov 16, 2025
@Eclips4 Eclips4 deleted the faster-b64encode branch November 16, 2025 11:27
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

2 participants