| """ |
| ================================================================================ |
| PDF MANIPULATOR - Full-Featured PDF Page Manipulation Toolkit |
| ================================================================================ |
| Author : algorembrant |
| Version : 1.0.0 |
| License : MIT |
| |
| USAGE COMMANDS (run from terminal): |
| -------------------------------------------------------------------------------- |
| |
| MERGE |
| python pdf_manipulator.py merge -i file1.pdf file2.pdf file3.pdf -o merged.pdf |
| python pdf_manipulator.py merge -i file1.pdf file2.pdf -o out.pdf --interleave |
| |
| SPLIT |
| python pdf_manipulator.py split -i input.pdf -o ./output_dir |
| python pdf_manipulator.py split -i input.pdf -o ./output_dir --range 1-5 |
| python pdf_manipulator.py split -i input.pdf -o ./output_dir --range 2,4,6 |
| |
| REMOVE PAGES |
| python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 3 |
| python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 1,3,5 |
| python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 2-5 |
| python pdf_manipulator.py remove -i input.pdf -o output.pdf --pages 1,3-5,7 |
| |
| EXTRACT PAGES |
| python pdf_manipulator.py extract -i input.pdf -o output.pdf --pages 1-3 |
| python pdf_manipulator.py extract -i input.pdf -o output.pdf --pages 2,4,6 |
| |
| REORDER PAGES |
| python pdf_manipulator.py reorder -i input.pdf -o output.pdf --order 3,1,2,4 |
| |
| ROTATE PAGES |
| python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 90 |
| python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 180 --pages 1,3 |
| python pdf_manipulator.py rotate -i input.pdf -o output.pdf --angle 270 --pages 2-4 |
| |
| REVERSE |
| python pdf_manipulator.py reverse -i input.pdf -o output.pdf |
| |
| DUPLICATE PAGES |
| python pdf_manipulator.py duplicate -i input.pdf -o output.pdf --pages 2 --times 3 |
| |
| INSERT BLANK PAGES |
| python pdf_manipulator.py insert-blank -i input.pdf -o output.pdf --after 2 |
| python pdf_manipulator.py insert-blank -i input.pdf -o output.pdf --before 1 |
| |
| INSERT PDF PAGES |
| python pdf_manipulator.py insert -i base.pdf --insert-file extra.pdf -o output.pdf --after 3 |
| python pdf_manipulator.py insert -i base.pdf --insert-file extra.pdf -o output.pdf --before 2 |
| |
| REPLACE PAGES |
| python pdf_manipulator.py replace -i base.pdf --replace-file new.pdf -o output.pdf --pages 2 --replace-pages 1 |
| |
| CROP PAGES |
| python pdf_manipulator.py crop -i input.pdf -o output.pdf --box "50,50,500,700" |
| python pdf_manipulator.py crop -i input.pdf -o output.pdf --box "50,50,500,700" --pages 1-3 |
| |
| SCALE / RESIZE |
| python pdf_manipulator.py scale -i input.pdf -o output.pdf --factor 0.5 |
| python pdf_manipulator.py scale -i input.pdf -o output.pdf --to-size A4 |
| python pdf_manipulator.py scale -i input.pdf -o output.pdf --to-size letter |
| |
| WATERMARK |
| python pdf_manipulator.py watermark -i input.pdf -o output.pdf --text "CONFIDENTIAL" |
| python pdf_manipulator.py watermark -i input.pdf -o output.pdf --text "DRAFT" --opacity 0.3 --angle 45 |
| python pdf_manipulator.py watermark -i input.pdf -o output.pdf --watermark-pdf wm.pdf |
| |
| STAMP / OVERLAY |
| python pdf_manipulator.py stamp -i input.pdf -o output.pdf --stamp-pdf stamp.pdf |
| python pdf_manipulator.py stamp -i input.pdf -o output.pdf --stamp-pdf stamp.pdf --pages 1 |
| |
| ADD PAGE NUMBERS |
| python pdf_manipulator.py number -i input.pdf -o output.pdf |
| python pdf_manipulator.py number -i input.pdf -o output.pdf --position bottom-center --start 1 |
| python pdf_manipulator.py number -i input.pdf -o output.pdf --position top-right --format "Page {n}" |
| |
| ENCRYPT / DECRYPT |
| python pdf_manipulator.py encrypt -i input.pdf -o output.pdf --user-pass mypass --owner-pass ownerpass |
| python pdf_manipulator.py encrypt -i input.pdf -o output.pdf --user-pass mypass |
| python pdf_manipulator.py decrypt -i encrypted.pdf -o decrypted.pdf --password mypass |
| |
| METADATA |
| python pdf_manipulator.py metadata -i input.pdf |
| python pdf_manipulator.py metadata -i input.pdf -o output.pdf --set-title "My Title" --set-author "algorembrant" |
| python pdf_manipulator.py metadata -i input.pdf -o output.pdf --set-subject "Report" --set-keywords "pdf,report" |
| |
| BOOKMARKS / OUTLINE |
| python pdf_manipulator.py bookmarks -i input.pdf |
| python pdf_manipulator.py bookmarks -i input.pdf -o output.pdf --add "Chapter 1:1,Chapter 2:5" |
| |
| EXTRACT TEXT |
| python pdf_manipulator.py text -i input.pdf |
| python pdf_manipulator.py text -i input.pdf --pages 1-3 -o extracted.txt |
| |
| INFO / INSPECT |
| python pdf_manipulator.py info -i input.pdf |
| |
| N-UP (multiple pages per sheet) |
| python pdf_manipulator.py nup -i input.pdf -o output.pdf --layout 2x1 |
| python pdf_manipulator.py nup -i input.pdf -o output.pdf --layout 2x2 |
| |
| COMPRESS |
| python pdf_manipulator.py compress -i input.pdf -o output.pdf |
| |
| BATCH OPERATIONS |
| python pdf_manipulator.py batch-remove --dir ./pdfs --pages 1 --suffix _no_cover |
| python pdf_manipulator.py batch-merge --dir ./pdfs -o merged_all.pdf |
| python pdf_manipulator.py batch-split --dir ./pdfs --out-dir ./split_output |
| |
| -------------------------------------------------------------------------------- |
| PAGE RANGE SYNTAX: |
| Single page : 3 |
| Multiple pages: 1,3,5 |
| Range : 2-5 (inclusive) |
| Mixed : 1,3-5,7,9-11 |
| Pages are always 1-indexed (first page = 1) |
| -------------------------------------------------------------------------------- |
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import io |
| import os |
| import re |
| import sys |
| import glob |
| from copy import deepcopy |
| from pathlib import Path |
| from typing import List, Optional, Tuple |
|
|
| from pypdf import PdfReader, PdfWriter |
| from pypdf.generic import NameObject, NumberObject |
| from reportlab.lib.pagesizes import A4, letter, A3, A5, LETTER |
| from reportlab.lib.units import mm, inch |
| from reportlab.pdfgen import canvas as rl_canvas |
| from reportlab.lib import colors |
|
|
|
|
| |
| |
| |
|
|
# Named paper formats looked up by the ``scale`` command's ``--to-size``
# option. Every value is one of the size constants imported from
# ``reportlab.lib.pagesizes``.
PAGE_SIZES = dict(
    a3=A3,
    a4=A4,
    a5=A5,
    letter=letter,
    LETTER=LETTER,
)
|
|
# Anchor-point calculators for page numbers, keyed "<edge>-<side>".
# Each callable takes the page width and height (w, h) and returns the (x, y)
# coordinate handed to the drawing call: 20 units from the bottom/top edge,
# and 30 units from the left/right edge or on the horizontal midpoint.
NUMBER_POSITIONS = {
    f"{edge}-{side}": locate
    for edge, side, locate in (
        ("bottom", "center", lambda w, h: (w / 2, 20)),
        ("bottom", "left", lambda w, h: (30, 20)),
        ("bottom", "right", lambda w, h: (w - 30, 20)),
        ("top", "center", lambda w, h: (w / 2, h - 20)),
        ("top", "left", lambda w, h: (30, h - 20)),
        ("top", "right", lambda w, h: (w - 30, h - 20)),
    )
}
|
|
|
|
| |
| |
| |
|
|
def parse_page_range(spec: str, total: int) -> List[int]:
    """
    Parse a page-range string into a sorted list of unique 0-based indices.

    Input is 1-based, e.g. "1,3-5,7" -> [0, 2, 3, 4, 6]. Whitespace around
    numbers, commas and dashes is ignored.

    Args:
        spec: Comma-separated pages and inclusive ranges ("N" or "A-B").
        total: Number of pages in the document; the largest valid page.

    Returns:
        Sorted 0-based page indices without duplicates.

    Raises:
        ValueError: If an entry is empty, is not a number or "A-B" range,
            lies outside 1..total, or is a range whose start exceeds its end.
    """

    def _to_int(token: str, entry: str) -> int:
        # Turn int()'s terse "invalid literal" error into one naming the entry.
        try:
            return int(token)
        except ValueError:
            raise ValueError(f"Invalid page specification: {entry!r}") from None

    indices = set()
    for raw in spec.split(","):
        part = raw.strip()
        if not part:
            # Covers an empty spec as well as stray commas such as "1,,2".
            raise ValueError(f"Empty entry in page specification {spec!r}.")
        if "-" in part:
            first, last = part.split("-", 1)
            a_i, b_i = _to_int(first, part), _to_int(last, part)
            if a_i < 1 or b_i > total or a_i > b_i:
                raise ValueError(
                    f"Range {a_i}-{b_i} is out of bounds (document has {total} pages)."
                )
            indices.update(range(a_i - 1, b_i))
        else:
            n = _to_int(part, part)
            if n < 1 or n > total:
                raise ValueError(
                    f"Page {n} is out of bounds (document has {total} pages)."
                )
            indices.add(n - 1)
    return sorted(indices)
|
|
|
|
def open_pdf(path: str, password: Optional[str] = None) -> PdfReader:
    """
    Open the PDF at *path* and unlock it when it is encrypted.

    Args:
        path: Location of the PDF file.
        password: Password to try on an encrypted file. When omitted, the
            empty password is tried.

    Returns:
        A ``PdfReader`` for the file.

    Raises:
        ValueError: If the file is encrypted and the password is rejected.
    """
    reader = PdfReader(path)
    if reader.is_encrypted:
        outcome = reader.decrypt("" if password is None else password)
        # decrypt() reports failure through a falsy result rather than an
        # exception; fail here instead of on the first page access.
        if not outcome:
            raise ValueError(
                f"Could not decrypt '{path}': wrong or missing password."
            )
    return reader
|
|
|
|
def save_pdf(writer: PdfWriter, output_path: str) -> None:
    """
    Write *writer* to *output_path* and report the absolute destination.

    Missing parent directories of *output_path* are created first.
    """
    destination = Path(output_path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    with destination.open("wb") as stream:
        writer.write(stream)
    print(f"[OK] Saved -> {destination.resolve()}")
|
|
|
|
def page_count(path: str) -> int:
    """Return the number of pages in the PDF at *path*."""
    reader = open_pdf(path)
    return len(reader.pages)
|
|
|
|
def make_watermark_pdf(
    text: str,
    page_width: float,
    page_height: float,
    opacity: float = 0.15,
    angle: float = 45,
    font_size: int = 60,
) -> io.BytesIO:
    """
    Render a single-page PDF holding *text* as a centred, rotated watermark.

    The text is drawn in red Helvetica-Bold at *font_size* with the given
    *opacity*, centred on the middle of a page of the given size and rotated
    by *angle*.

    Returns:
        An in-memory buffer containing the PDF, rewound to offset 0.
    """
    stream = io.BytesIO()
    sheet = rl_canvas.Canvas(stream, pagesize=(page_width, page_height))
    sheet.setFont("Helvetica-Bold", font_size)
    sheet.setFillColor(colors.red, alpha=opacity)

    centre_x, centre_y = page_width / 2, page_height / 2
    sheet.saveState()
    # Move the origin to the page centre so the rotation pivots there.
    sheet.translate(centre_x, centre_y)
    sheet.rotate(angle)
    sheet.drawCentredString(0, 0, text)
    sheet.restoreState()

    sheet.save()
    stream.seek(0)
    return stream
|
|
|
|
def make_page_number_pdf(
    number_str: str,
    page_width: float,
    page_height: float,
    position: str = "bottom-center",
    font_size: int = 10,
) -> io.BytesIO:
    """
    Render a single-page PDF containing only the label *number_str*.

    The label is drawn in black Helvetica at *font_size*, centred on the
    anchor that ``NUMBER_POSITIONS[position]`` yields for the page size.
    An unknown *position* falls back to "bottom-center".

    Returns:
        An in-memory buffer containing the PDF, rewound to offset 0.
    """
    if position in NUMBER_POSITIONS:
        locate = NUMBER_POSITIONS[position]
    else:
        locate = NUMBER_POSITIONS["bottom-center"]

    stream = io.BytesIO()
    sheet = rl_canvas.Canvas(stream, pagesize=(page_width, page_height))
    sheet.setFont("Helvetica", font_size)
    sheet.setFillColor(colors.black)
    anchor_x, anchor_y = locate(page_width, page_height)
    sheet.drawCentredString(anchor_x, anchor_y, number_str)
    sheet.save()

    stream.seek(0)
    return stream
|
|
|
|
| |
| |
| |
|
|
def cmd_merge(args: argparse.Namespace) -> None:
    """
    Combine the PDFs in ``args.inputs`` into ``args.output``.

    By default the files are appended one after another. With
    ``args.interleave`` the output takes page 1 of every file, then page 2 of
    every file, and so on; files that run out of pages are skipped.
    """
    merged = PdfWriter()
    paths = args.inputs

    if not args.interleave:
        for path in paths:
            source = open_pdf(path)
            for page in source.pages:
                merged.add_page(page)
    else:
        sources = [open_pdf(path) for path in paths]
        longest = max(len(source.pages) for source in sources)
        for position in range(longest):
            for source in sources:
                if position >= len(source.pages):
                    continue
                merged.add_page(source.pages[position])

    save_pdf(merged, args.output)
|
|
|
|
def cmd_split(args: argparse.Namespace) -> None:
    """
    Split ``args.input`` into files inside the directory ``args.output``.

    Without ``args.range`` every page becomes its own
    ``<stem>_page_NNNN.pdf``. With ``args.range`` the selected pages are
    written together into one ``<stem>_pages_<range>.pdf``.
    """
    source = open_pdf(args.input)
    n_pages = len(source.pages)
    target_dir = Path(args.output)
    target_dir.mkdir(parents=True, exist_ok=True)
    base_name = Path(args.input).stem

    if not args.range:
        for number, page in enumerate(source.pages, start=1):
            single = PdfWriter()
            single.add_page(page)
            destination = target_dir / f"{base_name}_page_{number:04d}.pdf"
            save_pdf(single, str(destination))
        return

    selected = parse_page_range(args.range, n_pages)
    subset = PdfWriter()
    for idx in selected:
        subset.add_page(source.pages[idx])
    # Commas in the range are replaced so the label reads as one name part.
    destination = target_dir / f"{base_name}_pages_{args.range.replace(',', '_')}.pdf"
    save_pdf(subset, str(destination))
|
|
|
|
def cmd_remove(args: argparse.Namespace) -> None:
    """
    Write ``args.input`` to ``args.output`` without the pages in ``args.pages``.

    A warning is printed when nothing is left, but the (empty) output is
    still written.
    """
    source = open_pdf(args.input)
    n_pages = len(source.pages)
    doomed = set(parse_page_range(args.pages, n_pages))

    kept = PdfWriter()
    for index, page in enumerate(source.pages):
        if index in doomed:
            continue
        kept.add_page(page)

    remaining = len(kept.pages)
    if remaining == 0:
        print("[WARN] All pages removed - output file will have 0 pages.")
    save_pdf(kept, args.output)
|
|
|
|
def cmd_extract(args: argparse.Namespace) -> None:
    """
    Copy the pages named by ``args.pages`` from ``args.input`` to ``args.output``.

    Pages are written in ascending page order, each at most once, because
    that is how ``parse_page_range`` returns them.
    """
    source = open_pdf(args.input)
    n_pages = len(source.pages)
    wanted = parse_page_range(args.pages, n_pages)

    picked = PdfWriter()
    for page in (source.pages[idx] for idx in wanted):
        picked.add_page(page)
    save_pdf(picked, args.output)
|
|
|
|
def cmd_reorder(args: argparse.Namespace) -> None:
    """
    Write the pages of ``args.input`` in the order listed in ``args.order``.

    ``args.order`` is a comma-separated list of 1-based page numbers. Pages
    may repeat, and pages left out are dropped from the output.

    Raises:
        ValueError: If an entry is not an integer or is not a page of the
            document.
    """
    source = open_pdf(args.input)
    n_pages = len(source.pages)

    sequence = []
    for token in args.order.split(","):
        sequence.append(int(token.strip()) - 1)

    # Check the whole sequence before writing anything.
    offender = next((idx for idx in sequence if not 0 <= idx < n_pages), None)
    if offender is not None:
        raise ValueError(f"Page {offender + 1} is out of bounds (document has {n_pages} pages).")

    arranged = PdfWriter()
    for idx in sequence:
        arranged.add_page(source.pages[idx])
    save_pdf(arranged, args.output)
|
|
|
|
def cmd_rotate(args: argparse.Namespace) -> None:
    """
    Rotate pages of ``args.input`` by ``args.angle`` and save to ``args.output``.

    Only the pages in ``args.pages`` are rotated; all pages when it is not
    given.

    Raises:
        ValueError: If the angle is not 90, 180 or 270.
    """
    allowed_angles = (90, 180, 270)
    if args.angle not in allowed_angles:
        raise ValueError("Rotation angle must be 90, 180, or 270.")

    source = open_pdf(args.input)
    n_pages = len(source.pages)
    if args.pages:
        targets = set(parse_page_range(args.pages, n_pages))
    else:
        targets = set(range(n_pages))

    rotated = PdfWriter()
    for index, page in enumerate(source.pages):
        if index in targets:
            page.rotate(args.angle)
        rotated.add_page(page)
    save_pdf(rotated, args.output)
|
|
|
|
def cmd_reverse(args: argparse.Namespace) -> None:
    """Write the pages of ``args.input`` to ``args.output`` last page first."""
    source = open_pdf(args.input)
    flipped = PdfWriter()
    pages = list(reversed(source.pages))
    for page in pages:
        flipped.add_page(page)
    save_pdf(flipped, args.output)
|
|
|
|
def cmd_duplicate(args: argparse.Namespace) -> None:
    """
    Repeat selected pages of ``args.input`` consecutively in ``args.output``.

    Every page named in ``args.pages`` appears ``args.times`` times in a row
    in place of its single original; all other pages are copied once.

    Raises:
        ValueError: If ``args.times`` is below 1, or ``args.pages`` is invalid.
    """
    times = args.times
    # A count below 1 would silently delete the selected pages.
    if times < 1:
        raise ValueError("--times must be at least 1.")

    reader = open_pdf(args.input)
    total = len(reader.pages)
    indices = set(parse_page_range(args.pages, total))

    writer = PdfWriter()
    for i, page in enumerate(reader.pages):
        if i in indices:
            for _ in range(times):
                # Each repetition is added as its own copy of the page.
                writer.add_page(deepcopy(page))
        else:
            writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_insert_blank(args: argparse.Namespace) -> None:
    """
    Insert one blank page into ``args.input`` and save to ``args.output``.

    The blank page gets the media-box size of the document's first page.
    ``args.after`` N places it after page N (0 puts it first); otherwise
    ``args.before`` N places it before page N. ``--after`` wins when both
    are given.

    Raises:
        ValueError: If neither option is given, the position is out of range,
            or the input has no pages to take the size from.
    """
    if args.after is None and args.before is None:
        raise ValueError("Specify --after N or --before N.")

    reader = open_pdf(args.input)
    pages_list = list(reader.pages)
    total = len(pages_list)
    if total == 0:
        raise ValueError("Input PDF has no pages; cannot size the blank page.")

    # Convert the 1-based CLI position into a list insertion index.
    if args.after is not None:
        insert_idx = args.after
        option = f"--after {args.after}"
    else:
        insert_idx = args.before - 1
        option = f"--before {args.before}"
    if insert_idx < 0 or insert_idx > total:
        raise ValueError(f"{option} is out of range.")

    ref_page = pages_list[0]
    width = float(ref_page.mediabox.width)
    height = float(ref_page.mediabox.height)

    blank_buf = io.BytesIO()
    c = rl_canvas.Canvas(blank_buf, pagesize=(width, height))
    # Nothing is drawn, so end the page explicitly; save() only closes a
    # page on its own when there is pending drawing data.
    c.showPage()
    c.save()
    blank_buf.seek(0)
    blank_reader = PdfReader(blank_buf)
    blank_page = blank_reader.pages[0]

    pages_list.insert(insert_idx, blank_page)

    writer = PdfWriter()
    for p in pages_list:
        writer.add_page(p)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_insert_pdf(args: argparse.Namespace) -> None:
    """
    Insert every page of ``args.insert_file`` into ``args.input``.

    ``args.after`` N places the pages after page N of the base document
    (0 puts them first); otherwise ``args.before`` N places them before
    page N. ``--after`` wins when both are given. The result is saved to
    ``args.output``.

    Raises:
        ValueError: If neither option is given or the position lies outside
            the base document.
    """
    if args.after is None and args.before is None:
        raise ValueError("Specify --after N or --before N.")

    base_reader = open_pdf(args.input)
    ins_reader = open_pdf(args.insert_file)
    base_pages = list(base_reader.pages)
    ins_pages = list(ins_reader.pages)

    if args.after is not None:
        pos = args.after
        option = f"--after {args.after}"
    else:
        pos = args.before - 1
        option = f"--before {args.before}"

    # Without this check a negative position would be read as a
    # from-the-end slice index and land the pages in the wrong place.
    if pos < 0 or pos > len(base_pages):
        raise ValueError(f"{option} is out of range.")

    result = base_pages[:pos] + ins_pages + base_pages[pos:]
    writer = PdfWriter()
    for p in result:
        writer.add_page(p)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_replace(args: argparse.Namespace) -> None:
    """
    Swap pages of ``args.input`` for pages taken from ``args.replace_file``.

    ``args.pages`` selects the pages to replace and ``args.replace_pages``
    the pages to use instead. Both selections are paired in ascending page
    order (the order ``parse_page_range`` returns), so they must contain the
    same number of pages. The result is saved to ``args.output``.

    Raises:
        ValueError: If a selection is invalid or the two differ in length.
    """
    base = open_pdf(args.input)
    donor = open_pdf(args.replace_file)
    n_base = len(base.pages)
    n_donor = len(donor.pages)

    targets = parse_page_range(args.pages, n_base)
    substitutes = parse_page_range(args.replace_pages, n_donor)

    if len(targets) != len(substitutes):
        raise ValueError(
            f"Number of pages to replace ({len(targets)}) must match "
            f"number of replacement pages ({len(substitutes)})."
        )

    # base page index -> donor page index
    swap = {target: substitute for target, substitute in zip(targets, substitutes)}

    combined = PdfWriter()
    for index, original in enumerate(base.pages):
        chosen = donor.pages[swap[index]] if index in swap else original
        combined.add_page(chosen)
    save_pdf(combined, args.output)
|
|
|
|
def cmd_crop(args: argparse.Namespace) -> None:
    """
    Crop pages of ``args.input`` to a bounding box and save to ``args.output``.

    ``args.box`` is "left,bottom,right,top". The box becomes the media box of
    each page in ``args.pages`` (all pages when it is not given).

    Raises:
        ValueError: If the box does not consist of four numbers, or does not
            satisfy left < right and bottom < top.
    """
    try:
        box_vals = [float(v) for v in args.box.split(",")]
    except ValueError:
        raise ValueError("--box must be 'left,bottom,right,top'.") from None
    if len(box_vals) != 4:
        raise ValueError("--box must be 'left,bottom,right,top'.")
    left, bottom, right, top = box_vals
    # An inverted or empty box would give the page a non-positive size.
    if right <= left or top <= bottom:
        raise ValueError("--box needs left < right and bottom < top.")

    reader = open_pdf(args.input)
    total = len(reader.pages)
    indices = set(parse_page_range(args.pages, total)) if args.pages else set(range(total))

    writer = PdfWriter()
    for i, page in enumerate(reader.pages):
        if i in indices:
            page.mediabox.lower_left = (left, bottom)
            page.mediabox.upper_right = (right, top)
        writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_scale(args: argparse.Namespace) -> None:
    """
    Scale every page of ``args.input`` and save to ``args.output``.

    With ``args.factor`` each page is scaled uniformly by that factor.
    Otherwise ``args.to_size`` names an entry of ``PAGE_SIZES`` (looked up
    lower-cased) and each page is stretched to exactly that width and height,
    which changes the aspect ratio when the proportions differ.
    ``--factor`` wins when both are given.

    Raises:
        ValueError: If neither option is given, the factor is not positive,
            or the size name is unknown.
    """
    factor = args.factor
    target = None
    # Validate once, before any file is opened or page is touched.
    if factor is not None:
        if factor <= 0:
            raise ValueError("--factor must be greater than 0.")
    elif args.to_size:
        target = PAGE_SIZES.get(args.to_size.lower())
        if target is None:
            raise ValueError(f"Unknown page size: {args.to_size}. Choose from {list(PAGE_SIZES.keys())}")
    else:
        raise ValueError("Specify --factor F or --to-size NAME.")

    reader = open_pdf(args.input)
    writer = PdfWriter()

    for page in reader.pages:
        if target is None:
            page.scale(factor, factor)
        else:
            tw, th = target
            # Independent factors per axis so the page lands on the target size.
            fx = tw / float(page.mediabox.width)
            fy = th / float(page.mediabox.height)
            page.scale(fx, fy)
        writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_watermark(args: argparse.Namespace) -> None:
    """
    Merge a watermark into every page of ``args.input``; save to ``args.output``.

    With ``args.watermark_pdf`` the first page of that file is used for all
    pages. Otherwise a text watermark is generated per page, sized to the
    page, from ``args.text`` (default "WATERMARK"), ``args.opacity``
    (default 0.15) and ``args.angle`` (default 45).
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()

    shared_wm_page = None
    if args.watermark_pdf:
        # Open the watermark file once instead of once per page.
        wm_source = open_pdf(args.watermark_pdf)
        shared_wm_page = wm_source.pages[0]
    else:
        text = args.text or "WATERMARK"
        # Compare against None: 0 is a legitimate opacity and angle.
        opacity = 0.15 if args.opacity is None else args.opacity
        angle = 45 if args.angle is None else args.angle

    for page in reader.pages:
        if shared_wm_page is not None:
            wm_page = shared_wm_page
        else:
            w = float(page.mediabox.width)
            h = float(page.mediabox.height)
            wm_buf = make_watermark_pdf(text, w, h, opacity=opacity, angle=angle)
            wm_page = PdfReader(wm_buf).pages[0]

        page.merge_page(wm_page)
        writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_stamp(args: argparse.Namespace) -> None:
    """
    Merge the first page of ``args.stamp_pdf`` into pages of ``args.input``.

    Only the pages in ``args.pages`` are stamped; all pages when it is not
    given. The result is saved to ``args.output``.
    """
    source = open_pdf(args.input)
    stamp_source = open_pdf(args.stamp_pdf)
    overlay = stamp_source.pages[0]
    n_pages = len(source.pages)
    if args.pages:
        targets = set(parse_page_range(args.pages, n_pages))
    else:
        targets = set(range(n_pages))

    stamped = PdfWriter()
    for index, page in enumerate(source.pages):
        if index in targets:
            page.merge_page(overlay)
        stamped.add_page(page)
    save_pdf(stamped, args.output)
|
|
|
|
def cmd_number(args: argparse.Namespace) -> None:
    """
    Stamp a page number on every page of ``args.input``; save to ``args.output``.

    ``args.position`` picks the anchor (default "bottom-center"),
    ``args.start`` is the number given to the first page (default 1), and
    every "{n}" in ``args.format`` (default "{n}") is replaced by the number.
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()
    position = args.position or "bottom-center"
    # Compare against None: 0 is a legitimate first page number.
    start = 1 if args.start is None else args.start
    fmt = args.format or "{n}"

    for i, page in enumerate(reader.pages):
        w = float(page.mediabox.width)
        h = float(page.mediabox.height)
        number_str = fmt.replace("{n}", str(i + start))
        num_buf = make_page_number_pdf(number_str, w, h, position=position)
        num_reader = PdfReader(num_buf)
        page.merge_page(num_reader.pages[0])
        writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_encrypt(args: argparse.Namespace) -> None:
    """
    Copy ``args.input`` to ``args.output`` protected by passwords.

    ``args.user_pass`` is the user password. ``args.owner_pass`` is the owner
    password and falls back to the user password when not given.
    """
    source = open_pdf(args.input)
    protected = PdfWriter()
    for page in source.pages:
        protected.add_page(page)

    user_secret = args.user_pass if args.user_pass else ""
    owner_secret = args.owner_pass if args.owner_pass else user_secret
    protected.encrypt(user_secret, owner_secret)
    save_pdf(protected, args.output)
|
|
|
|
def cmd_decrypt(args: argparse.Namespace) -> None:
    """
    Copy ``args.input`` to ``args.output`` without password protection.

    The input is opened with ``args.password``. When it turns out not to be
    encrypted an informational note is printed and the pages are copied
    anyway.
    """
    reader = open_pdf(args.input, password=args.password)
    # The note used to depend on the password being empty as well, which a
    # required --password option makes practically unreachable.
    if not reader.is_encrypted:
        print("[INFO] File is not encrypted.")
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_metadata(args: argparse.Namespace) -> None:
    """
    Print the metadata of ``args.input`` and optionally write an edited copy.

    A copy is written to ``args.output`` only when it is given together with
    at least one of ``args.set_title``, ``args.set_author``,
    ``args.set_subject`` or ``args.set_keywords``. The copy keeps the
    existing metadata; the given fields override it.
    """
    reader = open_pdf(args.input)
    # The reader yields None when the file has no document information.
    meta = reader.metadata

    def _attr(name: str):
        return getattr(meta, name) if meta is not None else None

    def _raw(key: str):
        return meta.get(key, "") if meta is not None else ""

    print("\n--- PDF Metadata ---")
    print(f" Title : {_attr('title')}")
    print(f" Author : {_attr('author')}")
    print(f" Subject : {_attr('subject')}")
    print(f" Keywords : {_raw('/Keywords')}")
    print(f" Creator : {_attr('creator')}")
    print(f" Producer : {_attr('producer')}")
    print(f" Created : {_raw('/CreationDate')}")
    print(f" Modified : {_raw('/ModDate')}")
    print()

    requested = {
        "/Title": args.set_title,
        "/Author": args.set_author,
        "/Subject": args.set_subject,
        "/Keywords": args.set_keywords,
    }
    new_meta = {key: value for key, value in requested.items() if value}

    if args.output and new_meta:
        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        # Carry the existing fields over first so that editing one field does
        # not wipe the others; the requested values then override them.
        if meta is not None:
            writer.add_metadata(meta)
        writer.add_metadata(new_meta)
        save_pdf(writer, args.output)
|
|
|
|
def cmd_bookmarks(args: argparse.Namespace) -> None:
    """
    Print the outline of ``args.input`` and optionally add bookmarks.

    When both ``args.output`` and ``args.add`` are given, a copy of the pages
    is written to ``args.output`` with the bookmarks from ``args.add``, a
    comma-separated list of "Title:page" entries with 1-based pages. The page
    number follows the last colon, so titles may contain colons.

    Raises:
        ValueError: If an ``args.add`` entry is malformed or its page is not
            in the document.
    """
    reader = open_pdf(args.input)
    outlines = reader.outline

    def _print_outline(items, indent=0):
        for item in items:
            if isinstance(item, list):
                # Nested lists hold the children of the preceding entry.
                _print_outline(item, indent + 2)
                continue
            try:
                title = item.title
                page_no = reader.get_destination_page_number(item)
            except Exception as exc:
                # Listing stays best-effort, but a broken entry is reported
                # instead of vanishing from the output.
                print(f"{' ' * indent} [WARN] unreadable outline entry: {exc}")
                continue
            where = f"page {page_no + 1}" if page_no is not None else "page unknown"
            print(f"{' ' * indent} {title} ({where})")

    print("\n--- Bookmarks / Outline ---")
    if outlines:
        _print_outline(outlines)
    else:
        print(" (none)")
    print()

    if args.output and args.add:
        total = len(reader.pages)
        # Validate every entry before anything is written.
        parsed = []
        for entry in args.add.split(","):
            entry = entry.strip()
            title, sep, pg = entry.rpartition(":")
            title = title.strip()
            if not sep or not title:
                raise ValueError(f"Bookmark '{entry}' must look like 'Title:page'.")
            try:
                page_no = int(pg)
            except ValueError:
                raise ValueError(f"Bookmark '{entry}' has an invalid page number.") from None
            if page_no < 1 or page_no > total:
                raise ValueError(
                    f"Bookmark page {page_no} is out of bounds (document has {total} pages)."
                )
            parsed.append((title, page_no - 1))

        writer = PdfWriter()
        for page in reader.pages:
            writer.add_page(page)
        for title, page_index in parsed:
            writer.add_outline_item(title, page_index)
        save_pdf(writer, args.output)
|
|
|
|
def cmd_text(args: argparse.Namespace) -> None:
    """
    Extract the text of ``args.input``, page by page.

    Only the pages in ``args.pages`` are read; all pages when it is not
    given. Each page is preceded by a "=== Page N ===" header. The text is
    written as UTF-8 to ``args.output`` when given, otherwise printed.
    """
    source = open_pdf(args.input)
    n_pages = len(source.pages)
    if args.pages:
        wanted = parse_page_range(args.pages, n_pages)
    else:
        wanted = list(range(n_pages))

    sections = []
    for idx in wanted:
        # A page that yields no text contributes an empty body.
        body = source.pages[idx].extract_text() or ""
        sections.append(f"=== Page {idx + 1} ===\n{body}\n")
    full_text = "\n".join(sections)

    if not args.output:
        print(full_text)
        return

    with open(args.output, "w", encoding="utf-8") as handle:
        handle.write(full_text)
    print(f"[OK] Text saved -> {args.output}")
|
|
|
|
def cmd_info(args: argparse.Namespace) -> None:
    """
    Print a summary of ``args.input``.

    Shows the page count, the encryption flag, title and author, and the
    media-box size of every page in points and inches.
    """
    reader = open_pdf(args.input)
    total = len(reader.pages)
    # The reader yields None when the file has no document information.
    meta = reader.metadata
    title = meta.title if meta is not None else None
    author = meta.author if meta is not None else None

    print("\n--- PDF Info ---")
    print(f" File : {args.input}")
    print(f" Pages : {total}")
    print(f" Encrypted : {reader.is_encrypted}")
    print(f" Title : {title}")
    print(f" Author : {author}")
    print()
    print(" Page Dimensions:")
    for i, page in enumerate(reader.pages):
        w = float(page.mediabox.width)
        h = float(page.mediabox.height)
        # 72 points make one inch.
        print(f" Page {i + 1:4d}: {w:.1f} x {h:.1f} pt ({w/72:.2f} x {h/72:.2f} in)")
    print()
|
|
|
|
def cmd_nup(args: argparse.Namespace) -> None:
    """
    Arrange several pages of ``args.input`` on each sheet of ``args.output``.

    ``args.layout`` is "CxR" (columns x rows), e.g. 2x1 or 2x2. Every output
    sheet has the size of the first input page and is filled left to right,
    top to bottom. Each source page is rendered to an image at 72 dpi with
    pdf2image and drawn into its cell, so the output is rasterised.

    Raises:
        ValueError: If the layout is malformed or not positive, or the input
            has no pages.
    """
    layout = args.layout.lower()
    try:
        cols, rows = [int(x) for x in layout.split("x")]
    except ValueError as err:
        raise ValueError("--layout must be CxR, e.g. 2x1 or 2x2") from err
    # Zero or negative counts would divide by zero or yield empty sheets.
    if cols < 1 or rows < 1:
        raise ValueError("--layout must be CxR, e.g. 2x1 or 2x2")

    reader = open_pdf(args.input)
    per_sheet = cols * rows
    total = len(reader.pages)
    if total == 0:
        raise ValueError("Input PDF has no pages.")

    # Imported here, once, so that only this command requires pdf2image.
    from pdf2image import convert_from_bytes

    first_page = reader.pages[0]
    sheet_w = float(first_page.mediabox.width)
    sheet_h = float(first_page.mediabox.height)
    cell_w = sheet_w / cols
    cell_h = sheet_h / rows

    writer = PdfWriter()

    for first in range(0, total, per_sheet):
        buf = io.BytesIO()
        c = rl_canvas.Canvas(buf, pagesize=(sheet_w, sheet_h))

        last = min(first + per_sheet, total)
        for slot, page_idx in enumerate(range(first, last)):
            row, col = divmod(slot, cols)
            x_off = col * cell_w
            # Row 0 is the top row; canvas y coordinates grow upwards.
            y_off = sheet_h - (row + 1) * cell_h

            # Serialise the single page so it can be rendered to an image.
            sub_buf = io.BytesIO()
            sub_writer = PdfWriter()
            sub_writer.add_page(reader.pages[page_idx])
            sub_writer.write(sub_buf)

            imgs = convert_from_bytes(sub_buf.getvalue(), dpi=72)
            if imgs:
                c.drawInlineImage(imgs[0], x_off, y_off, width=cell_w, height=cell_h)

        # End the page explicitly so a sheet exists even if nothing was drawn.
        c.showPage()
        c.save()
        buf.seek(0)
        nup_reader = PdfReader(buf)
        writer.add_page(nup_reader.pages[0])

    save_pdf(writer, args.output)
|
|
|
|
def cmd_compress(args: argparse.Namespace) -> None:
    """
    Write a losslessly compressed copy of ``args.input`` to ``args.output``.

    The content streams of every page are compressed, then identical and
    orphaned objects are removed from the output.
    """
    reader = open_pdf(args.input)
    writer = PdfWriter()
    for page in reader.pages:
        writer.add_page(page)
    # Compress the writer's own copies of the pages. Previously only object
    # de-duplication ran, so the page streams were left as they were.
    for page in writer.pages:
        page.compress_content_streams()
    writer.compress_identical_objects(remove_identicals=True, remove_orphans=True)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_batch_remove(args: argparse.Namespace) -> None:
    """
    Remove the pages in ``args.pages`` from every ``*.pdf`` in ``args.dir``.

    Each result is saved next to its source as ``<stem><suffix>.pdf``
    (suffix defaults to "_modified"). A file for which the page selection is
    invalid is reported and skipped.
    """
    pattern = os.path.join(args.dir, "*.pdf")
    candidates = sorted(glob.glob(pattern))
    suffix = args.suffix or "_modified"

    for pdf_path in candidates:
        target = os.path.join(args.dir, f"{Path(pdf_path).stem}{suffix}.pdf")
        source = open_pdf(pdf_path)
        n_pages = len(source.pages)
        try:
            doomed = set(parse_page_range(args.pages, n_pages))
        except ValueError as problem:
            print(f"[SKIP] {pdf_path}: {problem}")
            continue

        kept = PdfWriter()
        for index, page in enumerate(source.pages):
            if index in doomed:
                continue
            kept.add_page(page)
        save_pdf(kept, target)
|
|
|
|
def cmd_batch_merge(args: argparse.Namespace) -> None:
    """
    Merge every ``*.pdf`` in ``args.dir`` into ``args.output``.

    Files are appended in sorted path order. A file that is the output file
    itself (for example from an earlier run into the same directory) is
    reported and skipped.
    """
    pdfs = sorted(glob.glob(os.path.join(args.dir, "*.pdf")))
    output_resolved = Path(args.output).resolve()

    writer = PdfWriter()
    for pdf_path in pdfs:
        # Otherwise a re-run would merge the previous result into itself.
        if Path(pdf_path).resolve() == output_resolved:
            print(f"[SKIP] {pdf_path}: is the output file")
            continue
        reader = open_pdf(pdf_path)
        for page in reader.pages:
            writer.add_page(page)
    save_pdf(writer, args.output)
|
|
|
|
def cmd_batch_split(args: argparse.Namespace) -> None:
    """
    Split every ``*.pdf`` in ``args.dir`` into single-page files.

    The pages are written to ``args.out_dir`` (created when missing) as
    ``<stem>_page_NNNN.pdf``.
    """
    pattern = os.path.join(args.dir, "*.pdf")
    candidates = sorted(glob.glob(pattern))
    target_dir = Path(args.out_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    for pdf_path in candidates:
        base_name = Path(pdf_path).stem
        source = open_pdf(pdf_path)
        for number, page in enumerate(source.pages, start=1):
            single = PdfWriter()
            single.add_page(page)
            destination = target_dir / f"{base_name}_page_{number:04d}.pdf"
            save_pdf(single, str(destination))
|
|
|
|
| |
| |
| |
|
|
def build_parser() -> argparse.ArgumentParser:
    """
    Build the command-line parser with one sub-command per operation.

    The chosen sub-command name is stored in ``command`` and is required.
    """
    root = argparse.ArgumentParser(
        prog="pdf_manipulator",
        description="Full-featured PDF page manipulation toolkit by algorembrant",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    commands = root.add_subparsers(dest="command", required=True)

    def start(name: str, summary: str, *, with_output: bool = True) -> argparse.ArgumentParser:
        """Register sub-command *name* with -i/--input and, optionally, -o/--output (both required)."""
        cmd = commands.add_parser(name, help=summary)
        cmd.add_argument("-i", "--input", required=True)
        if with_output:
            cmd.add_argument("-o", "--output", required=True)
        return cmd

    # --- merge -------------------------------------------------------------
    merge = commands.add_parser("merge", help="Merge multiple PDFs")
    merge.add_argument("-i", "--inputs", nargs="+", required=True, metavar="FILE")
    merge.add_argument("-o", "--output", required=True)
    merge.add_argument("--interleave", action="store_true", help="Interleave pages from each file")

    # --- split -------------------------------------------------------------
    split = start("split", "Split PDF into pages or a range", with_output=False)
    split.add_argument("-o", "--output", required=True, help="Output directory")
    split.add_argument("--range", help="Page range to extract (e.g. 1-5 or 2,4,6)")

    # --- remove / extract / reorder ------------------------------------------
    remove = start("remove", "Remove pages from a PDF")
    remove.add_argument("--pages", required=True, help="Pages to remove, e.g. 1 or 1,3-5")

    extract = start("extract", "Extract pages to a new PDF")
    extract.add_argument("--pages", required=True, help="Pages to extract, e.g. 1-3")

    reorder = start("reorder", "Reorder pages")
    reorder.add_argument("--order", required=True, help="New order, e.g. 3,1,2,4")

    # --- rotate / reverse / duplicate ----------------------------------------
    rotate = start("rotate", "Rotate pages")
    rotate.add_argument("--angle", required=True, type=int, choices=[90, 180, 270])
    rotate.add_argument("--pages", help="Pages to rotate (all if omitted)")

    start("reverse", "Reverse page order")

    duplicate = start("duplicate", "Duplicate specified pages")
    duplicate.add_argument("--pages", required=True, help="Pages to duplicate")
    duplicate.add_argument("--times", type=int, default=2, help="Number of copies (default 2)")

    # --- insert-blank / insert / replace -------------------------------------
    insert_blank = start("insert-blank", "Insert blank page(s)")
    insert_blank.add_argument("--after", type=int, help="Insert after page N (1-indexed)")
    insert_blank.add_argument("--before", type=int, help="Insert before page N (1-indexed)")

    insert = start("insert", "Insert pages from another PDF")
    insert.add_argument("--insert-file", required=True)
    insert.add_argument("--after", type=int, help="Insert after page N")
    insert.add_argument("--before", type=int, help="Insert before page N")

    replace = start("replace", "Replace pages with pages from another PDF")
    replace.add_argument("--replace-file", required=True)
    replace.add_argument("--pages", required=True, help="Pages in base to replace")
    replace.add_argument("--replace-pages", required=True, help="Pages in replacement file to use")

    # --- crop / scale --------------------------------------------------------
    crop = start("crop", "Crop pages to a bounding box")
    crop.add_argument("--box", required=True, help="left,bottom,right,top in points")
    crop.add_argument("--pages", help="Pages to crop (all if omitted)")

    scale = start("scale", "Scale or resize pages")
    scale.add_argument("--factor", type=float, help="Scale factor, e.g. 0.5")
    scale.add_argument("--to-size", help="Target page size: a4, a3, a5, letter")

    # --- watermark / stamp / number ------------------------------------------
    watermark = start("watermark", "Add watermark to pages")
    watermark.add_argument("--text", help="Watermark text")
    watermark.add_argument("--opacity", type=float, default=0.15)
    watermark.add_argument("--angle", type=float, default=45.0)
    watermark.add_argument("--watermark-pdf", help="Use a PDF as watermark instead of text")

    stamp = start("stamp", "Overlay a stamp PDF on pages")
    stamp.add_argument("--stamp-pdf", required=True)
    stamp.add_argument("--pages", help="Pages to stamp (all if omitted)")

    number = start("number", "Add page numbers")
    number.add_argument(
        "--position",
        default="bottom-center",
        choices=list(NUMBER_POSITIONS.keys()),
    )
    number.add_argument("--start", type=int, default=1)
    number.add_argument("--format", default="{n}", help="Number format, use {n} for page number")

    # --- encrypt / decrypt ---------------------------------------------------
    encrypt = start("encrypt", "Encrypt a PDF")
    encrypt.add_argument("--user-pass", required=True)
    encrypt.add_argument("--owner-pass", default=None)

    decrypt = start("decrypt", "Remove password from PDF")
    decrypt.add_argument("--password", required=True)

    # --- metadata / bookmarks / text / info (output is optional or absent) ---
    metadata = start("metadata", "View or edit PDF metadata", with_output=False)
    metadata.add_argument("-o", "--output", default=None)
    metadata.add_argument("--set-title")
    metadata.add_argument("--set-author")
    metadata.add_argument("--set-subject")
    metadata.add_argument("--set-keywords")

    bookmarks = start("bookmarks", "List or add bookmarks", with_output=False)
    bookmarks.add_argument("-o", "--output", default=None)
    bookmarks.add_argument("--add", help="Bookmarks to add: 'Title:page,Title2:page2'")

    text = start("text", "Extract text from PDF", with_output=False)
    text.add_argument("-o", "--output", default=None, help="Save to file instead of printing")
    text.add_argument("--pages", help="Pages to extract (all if omitted)")

    start("info", "Display PDF information", with_output=False)

    # --- nup / compress ------------------------------------------------------
    nup = start("nup", "Arrange N pages per sheet")
    nup.add_argument("--layout", default="2x1", help="Layout e.g. 2x1, 2x2, 4x1")

    start("compress", "Compress PDF streams")

    # --- batch operations (work on a directory instead of a single input) ----
    batch_remove = commands.add_parser("batch-remove", help="Remove pages from all PDFs in a directory")
    batch_remove.add_argument("--dir", required=True)
    batch_remove.add_argument("--pages", required=True)
    batch_remove.add_argument("--suffix", default="_modified")

    batch_merge = commands.add_parser("batch-merge", help="Merge all PDFs in a directory")
    batch_merge.add_argument("--dir", required=True)
    batch_merge.add_argument("-o", "--output", required=True)

    batch_split = commands.add_parser("batch-split", help="Split all PDFs in a directory into pages")
    batch_split.add_argument("--dir", required=True)
    batch_split.add_argument("--out-dir", required=True)

    return root
|
|
|
|
# Dispatch table: sub-command name as parsed into ``args.command`` -> the
# handler function that implements it.
COMMANDS = dict(
    (
        ("merge", cmd_merge),
        ("split", cmd_split),
        ("remove", cmd_remove),
        ("extract", cmd_extract),
        ("reorder", cmd_reorder),
        ("rotate", cmd_rotate),
        ("reverse", cmd_reverse),
        ("duplicate", cmd_duplicate),
        ("insert-blank", cmd_insert_blank),
        ("insert", cmd_insert_pdf),
        ("replace", cmd_replace),
        ("crop", cmd_crop),
        ("scale", cmd_scale),
        ("watermark", cmd_watermark),
        ("stamp", cmd_stamp),
        ("number", cmd_number),
        ("encrypt", cmd_encrypt),
        ("decrypt", cmd_decrypt),
        ("metadata", cmd_metadata),
        ("bookmarks", cmd_bookmarks),
        ("text", cmd_text),
        ("info", cmd_info),
        ("nup", cmd_nup),
        ("compress", cmd_compress),
        ("batch-remove", cmd_batch_remove),
        ("batch-merge", cmd_batch_merge),
        ("batch-split", cmd_batch_split),
    )
)
|
|
|
|
def main() -> None:
    """
    Parse the command line and run the selected sub-command.

    Exits with status 1 after printing the help text when the command has no
    handler, or after printing "[ERROR] <message>" to stderr when the handler
    raises.
    """
    cli = build_parser()
    options = cli.parse_args()

    run = COMMANDS.get(options.command)
    if run is None:
        cli.print_help()
        sys.exit(1)

    try:
        run(options)
    except Exception as exc:
        # Top-level boundary: report the failure without a traceback.
        print(f"[ERROR] {exc}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()
|
|