PDFs
Here are all the snippets available using this library:
create_invoice.py
from fpdf import FPDF
# Alignment code handed to ``FPDF.cell(align=...)`` for the invoice headings.
CENTER = "C"
def create_custom_invoice(
    title: str,
    subtitle: str,
    items: list[tuple[str, int, int]],
    filename: str = "invoice.pdf",
) -> None:
    """
    Creates a custom invoice PDF document using fpdf2.

    The page contains a centered title, a centered subtitle and a bordered
    table with one row per item and one cell per tuple element.

    Args:
        title: Title of the invoice.
        subtitle: Subtitle of the invoice.
        items: List of tuples containing item details (name, quantity, price).
        filename: Name of the output PDF file.

    Example:
        >>> create_custom_invoice(
        ...     "Invoice",
        ...     "Number #123",
        ...     [
        ...         ("Software Development 1", 1, 5500),
        ...         ("Consultancy 2", 1, 1000),
        ...         ("Equipment", 3, 300),
        ...     ]
        ... )
    """
    pdf = FPDF()
    pdf.add_page()
    # "Helvetica" is the core font fpdf2 actually uses; asking for "Arial"
    # only triggers a substitution warning and yields the same glyphs.
    pdf.set_font("Helvetica", size=12)

    heading_height = 10
    for heading in (title, subtitle):
        # Width 0 stretches the cell to the right margin, so the text is
        # centered on the printable area. A fixed 200 mm cell is wider than
        # the 190 mm available on the default A4 page and shifts the text.
        # The text is passed positionally and the line break is an explicit
        # ln() call, avoiding the deprecated ``txt=`` / ``ln=`` keywords.
        pdf.cell(0, heading_height, heading, align=CENTER)
        pdf.ln(heading_height)
    pdf.ln(10)  # vertical gap between the headings and the table

    col_width = 60
    row_height = 10
    for item in items:
        for element in item:
            pdf.cell(col_width, row_height, str(element), border=1)
        # Return to the left margin and move down one row.
        pdf.ln(row_height)

    pdf.output(filename)
extract_metadata.py
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfparser import PDFParser
def extract_pdf_metadata(pdf_file: str) -> dict:
    """
    Extracts metadata from a PDF file using PDFMiner.six.

    Args:
        pdf_file: Path to the PDF file.

    Returns:
        A dictionary with the keys "Title", "Author", "Subject", "Keywords",
        "Producer", "Creator" and "CreationDate". A field the document does
        not define maps to None. Values are returned as PDFMiner provides
        them once indirect references are resolved; no text decoding is
        applied here.
    """
    # Local import keeps this extra pdfminer helper next to its only use.
    from pdfminer.pdftypes import resolve1

    fields = (
        "Title",
        "Author",
        "Subject",
        "Keywords",
        "Producer",
        "Creator",
        "CreationDate",
    )
    metadata = dict.fromkeys(fields)
    with open(pdf_file, "rb") as f:
        document = PDFDocument(PDFParser(f))
        # ``document.info`` is a list of info dictionaries (it can be empty),
        # not a single dict, so calling ``.get`` on it raises AttributeError.
        # The first dictionary that supplies a field wins.
        for info in document.info:
            for field in fields:
                if metadata[field] is None and field in info:
                    # Resolve while the file is still open: an indirect
                    # reference is read from the stream on demand.
                    metadata[field] = resolve1(info[field])
    return metadata
extract_images.py
from pypdf import PdfReader
def extract_images_from_first_page(pdf_file: str, output_dir: str) -> None:
    """
    Extracts images from the first page of a PDF file using pypdf.

    Each image is written to ``output_dir`` as ``page_0_image_<n><ext>``,
    where ``<n>`` counts images from 0 and ``<ext>`` is taken from the name
    pypdf reports for the image (falling back to ``.png`` when that name has
    no extension). The directory is created if it does not exist. A PDF
    without pages produces no files.

    Args:
        pdf_file: Path to the PDF file.
        output_dir: Path dir to save the extracted images.
    """
    # Local import keeps the path handling dependency next to its only use.
    from pathlib import Path

    reader = PdfReader(pdf_file)
    if len(reader.pages) == 0:
        return

    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # Only the first page is processed, as the function name promises.
    first_page = reader.pages[0]
    for count, image in enumerate(first_page.images):
        # The bytes are in the image's own format, so keep its extension
        # instead of labelling everything as PNG.
        suffix = Path(image.name).suffix or ".png"
        (target_dir / f"page_0_image_{count}{suffix}").write_bytes(image.data)