PDFs library | Robocorp documentation

create_invoice.py

from fpdf import FPDF

CENTER = "C"


def create_custom_invoice(
    title: str,
    subtitle: str,
    items: list[tuple[str, int, int]],
    filename: str = "invoice.pdf",
):
    """
    Creates a custom invoice PDF document using fpdf2.

    The document has a single page with a centered title and subtitle,
    followed by a bordered table with one row per item and one cell per
    element of that item.

    Args:
        title: Title of the invoice.
        subtitle: Subtitle of the invoice.
        items: List of tuples containing item details (name, quantity, price).
        filename: Name of the output PDF file.

    Example:
        >>> create_custom_invoice(
        ...     "Invoice",
        ...     "Number #123",
        ...     [
        ...         ("Software Development 1", 1, 5500),
        ...         ("Consultancy 2", 1, 1000),
        ...         ("Equipment", 3, 300),
        ...     ]
        ... )
    """
    pdf = FPDF()
    pdf.add_page()
    # "Arial" is only an alias that fpdf2 substitutes with the core font
    # Helvetica (emitting a warning); name the core font directly.
    pdf.set_font("Helvetica", size=12)

    # A width of 0 stretches the cell from the left margin to the right
    # margin, so the centered text sits on the page's real center line.
    # The previous fixed width of 200 overran the right margin and shifted
    # the headings off-center.
    # The text is passed positionally because its keyword name differs
    # between fpdf2 releases (`txt` in older ones, `text` in newer ones).
    heading_height = 10
    for heading in (title, subtitle):
        pdf.cell(0, heading_height, heading, ln=True, align=CENTER)
    pdf.ln(10)

    col_width = 60
    row_height = 10
    for item in items:
        for element in item:
            # Every element is rendered as text, whatever its type.
            pdf.cell(col_width, row_height, str(element), border=1)
        # Move to the start of the next table row.
        pdf.ln(row_height)

    pdf.output(filename)

extract_metadata.py

from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdfparser import PDFParser


def extract_pdf_metadata(pdf_file):
    """
    Extracts metadata from a PDF file using PDFMiner.six.

    Args:
        pdf_file: Path to the PDF file.

    Returns:
        A dictionary containing PDF metadata with the keys "Title",
        "Author", "Subject", "Keywords", "Producer", "Creator" and
        "CreationDate". A key maps to None when the document does not
        provide that field. Values are returned exactly as PDFMiner
        exposes them (NOTE(review): these are typically raw bytes rather
        than str - confirm whether callers need them decoded).
    """
    fields = (
        "Title",
        "Author",
        "Subject",
        "Keywords",
        "Producer",
        "Creator",
        "CreationDate",
    )

    with open(pdf_file, "rb") as f:
        parser = PDFParser(f)
        document = PDFDocument(parser)
        info = document.info

        # PDFDocument.info is a list of Info dictionaries (one per trailer
        # that carries one), not a single dict, so calling .get() on it
        # directly fails. Use the first entry, and fall back to an empty
        # mapping when the document has no Info dictionary at all.
        if isinstance(info, dict):
            doc_info = info
        elif info:
            doc_info = info[0]
        else:
            doc_info = {}

        return {field: doc_info.get(field) for field in fields}

extract_images.py

from pypdf import PdfReader


def extract_images_from_first_page(pdf_file: str, output_dir: str) -> None:
    """
    Extracts images from the first page of a PDF file using PyPDF.

    Each image is written to
    ``{output_dir}/page_0_image_{index}.png``, where ``index`` counts the
    images on the page starting from 0. Nothing is written when the PDF
    has no pages or the first page has no images.

    Args:
        pdf_file: Path to the PDF file.
        output_dir: Path dir to save the extracted images.
    """
    reader = PdfReader(pdf_file)

    # A PDF without pages has nothing to extract.
    if len(reader.pages) == 0:
        return

    # Only the first page is processed, as the function name and contract
    # state; the previous loop walked every page of the document.
    page_num = 0
    first_page = reader.pages[page_num]

    for count, image_file_object in enumerate(first_page.images):
        # NOTE(review): the bytes are written unchanged under a ".png"
        # name; the embedded image may be in another format (pypdf exposes
        # the detected file name via `image_file_object.name`) - confirm
        # whether the extension should follow the real format.
        with open(f"{output_dir}/page_{page_num}_image_{count}.png", "wb") as fp:
            fp.write(image_file_object.data)