"""Generate one PDF report per sending locality from a TSV export, then zip them.

Each TSV row becomes a list of question/answer entries; rows are grouped by
their "Sending locality:" value, rendered through a Jinja2 template, and
printed to PDF with a Playwright-driven browser.
"""

import argparse
import csv
import os
import re
import shutil
import tempfile
import zipfile
from itertools import groupby
from pathlib import Path

# Column headers looked up on every row.
# NOTE(review): the name key carries a trailing space while the entry in
# SKIP_COLS does not, so a header spelled with the trailing space is NOT
# skipped and is rendered by the template's "Name" branch. This looks
# deliberate -- confirm against the real TSV header.
NAME_COL = "Name (last, first): "
LOCALITY_COL = "Sending locality:"

# Known columns that never become question/answer entries.
SKIP_COLS = {
    'Timestamp',
    'Email Address',
    'Name (last, first):',
    'Sending locality:'
}

# Matches headers such as "Comments 1" (case-insensitive).
COMMENTS_RE = re.compile(r'^Comments\s\d+$', re.IGNORECASE)

# Jinja2 template rendered once per locality with ``rows`` = that locality's rows.
# NOTE(review): as seen here the template contains no HTML/CSS markup, so the
# page-break styling the original comment referred to is not present --
# confirm against the intended template.
HTML_TEMPLATE = r""" Data Report {% for row in rows %}

Sending Locality: {{ row.locality }}

{% for entry in row.entries %} {% if entry.type == "comments" %}

Comments:

{{ entry.value }}

{% elif entry.type == "inline" %}
{% if "Column 4" in entry.label %}

Please evaluate your progress for this past term (1-5, 5 indicating the most progress):

Truth: {{ entry.value }}

{% elif "Column 6" in entry.label %}

Life: {{ entry.value }}

{% elif "Column 8" in entry.label %}

Gospel: {{ entry.value }}

{% elif "Column 10" in entry.label %}

Character: {{ entry.value }}

{% elif "Column 12" in entry.label %}

Service: {{ entry.value }}

{% else %} {{ entry.label }}

{{ entry.value }}

{% endif %}
{% else %} {% if "Name" in entry.label %}

{{ entry.label }} {{ entry.value }}

{% elif "Column" in entry.label %}

Comments:

{{ entry.value }}


{% else %}

{{ entry.label }}

{{ entry.value }}

{% endif %} {% endif %} {% endfor %}
{% endfor %} """

PDF_MARGIN = {"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}


def is_number(s):
    """Return True if s can be interpreted as a float, False otherwise.

    Non-string, non-numeric inputs such as None return False instead of
    raising, so callers can pass raw cell values.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True


def ensure_colon(heading: str) -> str:
    """Strip heading and append ':' only if it does not already end with one."""
    heading = heading.strip()
    if not heading.endswith(":"):
        heading += ":"
    return heading


def _clean(value) -> str:
    """Return a stripped cell value, treating missing cells as empty.

    csv.DictReader fills cells missing from short rows with None and stores
    surplus cells of long rows as a list; neither is usable text.
    """
    return value.strip() if isinstance(value, str) else ""


def _build_entries(row) -> list:
    """Turn one TSV row into the list of entry dicts consumed by the template."""
    entries = []
    for col, raw in row.items():
        # A None key holds the surplus cells of an over-long row.
        if col is None or col in SKIP_COLS:
            continue
        value = _clean(raw)
        if not value:
            continue
        title = col.strip()
        if COMMENTS_RE.match(title):
            # e.g. "Comments 1" => rendered under a plain "Comments:" heading
            entries.append({"type": "comments", "label": "Comments", "value": value})
        elif is_number(value):
            # Numeric answer => label and value on one line
            entries.append({"type": "inline", "label": ensure_colon(title), "value": value})
        else:
            # Free-text answer => label on one line, answer on the next
            entries.append({"type": "block", "label": title, "value": value})
    return entries


def _safe_filename(locality: str) -> str:
    """Return locality reduced to filename-safe characters (never empty)."""
    cleaned = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
    # An empty locality would otherwise produce a file named just ".pdf".
    return cleaned or "unknown_locality"


def main():
    """Command-line entry point: read the TSV, write one PDF per locality, zip the folder.

    Side effects: deletes any existing ``<output>.zip`` and output folder,
    recreates the folder, writes the PDFs into it and creates the zip archive
    next to it.
    """
    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    # Third-party imports are deferred so that --help and the pure helpers work
    # without the browser stack installed; they still run before any file or
    # folder is touched, so a missing dependency cannot destroy previous output.
    import jinja2
    from playwright.sync_api import sync_playwright

    # 1. Read the TSV data (newline='' lets the csv module handle line endings
    #    inside quoted cells itself).
    with open(args.input_tsv, 'r', encoding='utf-8-sig', newline='') as f:
        rows = list(csv.DictReader(f, delimiter='\t'))

    # 2. Sort rows by sending locality first, then by name. This ordering is
    #    also what groupby below relies on.
    rows.sort(key=lambda r: (_clean(r.get(LOCALITY_COL)), _clean(r.get(NAME_COL))))

    # 3. Process each row into a data structure for Jinja2
    processed_rows = [
        {
            "name": _clean(row.get(NAME_COL)),
            "locality": _clean(row.get(LOCALITY_COL)),
            "entries": _build_entries(row),
        }
        for row in rows
    ]

    # 4. Prepare the output locations. normpath drops a trailing separator so
    #    the archive is created beside the folder rather than inside it.
    output_dir = os.path.normpath(args.output_pdf_folder)
    zip_filename = f"{output_dir}.zip"

    # Remove existing output zip file if it exists
    if os.path.exists(zip_filename):
        os.remove(zip_filename)

    # Create or reset the output folder
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # 5. Render and export one PDF per locality. Autoescaping keeps markup
    #    characters in survey answers from being interpreted by the browser.
    template = jinja2.Environment(autoescape=True).from_string(HTML_TEMPLATE)

    with sync_playwright() as p:
        browser = getattr(p, args.browser).launch()
        try:
            context = browser.new_context()
            for locality, group in groupby(processed_rows, key=lambda r: r["locality"]):
                rendered_html = template.render(rows=list(group))

                # delete=False: the file must outlive this handle so the
                # browser can open it; it is removed in the finally below.
                with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
                    tmp_html_path = tmp_file.name
                    tmp_file.write(rendered_html.encode("utf-8"))

                pdf_path = os.path.join(output_dir, f"{_safe_filename(locality)}.pdf")

                page = context.new_page()
                try:
                    # as_uri() builds a valid file:// URL on every platform.
                    page.goto(Path(tmp_html_path).as_uri())
                    page.pdf(path=pdf_path, format="letter", margin=PDF_MARGIN)
                finally:
                    page.close()
                    os.remove(tmp_html_path)

                print(f"✅ Generated PDF: {pdf_path}")
        finally:
            browser.close()

    # 📦 Zip the output folder
    shutil.make_archive(output_dir, 'zip', output_dir)
    print(f"📁 Zipped folder created: {zip_filename}")


if __name__ == "__main__":
    main()