import argparse import csv import re import tempfile import jinja2 import os import shutil import zipfile from playwright.sync_api import sync_playwright from itertools import groupby def is_number(s): """Returns True if s can be interpreted as a float.""" try: float(s) return True except ValueError: return False def ensure_colon(heading: str) -> str: """Append ':' only if heading does not already end with one.""" heading = heading.strip() if not heading.endswith(":"): heading += ":" return heading def main(): parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.") parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.") parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.") parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium", help="Which browser engine to use (default: chromium).") args = parser.parse_args() input_tsv = args.input_tsv output_pdf_folder = args.output_pdf_folder # 1. Read the TSV data with open(input_tsv, 'r', encoding='utf-8-sig') as f: reader = csv.DictReader(f, delimiter='\t') rows = list(reader) # 2. Sort rows by sending locality first, then by name rows = sorted(rows, key=lambda r: ( r.get("Sending locality:", "").strip(), r.get("Name (last, first): ", "").strip() )) # Known columns to skip skip_cols = { 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:' } # 3. Process each row into a data structure for Jinja2 processed_rows = [] for row in rows: name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor locality = row.get('Sending locality:', '').strip() q_and_a = [] for col in row.keys(): if col in skip_cols: continue col_title = col.strip() value = row[col].strip() if not value: continue # Check if "Comments" column if re.match(r'^Comments\s\d+$', col_title, re.IGNORECASE): # e.g. "Comments 1" => italic "Comments:" q_and_a.append({ "type": "comments", "label": "Comments", "value": value }) else: # Normal question if is_number(value): # Numeric => "Truth: 4" on one line q_and_a.append({ "type": "inline", "label": ensure_colon(col_title), "value": value }) else: # Non-numeric => label on one line, answer on next q_and_a.append({ "type": "block", "label": col_title, "value": value }) processed_rows.append({ "name": name, "locality": locality, "entries": q_and_a }) processed_rows = sorted(processed_rows, key=lambda r: r["locality"]) # required for groupby grouped_rows = groupby(processed_rows, key=lambda r: r["locality"]) # 4. Create an HTML template (Jinja2) # We'll use page-break-after so each row is on a new PDF page. html_template_str = r"""
Sending Locality: {{ row.locality }}
{% for entry in row.entries %} {% if entry.type == "comments" %}Comments:
{{ entry.value }}
{% elif entry.type == "inline" %}Please evaluate your progress for this past term (1-5, 5 indicating the most progress):
Truth: {{ entry.value }}
{% elif "Column 6" in entry.label %}Life: {{ entry.value }}
{% elif "Column 8" in entry.label %}Gospel: {{ entry.value }}
{% elif "Column 10" in entry.label %}Character: {{ entry.value }}
{% elif "Column 12" in entry.label %}Service: {{ entry.value }}
{% else %} {{ entry.label }}{{ entry.value }}
{% endif %}{{ entry.label }} {{ entry.value }}
{% elif "Column" in entry.label %}Comments:
{{ entry.value }}
{{ entry.label }}
{{ entry.value }}
{% endif %} {% endif %} {% endfor %}