"""Generate a PDF report from a TSV file, one PDF page per input row.

Each row is rendered as the respondent's name and sending locality followed
by every non-empty answer. The HTML is produced with Jinja2 and printed to
PDF by a headless browser driven through Playwright.
"""

import argparse
import csv
import os
import re
import tempfile
from pathlib import Path

# Third-party packages (jinja2, playwright) are imported inside the helpers
# that use them, so the pure helpers below can be imported on their own.

NAME_COL = "Name (last, first):"
LOCALITY_COL = "Sending locality:"

# Columns that never appear as a question/answer entry.
SKIP_COLS = frozenset({"Timestamp", "Email Address", NAME_COL, LOCALITY_COL})

# Matches headers such as "Comments 1" (case-insensitive).
COMMENTS_RE = re.compile(r"^Comments\s\d+$", re.IGNORECASE)

# Deliberately minimal markup: each row gets its own page via
# page-break-after, and the "Comments:" label is italic. Every value is
# inserted through a Jinja2 variable so that autoescaping applies to it.
HTML_TEMPLATE = """\
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{{ title }}</title>
<style>
  .page { page-break-after: always; }
  .comments-label { font-style: italic; }
</style>
</head>
<body>
{% for row in rows %}
<div class="page">
  <h2>{{ row.name }}</h2>
  <p>Sending Locality: {{ row.locality }}</p>
  {% for entry in row.entries %}
  {% if entry.type == "comments" %}
  <p class="comments-label">Comments:</p>
  <p>{{ entry.value }}</p>
  {% elif entry.type == "inline" %}
  <p>{{ entry.label }} {{ entry.value }}</p>
  {% else %}
  <p>{{ entry.label }}</p>
  <p>{{ entry.value }}</p>
  {% endif %}
  {% endfor %}
</div>
{% endfor %}
</body>
</html>
"""


def is_number(s) -> bool:
    """Return True if *s* can be interpreted as a float.

    Anything ``float()`` rejects, including ``None``, yields False.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        return False
    return True


def ensure_colon(heading: str) -> str:
    """Return *heading* stripped, with ':' appended unless already present."""
    heading = heading.strip()
    return heading if heading.endswith(":") else heading + ":"


def _cell(row: dict, key: str) -> str:
    """Return the stripped text of ``row[key]``, or '' if missing or None."""
    # DictReader fills cells missing from a short row with None.
    return (row.get(key) or "").strip()


def _build_entries(row: dict) -> list:
    """Turn one TSV row into the list of entry dicts the template renders.

    Each entry has ``type`` ("comments", "inline" or "block"), ``label`` and
    ``value``. Skipped columns and empty cells produce no entry.
    """
    entries = []
    for col, raw in row.items():
        # DictReader stores surplus cells of an over-long row as a list under
        # the key None; there is no header to label them with, so skip them.
        if col is None or col in SKIP_COLS:
            continue
        value = (raw or "").strip()
        if not value:
            continue
        title = col.strip()
        if COMMENTS_RE.match(title):
            entries.append({"type": "comments", "label": "Comments", "value": value})
        elif is_number(value):
            # Numeric answer: label and value share one line, e.g. "Truth: 4".
            entries.append({"type": "inline", "label": ensure_colon(title), "value": value})
        else:
            # Free-text answer: label on one line, answer on the next.
            entries.append({"type": "block", "label": title, "value": value})
    return entries


def _process_rows(rows: list) -> list:
    """Sort *rows* by sending locality and convert each to template data."""
    ordered = sorted(rows, key=lambda r: _cell(r, LOCALITY_COL))
    return [
        {
            "name": _cell(r, NAME_COL),
            "locality": _cell(r, LOCALITY_COL),
            "entries": _build_entries(r),
        }
        for r in ordered
    ]


def _render_html(processed_rows: list, title: str) -> str:
    """Render the report HTML with autoescaping enabled."""
    import jinja2

    # Autoescape so that '<', '&' etc. in TSV cells or in the title are
    # emitted as text instead of being parsed as markup.
    env = jinja2.Environment(autoescape=True)
    return env.from_string(HTML_TEMPLATE).render(rows=processed_rows, title=title)


def _write_pdf(html: str, output_pdf: str, browser_name: str) -> None:
    """Print *html* to *output_pdf* (letter format, 0.75in margins).

    The HTML is written to a temporary file that the browser loads through a
    file:// URL; the file is removed afterwards, even on failure.
    """
    from playwright.sync_api import sync_playwright

    with tempfile.NamedTemporaryFile(
        "w", suffix=".html", delete=False, encoding="utf-8"
    ) as tmp_file:
        tmp_file.write(html)
    tmp_html_path = Path(tmp_file.name)

    try:
        with sync_playwright() as p:
            browser = getattr(p, browser_name).launch()
            try:
                page = browser.new_context().new_page()
                # as_uri() builds a correctly quoted file:// URL on every OS.
                page.goto(tmp_html_path.as_uri())
                # For more options, see:
                # https://playwright.dev/python/docs/api/class-page#page-pdf
                page.pdf(
                    path=output_pdf,
                    format="letter",
                    margin={
                        "top": "0.75in",
                        "right": "0.75in",
                        "bottom": "0.75in",
                        "left": "0.75in",
                    },
                )
            finally:
                browser.close()
    finally:
        os.remove(tmp_html_path)


def main():
    """Parse command-line arguments and generate the PDF report."""
    parser = argparse.ArgumentParser(
        description="Generate a PDF report from a TSV input using Playwright."
    )
    parser.add_argument("-i", "--input_tsv", required=True,
                        help="Path to the input TSV file.")
    parser.add_argument("-o", "--output_pdf", required=True,
                        help="Path to the output PDF file.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"],
                        default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    # Playwright documents page.pdf() as Chromium-only, so fail before doing
    # any work rather than after the browser has been launched.
    if args.browser != "chromium":
        parser.error(
            f"PDF generation is only supported by chromium, not {args.browser}."
        )

    # newline="" lets the csv module handle line endings inside quoted cells.
    with open(args.input_tsv, "r", encoding="utf-8-sig", newline="") as f:
        rows = list(csv.DictReader(f, delimiter="\t"))

    html = _render_html(_process_rows(rows), os.path.basename(args.output_pdf))
    _write_pdf(html, args.output_pdf, args.browser)


if __name__ == "__main__":
    main()