Separate PDF's by locality

2025-06-30 11:59:26 -07:00
parent 72e4f0e260
commit c8b9ad55d5
3 changed files with 63 additions and 44 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 *.pdf
 *.tsv
 *.csv
 *.zip
--- a/README.md
+++ b/README.md
@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro
 ## Usage
 <!-- Don't use this!!!
 ### If you do not want emoji support:
 ```
 pip install fpdf2
 ```
-Use `generate_pdf.py` to convert a tsv to a pdf.
+Use `generate_pdf.py` to convert a tsv to a pdf. -->
 ### If you do want emoji support:
@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf.
 pip install playwright jinja2
 playwright install
 ```
-Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf.
+Use `generate_pdf_emoji_support.py` to convert a TSV into multiple PDFs, one per sending locality, saved together in an output folder.
 ## Usage Examples
-```
+<!-- ```
 python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
-```
+``` -->
 ```
-python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
+python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)"
 ```
-```
+<!-- ```
 python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
-```
+``` -->
 ```
-python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
+python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)"
 ```
 ## Actual Usage Example
@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv"
    - at the end of Fourth Term Survey, it probably should be called "Further Comments"
 3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work.
-
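+## To transfer folder by email:
 The folder must be zipped before it is attached to an email. `generate_pdf_emoji_support.py` already writes a zip archive named `<output folder>.zip` next to the output folder.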
--- a/generate_pdf_emoji_support.py
+++ b/generate_pdf_emoji_support.py
@ -4,6 +4,8 @@ import re
 import tempfile
 import jinja2
 import os
 import shutil
 import zipfile
 from playwright.sync_api import sync_playwright
 from itertools import groupby
@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str:
    return heading
 def main():
-    parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.")
+    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
-    parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.")
+    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()
    input_tsv = args.input_tsv
-    output_pdf = args.output_pdf
+    output_pdf_folder = args.output_pdf_folder
    # 1. Read the TSV data
    with open(input_tsv, 'r', encoding='utf-8-sig') as f:
@ -43,9 +45,6 @@ def main():
        r.get("Sending locality:", "").strip(),
        r.get("Name (last, first): ", "").strip()
    ))
    # Group rows by locality to generate separate pdfs
    grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip())
    # Known columns to skip
    skip_cols = {
@ -58,7 +57,7 @@ def main():
    # 3. Process each row into a data structure for Jinja2
    processed_rows = []
    for row in rows:
-        #name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now.
+        name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor
        locality = row.get('Sending locality:', '').strip()
        q_and_a = []
@ -97,11 +96,14 @@ def main():
                    })
        processed_rows.append({
-            # "name": name,
+            "name": name,
            "locality": locality,
            "entries": q_and_a
        })
    processed_rows = sorted(processed_rows, key=lambda r: r["locality"])  # required for groupby
    grouped_rows = groupby(processed_rows, key=lambda r: r["locality"])
    # 4. Create an HTML template (Jinja2)
    #    We'll use page-break-after so each row is on a new PDF page.
    html_template_str = r"""
@ -130,12 +132,6 @@ def main():
      page-break-after: always;
    }
    h1 {
      font-size: 18pt;
      margin: 0 0 0.5em 0;
      color: #1a365d;
    }
    h2 {
      font-size: 14pt;
      margin: 0;
@ -244,7 +240,6 @@ def main():
 <body>
  {% for row in rows %}
  <div class="trainee-page">
    <h1>{{ row.name }}</h1>
    <p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
    {% for entry in row.entries %}
@ -289,35 +284,56 @@ def main():
 </html>
 """
-    # 5. Render the template with Jinja2
+    # Define the output folder and zip filename
-    template = jinja2.Template(html_template_str)
+    output_dir = output_pdf_folder
-    rendered_html = template.render(rows=processed_rows)
+    zip_filename = f"{output_dir}.zip"
    # Remove existing output zip file if it exists
    if os.path.exists(zip_filename):
      os.remove(zip_filename)
    # Create or reset the output folder
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)
    # Render and export one PDF per locality
    template = jinja2.Template(html_template_str)
    with sync_playwright() as p:
        browser = p.__getattribute__(args.browser).launch()
        context = browser.new_context()
        for locality, group in grouped_rows:
            entries = list(group)  # Materialize the group iterator
            rendered_html = template.render(rows=entries)
    # 6. Convert the HTML to PDF using Playwright
    #    We'll create a temporary HTML file, open it in a headless browser, and save as PDF.
            with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
                tmp_html_path = tmp_file.name
                tmp_file.write(rendered_html.encode("utf-8"))
                tmp_file.flush()
-    with sync_playwright() as p:
+            # Sanitize filename (remove/replace special characters)
-        # launch the selected browser (chromium/firefox/webkit)
+            safe_locality = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
-        browser = p.__getattribute__(args.browser).launch()
+            pdf_filename = f"{safe_locality}.pdf"
-        context = browser.new_context()
+            pdf_path = os.path.join(output_dir, pdf_filename)
            page = context.new_page()
        # Load the local HTML file
            page.goto(f"file://{tmp_html_path}")
        # PDF Options: letter format, etc.
        # For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions
            page.pdf(
-            path=output_pdf,
+                path=pdf_path,
                format="letter",
                margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
            )
            os.remove(tmp_html_path)
            print(f"✅ Generated PDF: {pdf_path}")
        browser.close()
    # 📦 Zip the output folder
    shutil.make_archive(output_dir, 'zip', output_dir)
    print(f"📁 Zipped folder created: {zip_filename}")
 if __name__ == "__main__":
    main()