Separate PDFs by locality

15 hours ago · c8b9ad55d5
parent 72e4f0e260
commit c8b9ad55d5
3 changed files with 63 additions and 44 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,3 +1,4 @@
 *.pdf
 *.tsv
 *.csv
+*.zip
--- a/README.md
+++ b/README.md
@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro

 ## Usage

+<!-- Don't use this!!!
 ### If you do not want emoji support:

 ```
 pip install fpdf2
 ```
-Use `generate_pdf.py` to convert a tsv to a pdf.
+Use `generate_pdf.py` to convert a tsv to a pdf. -->

 ### If you do want emoji support:

@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf.
 pip install playwright jinja2
 playwright install
 ```
-Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf.
+Use `generate_pdf_emoji_support.py` to convert a TSV into multiple PDFs, one per locality, written into an output folder.

 ## Usage Examples

-```
+<!-- ```
 python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
-```
+``` -->

 ```
-python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
+python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)"
 ```

-```
+<!-- ```
 python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
-```
+``` -->

 ```
-python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
+python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)"
 ```

 ## Actual Usage Example
@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv"
    - at the end of Fourth Term Survey, it probably should be called "Further Comments"
 3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work.

-
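+## To transfer the folder by email:
+The folder must be zipped. The script already creates a `.zip` archive with the same name as the output folder, next to it.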
--- a/generate_pdf_emoji_support.py
+++ b/generate_pdf_emoji_support.py
@ -4,6 +4,8 @@ import re
 import tempfile
 import jinja2
 import os
+import shutil
+import zipfile
 from playwright.sync_api import sync_playwright
 from itertools import groupby

@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str:
    return heading

 def main():
-    parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.")
+    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
-    parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.")
+    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    input_tsv = args.input_tsv
-    output_pdf = args.output_pdf
+    output_pdf_folder = args.output_pdf_folder

    # 1. Read the TSV data
    with open(input_tsv, 'r', encoding='utf-8-sig') as f:
@ -43,9 +45,6 @@ def main():
        r.get("Sending locality:", "").strip(),
        r.get("Name (last, first): ", "").strip()
    ))
-    # Group rows by locality to generate separate pdfs
-    grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip())
-
    
    # Known columns to skip
    skip_cols = {
@ -58,7 +57,7 @@ def main():
    # 3. Process each row into a data structure for Jinja2
    processed_rows = []
    for row in rows:
-        #name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now.
+        name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor
        locality = row.get('Sending locality:', '').strip()

        q_and_a = []
@ -97,11 +96,14 @@ def main():
                    })

        processed_rows.append({
-            # "name": name,
+            "name": name,
            "locality": locality,
            "entries": q_and_a
        })

+    processed_rows = sorted(processed_rows, key=lambda r: r["locality"])  # required for groupby
+    grouped_rows = groupby(processed_rows, key=lambda r: r["locality"])
+
    # 4. Create an HTML template (Jinja2)
    #    We'll use page-break-after so each row is on a new PDF page.
    html_template_str = r"""
@ -130,12 +132,6 @@ def main():
      page-break-after: always;
    }

-    h1 {
-      font-size: 18pt;
-      margin: 0 0 0.5em 0;
-      color: #1a365d;
-    }
-
    h2 {
      font-size: 14pt;
      margin: 0;
@ -244,7 +240,6 @@ def main():
 <body>
  {% for row in rows %}
  <div class="trainee-page">
-    <h1>{{ row.name }}</h1>
    <p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>

    {% for entry in row.entries %}
@ -289,35 +284,56 @@ def main():
 </html>
 """

-    # 5. Render the template with Jinja2
-    template = jinja2.Template(html_template_str)
-    rendered_html = template.render(rows=processed_rows)
+    # Define the output folder and zip filename
+    output_dir = output_pdf_folder
+    zip_filename = f"{output_dir}.zip"
+
+    # Remove existing output zip file if it exists
+    if os.path.exists(zip_filename):
+      os.remove(zip_filename)

-    # 6. Convert the HTML to PDF using Playwright
-    #    We'll create a temporary HTML file, open it in a headless browser, and save as PDF.
-    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
-        tmp_html_path = tmp_file.name
-        tmp_file.write(rendered_html.encode("utf-8"))
-        tmp_file.flush()
+    # Create or reset the output folder
+    if os.path.exists(output_dir):
+        shutil.rmtree(output_dir)
+    os.makedirs(output_dir, exist_ok=True)
+
+    # Render and export one PDF per locality
+    template = jinja2.Template(html_template_str)

    with sync_playwright() as p:
-        # launch the selected browser (chromium/firefox/webkit)
        browser = p.__getattribute__(args.browser).launch()
        context = browser.new_context()
-        page = context.new_page()

-        # Load the local HTML file
-        page.goto(f"file://{tmp_html_path}")
+        for locality, group in grouped_rows:
+            entries = list(group)  # Materialize the group iterator
+
+            rendered_html = template.render(rows=entries)

-        # PDF Options: letter format, etc.
-        # For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions
-        page.pdf(
-            path=output_pdf,
-            format="letter",
-            margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
-        )
+            with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
+                tmp_html_path = tmp_file.name
+                tmp_file.write(rendered_html.encode("utf-8"))
+                tmp_file.flush()
+
+            # Sanitize filename (remove/replace special characters)
+            safe_locality = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
+            pdf_filename = f"{safe_locality}.pdf"
+            pdf_path = os.path.join(output_dir, pdf_filename)
+
+            page = context.new_page()
+            page.goto(f"file://{tmp_html_path}")
+            page.pdf(
+                path=pdf_path,
+                format="letter",
+                margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
+            )
+            os.remove(tmp_html_path)
+            print(f"✅ Generated PDF: {pdf_path}")

        browser.close()

+    # 📦 Zip the output folder
+    shutil.make_archive(output_dir, 'zip', output_dir)
+    print(f"📁 Zipped folder created: {zip_filename}")
+
 if __name__ == "__main__":
    main()