diff --git a/.gitignore b/.gitignore index b2288dd..72e3650 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ *.pdf *.tsv *.csv +*.zip diff --git a/README.md b/README.md index 7caa856..e18888c 100644 --- a/README.md +++ b/README.md @@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro ## Usage + ### If you do want emoji support: @@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf. pip install playwright jinja2 playwright install ``` -Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf. +Use `generate_pdf_emoji_support.py` to convert a tsv into multiple PDFs, one per locality, saved in an output folder. ## Usage Examples -``` + ``` -python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf" +python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)" ``` -``` + ``` -python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf" +python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)" ``` ## Actual Usage Example @@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" - at the end of Fourth Term Survey, it probably should be called "Further Comments" 3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work. - +## To transfer the output folder by email: +The folder must be zipped first; the script already creates a `.zip` archive with the same name next to the output folder. 
\ No newline at end of file diff --git a/generate_pdf_emoji_support.py b/generate_pdf_emoji_support.py index 04df6cf..8a816b1 100644 --- a/generate_pdf_emoji_support.py +++ b/generate_pdf_emoji_support.py @@ -4,6 +4,8 @@ import re import tempfile import jinja2 import os +import shutil +import zipfile from playwright.sync_api import sync_playwright from itertools import groupby @@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str: return heading def main(): - parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.") + parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.") parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.") - parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.") + parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.") parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium", help="Which browser engine to use (default: chromium).") args = parser.parse_args() input_tsv = args.input_tsv - output_pdf = args.output_pdf + output_pdf_folder = args.output_pdf_folder # 1. Read the TSV data with open(input_tsv, 'r', encoding='utf-8-sig') as f: @@ -43,9 +45,6 @@ def main(): r.get("Sending locality:", "").strip(), r.get("Name (last, first): ", "").strip() )) - # Group rows by locality to generate separate pdfs - grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip()) - # Known columns to skip skip_cols = { @@ -58,7 +57,7 @@ def main(): # 3. Process each row into a data structure for Jinja2 processed_rows = [] for row in rows: - #name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now. 
+ name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor locality = row.get('Sending locality:', '').strip() q_and_a = [] @@ -97,11 +96,14 @@ def main(): }) processed_rows.append({ - # "name": name, + "name": name, "locality": locality, "entries": q_and_a }) + processed_rows = sorted(processed_rows, key=lambda r: r["locality"]) # required for groupby + grouped_rows = groupby(processed_rows, key=lambda r: r["locality"]) + # 4. Create an HTML template (Jinja2) # We'll use page-break-after so each row is on a new PDF page. html_template_str = r""" @@ -130,12 +132,6 @@ def main(): page-break-after: always; } - h1 { - font-size: 18pt; - margin: 0 0 0.5em 0; - color: #1a365d; - } - h2 { font-size: 14pt; margin: 0; @@ -244,7 +240,6 @@ def main(): {% for row in rows %}
-

{{ row.name }}

Sending Locality: {{ row.locality }}

{% for entry in row.entries %} @@ -289,35 +284,56 @@ def main(): """ - # 5. Render the template with Jinja2 - template = jinja2.Template(html_template_str) - rendered_html = template.render(rows=processed_rows) + # Define the output folder and zip filename + output_dir = output_pdf_folder + zip_filename = f"{output_dir}.zip" + + # Remove existing output zip file if it exists + if os.path.exists(zip_filename): + os.remove(zip_filename) - # 6. Convert the HTML to PDF using Playwright - # We'll create a temporary HTML file, open it in a headless browser, and save as PDF. - with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file: - tmp_html_path = tmp_file.name - tmp_file.write(rendered_html.encode("utf-8")) - tmp_file.flush() + # Create or reset the output folder + if os.path.exists(output_dir): + shutil.rmtree(output_dir) + os.makedirs(output_dir, exist_ok=True) + + # Render and export one PDF per locality + template = jinja2.Template(html_template_str) with sync_playwright() as p: - # launch the selected browser (chromium/firefox/webkit) browser = p.__getattribute__(args.browser).launch() context = browser.new_context() - page = context.new_page() - # Load the local HTML file - page.goto(f"file://{tmp_html_path}") + for locality, group in grouped_rows: + entries = list(group) # Materialize the group iterator + + rendered_html = template.render(rows=entries) - # PDF Options: letter format, etc. - # For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions - page.pdf( - path=output_pdf, - format="letter", - margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"} - ) + with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file: + tmp_html_path = tmp_file.name + tmp_file.write(rendered_html.encode("utf-8")) + tmp_file.flush() + + # Sanitize filename (remove/replace special characters) + safe_locality = re.sub(r"[^\w\-_. 
]", "_", locality).strip().replace(" ", "_") + pdf_filename = f"{safe_locality}.pdf" + pdf_path = os.path.join(output_dir, pdf_filename) + + page = context.new_page() + page.goto(f"file://{tmp_html_path}") + page.pdf( + path=pdf_path, + format="letter", + margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"} + ) + os.remove(tmp_html_path) + print(f"✅ Generated PDF: {pdf_path}") browser.close() + # 📦 Zip the output folder + shutil.make_archive(output_dir, 'zip', output_dir) + print(f"📁 Zipped folder created: {zip_filename}") + if __name__ == "__main__": main()