Separate PDFs by locality

main
David 21 hours ago
parent 72e4f0e260
commit c8b9ad55d5

1
.gitignore vendored

@ -1,3 +1,4 @@
*.pdf *.pdf
*.tsv *.tsv
*.csv *.csv
*.zip

@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro
## Usage ## Usage
<!-- Don't use this!!!
### If you do not want emoji support: ### If you do not want emoji support:
``` ```
pip install fpdf2 pip install fpdf2
``` ```
Use `generate_pdf.py` to convert a tsv to a pdf. Use `generate_pdf.py` to convert a tsv to a pdf. -->
### If you do want emoji support: ### If you do want emoji support:
@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf.
pip install playwright jinja2 pip install playwright jinja2
playwright install playwright install
``` ```
Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf. Use `generate_pdf_emoji_support.py` to convert a tsv into many PDFs, one per locality, saved together in an output folder.
## Usage Examples ## Usage Examples
``` <!-- ```
python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf" python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
``` ``` -->
``` ```
python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf" python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)"
``` ```
``` <!-- ```
python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf" python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
``` ``` -->
``` ```
python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf" python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)"
``` ```
## Actual Usage Example ## Actual Usage Example
@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv"
- at the end of Fourth Term Survey, it probably should be called "Further Comments" - at the end of Fourth Term Survey, it probably should be called "Further Comments"
3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work. 3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work.
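## To transfer the folder by email:
The folder must be zipped before it can be attached. The script already creates a zip archive of the output folder, named after the folder with a `.zip` extension.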

@ -4,6 +4,8 @@ import re
import tempfile import tempfile
import jinja2 import jinja2
import os import os
import shutil
import zipfile
from playwright.sync_api import sync_playwright from playwright.sync_api import sync_playwright
from itertools import groupby from itertools import groupby
@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str:
return heading return heading
def main(): def main():
parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.") parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.") parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.") parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium", parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
help="Which browser engine to use (default: chromium).") help="Which browser engine to use (default: chromium).")
args = parser.parse_args() args = parser.parse_args()
input_tsv = args.input_tsv input_tsv = args.input_tsv
output_pdf = args.output_pdf output_pdf_folder = args.output_pdf_folder
# 1. Read the TSV data # 1. Read the TSV data
with open(input_tsv, 'r', encoding='utf-8-sig') as f: with open(input_tsv, 'r', encoding='utf-8-sig') as f:
@ -43,9 +45,6 @@ def main():
r.get("Sending locality:", "").strip(), r.get("Sending locality:", "").strip(),
r.get("Name (last, first): ", "").strip() r.get("Name (last, first): ", "").strip()
)) ))
# Group rows by locality to generate separate pdfs
grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip())
# Known columns to skip # Known columns to skip
skip_cols = { skip_cols = {
@ -58,7 +57,7 @@ def main():
# 3. Process each row into a data structure for Jinja2 # 3. Process each row into a data structure for Jinja2
processed_rows = [] processed_rows = []
for row in rows: for row in rows:
#name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now. name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor
locality = row.get('Sending locality:', '').strip() locality = row.get('Sending locality:', '').strip()
q_and_a = [] q_and_a = []
@ -97,11 +96,14 @@ def main():
}) })
processed_rows.append({ processed_rows.append({
# "name": name, "name": name,
"locality": locality, "locality": locality,
"entries": q_and_a "entries": q_and_a
}) })
processed_rows = sorted(processed_rows, key=lambda r: r["locality"]) # required for groupby
grouped_rows = groupby(processed_rows, key=lambda r: r["locality"])
# 4. Create an HTML template (Jinja2) # 4. Create an HTML template (Jinja2)
# We'll use page-break-after so each row is on a new PDF page. # We'll use page-break-after so each row is on a new PDF page.
html_template_str = r""" html_template_str = r"""
@ -130,12 +132,6 @@ def main():
page-break-after: always; page-break-after: always;
} }
h1 {
font-size: 18pt;
margin: 0 0 0.5em 0;
color: #1a365d;
}
h2 { h2 {
font-size: 14pt; font-size: 14pt;
margin: 0; margin: 0;
@ -244,7 +240,6 @@ def main():
<body> <body>
{% for row in rows %} {% for row in rows %}
<div class="trainee-page"> <div class="trainee-page">
<h1>{{ row.name }}</h1>
<p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p> <p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
{% for entry in row.entries %} {% for entry in row.entries %}
@ -289,35 +284,56 @@ def main():
</html> </html>
""" """
# 5. Render the template with Jinja2 # Define the output folder and zip filename
template = jinja2.Template(html_template_str) output_dir = output_pdf_folder
rendered_html = template.render(rows=processed_rows) zip_filename = f"{output_dir}.zip"
# Remove existing output zip file if it exists
if os.path.exists(zip_filename):
os.remove(zip_filename)
# 6. Convert the HTML to PDF using Playwright # Create or reset the output folder
# We'll create a temporary HTML file, open it in a headless browser, and save as PDF. if os.path.exists(output_dir):
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file: shutil.rmtree(output_dir)
tmp_html_path = tmp_file.name os.makedirs(output_dir, exist_ok=True)
tmp_file.write(rendered_html.encode("utf-8"))
tmp_file.flush() # Render and export one PDF per locality
template = jinja2.Template(html_template_str)
with sync_playwright() as p: with sync_playwright() as p:
# launch the selected browser (chromium/firefox/webkit)
browser = p.__getattribute__(args.browser).launch() browser = p.__getattribute__(args.browser).launch()
context = browser.new_context() context = browser.new_context()
page = context.new_page()
# Load the local HTML file for locality, group in grouped_rows:
page.goto(f"file://{tmp_html_path}") entries = list(group) # Materialize the group iterator
rendered_html = template.render(rows=entries)
# PDF Options: letter format, etc. with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
# For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions tmp_html_path = tmp_file.name
page.pdf( tmp_file.write(rendered_html.encode("utf-8"))
path=output_pdf, tmp_file.flush()
format="letter",
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"} # Sanitize filename (remove/replace special characters)
) safe_locality = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
pdf_filename = f"{safe_locality}.pdf"
pdf_path = os.path.join(output_dir, pdf_filename)
page = context.new_page()
page.goto(f"file://{tmp_html_path}")
page.pdf(
path=pdf_path,
format="letter",
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
)
os.remove(tmp_html_path)
print(f"✅ Generated PDF: {pdf_path}")
browser.close() browser.close()
# 📦 Zip the output folder
shutil.make_archive(output_dir, 'zip', output_dir)
print(f"📁 Zipped folder created: {zip_filename}")
if __name__ == "__main__": if __name__ == "__main__":
main() main()

Loading…
Cancel
Save