Separate PDFs by locality
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
||||
*.pdf
|
||||
*.tsv
|
||||
*.csv
|
||||
*.zip
|
||||
|
20
README.md
20
README.md
@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro
|
||||
|
||||
## Usage
|
||||
|
||||
<!-- Don't use this!!!
|
||||
### If you do not want emoji support:
|
||||
|
||||
```
|
||||
pip install fpdf2
|
||||
```
|
||||
Use `generate_pdf.py` to convert a tsv to a pdf.
|
||||
Use `generate_pdf.py` to convert a tsv to a pdf. -->
|
||||
|
||||
### If you do want emoji support:
|
||||
|
||||
@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf.
|
||||
pip install playwright jinja2
|
||||
playwright install
|
||||
```
|
||||
Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf.
|
||||
Use `generate_pdf_emoji_support.py` to convert a TSV into multiple PDFs, one per sending locality, written into a single output folder.
|
||||
|
||||
## Usage Examples
|
||||
|
||||
```
|
||||
<!-- ```
|
||||
python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
|
||||
```
|
||||
``` -->
|
||||
|
||||
```
|
||||
python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
|
||||
python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)"
|
||||
```
|
||||
|
||||
```
|
||||
<!-- ```
|
||||
python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
|
||||
```
|
||||
``` -->
|
||||
|
||||
```
|
||||
python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
|
||||
python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)"
|
||||
```
|
||||
|
||||
## Actual Usage Example
|
||||
@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv"
|
||||
- at the end of Fourth Term Survey, it probably should be called "Further Comments"
|
||||
3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a trailing space. You need to remove this trailing space to make the output work.
|
||||
|
||||
|
||||
## To transfer folder by email:
|
||||
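The folder must be zipped before it can be attached to an email. The script also writes a zip archive of the output folder next to it, named `<output folder>.zip`.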
|
@ -4,6 +4,8 @@ import re
|
||||
import tempfile
|
||||
import jinja2
|
||||
import os
|
||||
import shutil
|
||||
import zipfile
|
||||
from playwright.sync_api import sync_playwright
|
||||
from itertools import groupby
|
||||
|
||||
@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str:
|
||||
return heading
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.")
|
||||
parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
|
||||
parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
|
||||
parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.")
|
||||
parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
|
||||
parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
|
||||
help="Which browser engine to use (default: chromium).")
|
||||
args = parser.parse_args()
|
||||
|
||||
input_tsv = args.input_tsv
|
||||
output_pdf = args.output_pdf
|
||||
output_pdf_folder = args.output_pdf_folder
|
||||
|
||||
# 1. Read the TSV data
|
||||
with open(input_tsv, 'r', encoding='utf-8-sig') as f:
|
||||
@ -43,9 +45,6 @@ def main():
|
||||
r.get("Sending locality:", "").strip(),
|
||||
r.get("Name (last, first): ", "").strip()
|
||||
))
|
||||
# Group rows by locality to generate separate pdfs
|
||||
grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip())
|
||||
|
||||
|
||||
# Known columns to skip
|
||||
skip_cols = {
|
||||
@ -58,7 +57,7 @@ def main():
|
||||
# 3. Process each row into a data structure for Jinja2
|
||||
processed_rows = []
|
||||
for row in rows:
|
||||
#name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now.
|
||||
name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor
|
||||
locality = row.get('Sending locality:', '').strip()
|
||||
|
||||
q_and_a = []
|
||||
@ -97,11 +96,14 @@ def main():
|
||||
})
|
||||
|
||||
processed_rows.append({
|
||||
# "name": name,
|
||||
"name": name,
|
||||
"locality": locality,
|
||||
"entries": q_and_a
|
||||
})
|
||||
|
||||
processed_rows = sorted(processed_rows, key=lambda r: r["locality"]) # required for groupby
|
||||
grouped_rows = groupby(processed_rows, key=lambda r: r["locality"])
|
||||
|
||||
# 4. Create an HTML template (Jinja2)
|
||||
# We'll use page-break-after so each row is on a new PDF page.
|
||||
html_template_str = r"""
|
||||
@ -130,12 +132,6 @@ def main():
|
||||
page-break-after: always;
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 18pt;
|
||||
margin: 0 0 0.5em 0;
|
||||
color: #1a365d;
|
||||
}
|
||||
|
||||
h2 {
|
||||
font-size: 14pt;
|
||||
margin: 0;
|
||||
@ -244,7 +240,6 @@ def main():
|
||||
<body>
|
||||
{% for row in rows %}
|
||||
<div class="trainee-page">
|
||||
<h1>{{ row.name }}</h1>
|
||||
<p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
|
||||
|
||||
{% for entry in row.entries %}
|
||||
@ -289,35 +284,56 @@ def main():
|
||||
</html>
|
||||
"""
|
||||
|
||||
# 5. Render the template with Jinja2
|
||||
template = jinja2.Template(html_template_str)
|
||||
rendered_html = template.render(rows=processed_rows)
|
||||
# Define the output folder and zip filename
|
||||
output_dir = output_pdf_folder
|
||||
zip_filename = f"{output_dir}.zip"
|
||||
|
||||
# 6. Convert the HTML to PDF using Playwright
|
||||
# We'll create a temporary HTML file, open it in a headless browser, and save as PDF.
|
||||
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
|
||||
tmp_html_path = tmp_file.name
|
||||
tmp_file.write(rendered_html.encode("utf-8"))
|
||||
tmp_file.flush()
|
||||
# Remove existing output zip file if it exists
|
||||
if os.path.exists(zip_filename):
|
||||
os.remove(zip_filename)
|
||||
|
||||
# Create or reset the output folder
|
||||
if os.path.exists(output_dir):
|
||||
shutil.rmtree(output_dir)
|
||||
os.makedirs(output_dir, exist_ok=True)
|
||||
|
||||
# Render and export one PDF per locality
|
||||
template = jinja2.Template(html_template_str)
|
||||
|
||||
with sync_playwright() as p:
|
||||
# launch the selected browser (chromium/firefox/webkit)
|
||||
browser = p.__getattribute__(args.browser).launch()
|
||||
context = browser.new_context()
|
||||
page = context.new_page()
|
||||
|
||||
# Load the local HTML file
|
||||
page.goto(f"file://{tmp_html_path}")
|
||||
for locality, group in grouped_rows:
|
||||
entries = list(group) # Materialize the group iterator
|
||||
|
||||
# PDF Options: letter format, etc.
|
||||
# For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions
|
||||
page.pdf(
|
||||
path=output_pdf,
|
||||
format="letter",
|
||||
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
|
||||
)
|
||||
rendered_html = template.render(rows=entries)
|
||||
|
||||
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
|
||||
tmp_html_path = tmp_file.name
|
||||
tmp_file.write(rendered_html.encode("utf-8"))
|
||||
tmp_file.flush()
|
||||
|
||||
# Sanitize filename (remove/replace special characters)
|
||||
safe_locality = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
|
||||
pdf_filename = f"{safe_locality}.pdf"
|
||||
pdf_path = os.path.join(output_dir, pdf_filename)
|
||||
|
||||
page = context.new_page()
|
||||
page.goto(f"file://{tmp_html_path}")
|
||||
page.pdf(
|
||||
path=pdf_path,
|
||||
format="letter",
|
||||
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
|
||||
)
|
||||
os.remove(tmp_html_path)
|
||||
print(f"✅ Generated PDF: {pdf_path}")
|
||||
|
||||
browser.close()
|
||||
|
||||
# 📦 Zip the output folder
|
||||
shutil.make_archive(output_dir, 'zip', output_dir)
|
||||
print(f"📁 Zipped folder created: {zip_filename}")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
Reference in New Issue
Block a user