Separate PDFs by locality
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@ -1,3 +1,4 @@
|
|||||||
*.pdf
|
*.pdf
|
||||||
*.tsv
|
*.tsv
|
||||||
*.csv
|
*.csv
|
||||||
|
*.zip
|
||||||
|
20
README.md
20
README.md
@ -4,12 +4,13 @@ This is how you can generate PDFs for End of Term Surveys. Some examples are pro
|
|||||||
|
|
||||||
## Usage
|
## Usage
|
||||||
|
|
||||||
|
<!-- Don't use this!!!
|
||||||
### If you do not want emoji support:
|
### If you do not want emoji support:
|
||||||
|
|
||||||
```
|
```
|
||||||
pip install fpdf2
|
pip install fpdf2
|
||||||
```
|
```
|
||||||
Use `generate_pdf.py` to convert a tsv to a pdf.
|
Use `generate_pdf.py` to convert a tsv to a pdf. -->
|
||||||
|
|
||||||
### If you do want emoji support:
|
### If you do want emoji support:
|
||||||
|
|
||||||
@ -17,24 +18,24 @@ Use `generate_pdf.py` to convert a tsv to a pdf.
|
|||||||
pip install playwright jinja2
|
pip install playwright jinja2
|
||||||
playwright install
|
playwright install
|
||||||
```
|
```
|
||||||
Use `generate_pdf_emoji_support.py` to convert a tsv to a pdf.
|
Use `generate_pdf_emoji_support.py` to convert a TSV into multiple PDFs, one per sending locality, saved together in an output folder.
|
||||||
|
|
||||||
## Usage Examples
|
## Usage Examples
|
||||||
|
|
||||||
```
|
<!-- ```
|
||||||
python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
|
python generate_pdf.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
|
||||||
```
|
``` -->
|
||||||
|
|
||||||
```
|
```
|
||||||
python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality).pdf"
|
python generate_pdf_emoji_support.py -i "F2024 End of Term Survey Responses.tsv" -o "F24 End of Term Survey (Sorted by Locality)"
|
||||||
```
|
```
|
||||||
|
|
||||||
```
|
<!-- ```
|
||||||
python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
|
python generate_pdf.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
|
||||||
```
|
``` -->
|
||||||
|
|
||||||
```
|
```
|
||||||
python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality).pdf"
|
python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv" -o "F24 Fourth Term Survey (Sorted by Locality)"
|
||||||
```
|
```
|
||||||
|
|
||||||
## Actual Usage Example
|
## Actual Usage Example
|
||||||
@ -47,4 +48,5 @@ python generate_pdf_emoji_support.py -i "F2024 Fourth Term Survey Responses.tsv"
|
|||||||
- at the end of Fourth Term Survey, it probably should be called "Further Comments"
|
- at the end of Fourth Term Survey, it probably should be called "Further Comments"
|
||||||
3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work.
|
3. Required columns: 'Timestamp', 'Email Address', 'Name (last, first):', 'Sending locality:'. WARNING: In previous terms, the column named "Name (last, first):" was called "Name (last, first): " with a space. You need to remove this space to make the output work.
|
||||||
|
|
||||||
|
## To transfer the folder by email
|
||||||
|
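The folder must be zipped before it can be attached to an email. The script creates a `.zip` archive of the output folder automatically, next to the folder itself.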
|
@ -4,6 +4,8 @@ import re
|
|||||||
import tempfile
|
import tempfile
|
||||||
import jinja2
|
import jinja2
|
||||||
import os
|
import os
|
||||||
|
import shutil
|
||||||
|
import zipfile
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
from itertools import groupby
|
from itertools import groupby
|
||||||
|
|
||||||
@ -23,15 +25,15 @@ def ensure_colon(heading: str) -> str:
|
|||||||
return heading
|
return heading
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Generate a PDF report from a TSV input using Playwright.")
|
parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
|
||||||
parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
|
parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
|
||||||
parser.add_argument("-o", "--output_pdf", required=True, help="Path to the output PDF file.")
|
parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
|
||||||
parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
|
parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
|
||||||
help="Which browser engine to use (default: chromium).")
|
help="Which browser engine to use (default: chromium).")
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
input_tsv = args.input_tsv
|
input_tsv = args.input_tsv
|
||||||
output_pdf = args.output_pdf
|
output_pdf_folder = args.output_pdf_folder
|
||||||
|
|
||||||
# 1. Read the TSV data
|
# 1. Read the TSV data
|
||||||
with open(input_tsv, 'r', encoding='utf-8-sig') as f:
|
with open(input_tsv, 'r', encoding='utf-8-sig') as f:
|
||||||
@ -43,9 +45,6 @@ def main():
|
|||||||
r.get("Sending locality:", "").strip(),
|
r.get("Sending locality:", "").strip(),
|
||||||
r.get("Name (last, first): ", "").strip()
|
r.get("Name (last, first): ", "").strip()
|
||||||
))
|
))
|
||||||
# Group rows by locality to generate separate pdfs
|
|
||||||
grouped_rows = groupby(rows, key=lambda r: r.get("Sending locality:", "").strip())
|
|
||||||
|
|
||||||
|
|
||||||
# Known columns to skip
|
# Known columns to skip
|
||||||
skip_cols = {
|
skip_cols = {
|
||||||
@ -58,7 +57,7 @@ def main():
|
|||||||
# 3. Process each row into a data structure for Jinja2
|
# 3. Process each row into a data structure for Jinja2
|
||||||
processed_rows = []
|
processed_rows = []
|
||||||
for row in rows:
|
for row in rows:
|
||||||
#name = row.get('Name (last, first):', '').strip() # TODO: Refactor: This isn't working because it's 'Name (last, first): '. Unused for now.
|
name = row.get('Name (last, first): ', '').strip() # Unused, maybe refactor
|
||||||
locality = row.get('Sending locality:', '').strip()
|
locality = row.get('Sending locality:', '').strip()
|
||||||
|
|
||||||
q_and_a = []
|
q_and_a = []
|
||||||
@ -97,11 +96,14 @@ def main():
|
|||||||
})
|
})
|
||||||
|
|
||||||
processed_rows.append({
|
processed_rows.append({
|
||||||
# "name": name,
|
"name": name,
|
||||||
"locality": locality,
|
"locality": locality,
|
||||||
"entries": q_and_a
|
"entries": q_and_a
|
||||||
})
|
})
|
||||||
|
|
||||||
|
processed_rows = sorted(processed_rows, key=lambda r: r["locality"]) # required for groupby
|
||||||
|
grouped_rows = groupby(processed_rows, key=lambda r: r["locality"])
|
||||||
|
|
||||||
# 4. Create an HTML template (Jinja2)
|
# 4. Create an HTML template (Jinja2)
|
||||||
# We'll use page-break-after so each row is on a new PDF page.
|
# We'll use page-break-after so each row is on a new PDF page.
|
||||||
html_template_str = r"""
|
html_template_str = r"""
|
||||||
@ -130,12 +132,6 @@ def main():
|
|||||||
page-break-after: always;
|
page-break-after: always;
|
||||||
}
|
}
|
||||||
|
|
||||||
h1 {
|
|
||||||
font-size: 18pt;
|
|
||||||
margin: 0 0 0.5em 0;
|
|
||||||
color: #1a365d;
|
|
||||||
}
|
|
||||||
|
|
||||||
h2 {
|
h2 {
|
||||||
font-size: 14pt;
|
font-size: 14pt;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
@ -244,7 +240,6 @@ def main():
|
|||||||
<body>
|
<body>
|
||||||
{% for row in rows %}
|
{% for row in rows %}
|
||||||
<div class="trainee-page">
|
<div class="trainee-page">
|
||||||
<h1>{{ row.name }}</h1>
|
|
||||||
<p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
|
<p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
|
||||||
|
|
||||||
{% for entry in row.entries %}
|
{% for entry in row.entries %}
|
||||||
@ -289,35 +284,56 @@ def main():
|
|||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 5. Render the template with Jinja2
|
# Define the output folder and zip filename
|
||||||
template = jinja2.Template(html_template_str)
|
output_dir = output_pdf_folder
|
||||||
rendered_html = template.render(rows=processed_rows)
|
zip_filename = f"{output_dir}.zip"
|
||||||
|
|
||||||
|
# Remove existing output zip file if it exists
|
||||||
|
if os.path.exists(zip_filename):
|
||||||
|
os.remove(zip_filename)
|
||||||
|
|
||||||
|
# Create or reset the output folder
|
||||||
|
if os.path.exists(output_dir):
|
||||||
|
shutil.rmtree(output_dir)
|
||||||
|
os.makedirs(output_dir, exist_ok=True)
|
||||||
|
|
||||||
|
# Render and export one PDF per locality
|
||||||
|
template = jinja2.Template(html_template_str)
|
||||||
|
|
||||||
|
with sync_playwright() as p:
|
||||||
|
browser = p.__getattribute__(args.browser).launch()
|
||||||
|
context = browser.new_context()
|
||||||
|
|
||||||
|
for locality, group in grouped_rows:
|
||||||
|
entries = list(group) # Materialize the group iterator
|
||||||
|
|
||||||
|
rendered_html = template.render(rows=entries)
|
||||||
|
|
||||||
# 6. Convert the HTML to PDF using Playwright
|
|
||||||
# We'll create a temporary HTML file, open it in a headless browser, and save as PDF.
|
|
||||||
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
|
with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
|
||||||
tmp_html_path = tmp_file.name
|
tmp_html_path = tmp_file.name
|
||||||
tmp_file.write(rendered_html.encode("utf-8"))
|
tmp_file.write(rendered_html.encode("utf-8"))
|
||||||
tmp_file.flush()
|
tmp_file.flush()
|
||||||
|
|
||||||
with sync_playwright() as p:
|
# Sanitize filename (remove/replace special characters)
|
||||||
# launch the selected browser (chromium/firefox/webkit)
|
safe_locality = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
|
||||||
browser = p.__getattribute__(args.browser).launch()
|
pdf_filename = f"{safe_locality}.pdf"
|
||||||
context = browser.new_context()
|
pdf_path = os.path.join(output_dir, pdf_filename)
|
||||||
|
|
||||||
page = context.new_page()
|
page = context.new_page()
|
||||||
|
|
||||||
# Load the local HTML file
|
|
||||||
page.goto(f"file://{tmp_html_path}")
|
page.goto(f"file://{tmp_html_path}")
|
||||||
|
|
||||||
# PDF Options: letter format, etc.
|
|
||||||
# For more options, see: https://playwright.dev/python/docs/api/class-page#pagepdfoptions
|
|
||||||
page.pdf(
|
page.pdf(
|
||||||
path=output_pdf,
|
path=pdf_path,
|
||||||
format="letter",
|
format="letter",
|
||||||
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
|
margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
|
||||||
)
|
)
|
||||||
|
os.remove(tmp_html_path)
|
||||||
|
print(f"✅ Generated PDF: {pdf_path}")
|
||||||
|
|
||||||
browser.close()
|
browser.close()
|
||||||
|
|
||||||
|
# 📦 Zip the output folder
|
||||||
|
shutil.make_archive(output_dir, 'zip', output_dir)
|
||||||
|
print(f"📁 Zipped folder created: {zip_filename}")
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
main()
|
main()
|
||||||
|
Reference in New Issue
Block a user