You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

340 lines
9.7 KiB
Python

import argparse
import csv
import os
import re
import shutil
import tempfile
import zipfile
from itertools import groupby
from pathlib import Path

import jinja2
from playwright.sync_api import sync_playwright
def is_number(s):
    """Return True if *s* can be interpreted as a float, else False.

    Anything ``float()`` accepts counts as numeric (e.g. ``"4"``, ``"3.5"``,
    ``" 2 "``). Values ``float()`` rejects -- non-numeric text, the empty
    string, or non-string objects such as ``None`` -- yield False instead of
    raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: None / list / other objects.
        return False
    return True
def ensure_colon(heading: str) -> str:
    """Return *heading* without surrounding whitespace, ending in exactly the
    colon it already had or a newly appended one."""
    trimmed = heading.strip()
    return trimmed if trimmed.endswith(":") else trimmed + ":"
# HTML/Jinja2 template for one locality's report. Each row becomes one
# ".trainee-page" div; "page-break-after: always" puts each row on a new page.
_REPORT_TEMPLATE = r"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Data Report</title>
<style>
@page {
size: Letter;
margin: 50px;
}
body {
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
font-size: 11pt;
line-height: 1.5;
margin: 0;
padding: 0;
color: #2c3e50;
}
.trainee-page {
margin-bottom: 2em;
page-break-after: always;
}
h2 {
font-size: 14pt;
margin: 0;
color: #4a5568;
}
p {
margin: 0.5em 0;
white-space: pre-wrap;
}
.locality {
margin-bottom: 0 !important;
}
.locality-label {
font-size: 1.15rem;
color: #4a5568;
margin: 0;
}
.locality-value {
font-size: 1.15rem;
margin: 0;
font-weight: bold;
color: #4a5568;
}
.name-label {
font-size: 1.5rem;
color: #4a5568;
margin: 0;
}
.name-value {
font-size: 1.5rem;
font-weight: bold;
color: #165f86;
}
.comments-label {
font-weight: 500;
margin-top: 1em;
margin-bottom: 0.2em;
color: #7c3aed;
font-size: 10.5pt;
}
.comments-value {
margin-top: 0;
color: #6c757d;
font-style: italic;
}
.bold {
font-weight: bold;
color: #2d3748;
}
.evaluation-header {
font-weight: bold;
margin: 1em 0 0.5em 0;
color: #2d3748;
font-size: 11pt;
}
.evaluation-item {
margin: 0.2em 0;
}
.evaluation-item .bold {
color: #1a365d;
font-weight: 600;
min-width: 110px;
display: inline-block;
font-size: 10.5pt;
text-transform: uppercase;
letter-spacing: 0.3px;
}
.block-label {
font-weight: bold;
margin-top: 1em;
margin-bottom: 0.3em;
}
.block-value {
margin-bottom: 0.5em;
color: #4a5568;
}
.entry-value {
color: #059669;
font-weight: 600;
font-size: 12pt;
}
.line-separator {
border: none;
border-top: 1px solid rgba(0, 0, 0, 0.1);
margin-bottom: 1em;
}
</style>
</head>
<body>
{% for row in rows %}
<div class="trainee-page">
<p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>
{% for entry in row.entries %}
{% if entry.type == "comments" %}
<p class="comments-label">Comments:</p>
<p class="comments-value">{{ entry.value }}</p>
{% elif entry.type == "inline" %}
<div class="inline-answer">
{% if "Column 4" in entry.label %}
<p class="evaluation-header">Please evaluate your progress for this past term (1-5, 5 indicating the most progress):</p>
<p class="evaluation-item"><span class="bold">Truth:</span> <span class="entry-value">{{ entry.value }}</span></p>
{% elif "Column 6" in entry.label %}
<p class="evaluation-item"><span class="bold">Life:</span> <span class="entry-value">{{ entry.value }}</span></p>
{% elif "Column 8" in entry.label %}
<p class="evaluation-item"><span class="bold">Gospel:</span> <span class="entry-value">{{ entry.value }}</span></p>
{% elif "Column 10" in entry.label %}
<p class="evaluation-item"><span class="bold">Character:</span> <span class="entry-value">{{ entry.value }}</span></p>
{% elif "Column 12" in entry.label %}
<p class="evaluation-item"><span class="bold">Service:</span> <span class="entry-value">{{ entry.value }}</span></p>
{% else %}
<span class="bold">{{ entry.label }}</span>
<p class="entry-value">{{ entry.value }}</p>
{% endif %}
</div>
{% else %}
{% if "Name" in entry.label %}
<p class="name-label">{{ entry.label }}<span class="name-value"> {{ entry.value }}</span></p>
{% elif "Column" in entry.label %}
<p class="comments-label">Comments:</p>
<p class="comments-value">{{ entry.value }}</p>
<hr class="line-separator">
{% else %}
<p class="block-label">{{ entry.label }}</p>
<p class="comments-value block-value">{{ entry.value }}</p>
{% endif %}
{% endif %}
{% endfor %}
</div>
{% endfor %}
</body>
</html>
"""

# Column headers used for sorting/grouping.
# NOTE(review): the name header is looked up WITH a trailing space, while the
# skip set below lists it WITHOUT one. A "Name (last, first): " column is
# therefore not skipped and reaches the template as a block entry, where the
# '"Name" in entry.label' branch renders it. Presumably intentional -- confirm
# against a real export before normalising either spelling.
_NAME_COLUMN = "Name (last, first): "
_LOCALITY_COLUMN = "Sending locality:"

# Columns that never become question/answer entries.
_SKIP_COLUMNS = frozenset({
    'Timestamp',
    'Email Address',
    'Name (last, first):',
    'Sending locality:',
})

# Matches headers such as "Comments 1" (case-insensitive).
_COMMENTS_COLUMN_RE = re.compile(r'^Comments\s\d+$', re.IGNORECASE)


def _cell_text(row, column):
    """Return the stripped text stored under *column*, or "" if it is missing."""
    value = row.get(column)
    # csv.DictReader pads short rows with None, so guard before stripping.
    return value.strip() if isinstance(value, str) else ""


def _build_entries(row):
    """Convert one TSV row into the list of entry dicts the template renders."""
    entries = []
    for col, raw in row.items():
        # csv.DictReader stores surplus cells under the key None (as a list)
        # and fills missing cells with None; neither is an answer to render.
        if col is None or col in _SKIP_COLUMNS or not isinstance(raw, str):
            continue
        value = raw.strip()
        if not value:
            continue
        col_title = col.strip()
        if _COMMENTS_COLUMN_RE.match(col_title):
            # e.g. "Comments 1" => rendered under a generic "Comments:" label
            entries.append({"type": "comments", "label": "Comments", "value": value})
        elif is_number(value):
            # Numeric answer => label and value shown together
            entries.append({"type": "inline", "label": ensure_colon(col_title), "value": value})
        else:
            # Free-text answer => label on one line, answer on the next
            entries.append({"type": "block", "label": col_title, "value": value})
    return entries


def _unique_pdf_name(locality, taken):
    """Return a filesystem-safe, not-yet-used PDF filename for *locality*.

    *taken* is a set of case-folded stems already handed out; it is updated
    in place so two localities that sanitise to the same stem (or to an empty
    one) do not overwrite each other's PDF.
    """
    stem = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
    if not stem:
        stem = "unknown_locality"
    candidate = stem
    suffix = 2
    # Compare case-folded so names also stay distinct on case-insensitive
    # filesystems.
    while candidate.casefold() in taken:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    taken.add(candidate.casefold())
    return f"{candidate}.pdf"


def _export_pdf(context, html, pdf_path):
    """Render *html* in a fresh page of *context* and save it to *pdf_path*."""
    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
        tmp_file.write(html.encode("utf-8"))
    tmp_html = Path(tmp_file.name)
    try:
        page = context.new_page()
        try:
            # as_uri() builds a valid file:// URL on every platform.
            page.goto(tmp_html.as_uri())
            page.pdf(
                path=pdf_path,
                format="letter",
                margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
            )
        finally:
            page.close()
    finally:
        # Always remove the temporary HTML, even if rendering failed.
        tmp_html.unlink()


def main():
    """Generate one PDF per sending locality from a TSV file, then zip them.

    Command-line arguments:
        -i / --input_tsv          path to the tab-separated input file
        -o / --output_pdf_folder  folder to (re)create and fill with PDFs;
                                  a sibling "<folder>.zip" is produced as well
        --browser                 Playwright engine; only "chromium" can
                                  export PDFs, other values are rejected

    Side effects: deletes any existing output folder and zip of the same name
    before writing the new ones, and prints one line per generated file.
    """
    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    # Playwright can only generate PDFs with Chromium. Fail before any
    # existing output is deleted rather than halfway through the run.
    if args.browser != "chromium":
        parser.error("PDF export is only supported by Playwright's Chromium engine; "
                     "re-run with --browser chromium.")

    # Normalise so a trailing slash ("out/") does not yield "out/.zip".
    output_dir = os.path.normpath(args.output_pdf_folder)
    zip_filename = f"{output_dir}.zip"

    # 1. Read the TSV data (utf-8-sig drops a leading BOM if present)
    with open(args.input_tsv, 'r', encoding='utf-8-sig', newline='') as f:
        rows = list(csv.DictReader(f, delimiter='\t'))

    # 2. Process each row into a data structure for Jinja2
    processed_rows = [
        {
            "name": _cell_text(row, _NAME_COLUMN),
            "locality": _cell_text(row, _LOCALITY_COLUMN),
            "entries": _build_entries(row),
        }
        for row in rows
    ]
    # 3. Sort by locality (required for groupby), then by name within it
    processed_rows.sort(key=lambda r: (r["locality"], r["name"]))

    # 4. Reset previous output: remove the old zip and recreate the folder
    if os.path.exists(zip_filename):
        os.remove(zip_filename)
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # 5. Render and export one PDF per locality.
    # Autoescaping keeps answers containing '<', '>' or '&' from being
    # interpreted as markup.
    template = jinja2.Environment(autoescape=True).from_string(_REPORT_TEMPLATE)
    taken_names = set()
    with sync_playwright() as p:
        browser = getattr(p, args.browser).launch()
        try:
            context = browser.new_context()
            for locality, group in groupby(processed_rows, key=lambda r: r["locality"]):
                rendered_html = template.render(rows=list(group))
                pdf_path = os.path.join(output_dir, _unique_pdf_name(locality, taken_names))
                _export_pdf(context, rendered_html, pdf_path)
                print(f"✅ Generated PDF: {pdf_path}")
        finally:
            browser.close()

    # 6. Zip the output folder
    shutil.make_archive(output_dir, 'zip', output_dir)
    print(f"📁 Zipped folder created: {zip_filename}")
# Run the report generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()