You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
340 lines
9.7 KiB
Python
340 lines
9.7 KiB
Python
import argparse
import csv
import os
import re
import shutil
import tempfile
import zipfile
from itertools import groupby
from pathlib import Path

import jinja2
from playwright.sync_api import sync_playwright
|
|
|
|
def is_number(s):
    """Return True if ``s`` can be interpreted as a float.

    Anything ``float()`` accepts counts as a number, including strings with
    surrounding whitespace and the special spellings ``"nan"`` / ``"inf"``.

    Args:
        s: The value to test (normally a string cell from the TSV).

    Returns:
        True when ``float(s)`` succeeds, False otherwise.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: text that is not a number (e.g. "abc", "").
        # TypeError: a non-string, non-numeric object such as None, which
        # would otherwise propagate as a crash instead of a plain "no".
        return False
    return True
|
|
|
|
def ensure_colon(heading: str) -> str:
    """Return the trimmed heading, guaranteed to end with exactly one added ':'.

    Surrounding whitespace is removed first; a colon is appended only when
    the trimmed text does not already finish with one.
    """
    trimmed = heading.strip()
    return trimmed if trimmed.endswith(":") else f"{trimmed}:"
|
|
|
|
# Jinja2 HTML template for one locality's report. Every response row is
# rendered inside a ``.trainee-page`` div whose ``page-break-after: always``
# puts each row on a new PDF page.
_REPORT_TEMPLATE = r"""
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Data Report</title>
    <style>
        @page {
            size: Letter;
            margin: 50px;
        }

        body {
            font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
            font-size: 11pt;
            line-height: 1.5;
            margin: 0;
            padding: 0;
            color: #2c3e50;
        }

        .trainee-page {
            margin-bottom: 2em;
            page-break-after: always;
        }

        h2 {
            font-size: 14pt;
            margin: 0;
            color: #4a5568;
        }

        p {
            margin: 0.5em 0;
            white-space: pre-wrap;
        }

        .locality {
            margin-bottom: 0 !important;
        }

        .locality-label {
            font-size: 1.15rem;
            color: #4a5568;
            margin: 0;
        }

        .locality-value {
            font-size: 1.15rem;
            margin: 0;
            font-weight: bold;
            color: #4a5568;
        }

        .name-label {
            font-size: 1.5rem;
            color: #4a5568;
            margin: 0;
        }

        .name-value {
            font-size: 1.5rem;
            font-weight: bold;
            color: #165f86;
        }

        .comments-label {
            font-weight: 500;
            margin-top: 1em;
            margin-bottom: 0.2em;
            color: #7c3aed;
            font-size: 10.5pt;
        }

        .comments-value {
            margin-top: 0;
            color: #6c757d;
            font-style: italic;
        }

        .bold {
            font-weight: bold;
            color: #2d3748;
        }

        .evaluation-header {
            font-weight: bold;
            margin: 1em 0 0.5em 0;
            color: #2d3748;
            font-size: 11pt;
        }

        .evaluation-item {
            margin: 0.2em 0;
        }

        .evaluation-item .bold {
            color: #1a365d;
            font-weight: 600;
            min-width: 110px;
            display: inline-block;
            font-size: 10.5pt;
            text-transform: uppercase;
            letter-spacing: 0.3px;
        }

        .block-label {
            font-weight: bold;
            margin-top: 1em;
            margin-bottom: 0.3em;
        }

        .block-value {
            margin-bottom: 0.5em;
            color: #4a5568;
        }

        .entry-value {
            color: #059669;
            font-weight: 600;
            font-size: 12pt;
        }

        .line-separator {
            border: none;
            border-top: 1px solid rgba(0, 0, 0, 0.1);
            margin-bottom: 1em;
        }
    </style>
</head>

<body>
{% for row in rows %}
    <div class="trainee-page">
        <p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>

        {% for entry in row.entries %}
            {% if entry.type == "comments" %}
                <p class="comments-label">Comments:</p>
                <p class="comments-value">{{ entry.value }}</p>

            {% elif entry.type == "inline" %}
                <div class="inline-answer">
                {% if "Column 4" in entry.label %}
                    <p class="evaluation-header">Please evaluate your progress for this past term (1-5, 5 indicating the most progress):</p>
                    <p class="evaluation-item"><span class="bold">Truth:</span> <span class="entry-value">{{ entry.value }}</span></p>
                {% elif "Column 6" in entry.label %}
                    <p class="evaluation-item"><span class="bold">Life:</span> <span class="entry-value">{{ entry.value }}</span></p>
                {% elif "Column 8" in entry.label %}
                    <p class="evaluation-item"><span class="bold">Gospel:</span> <span class="entry-value">{{ entry.value }}</span></p>
                {% elif "Column 10" in entry.label %}
                    <p class="evaluation-item"><span class="bold">Character:</span> <span class="entry-value">{{ entry.value }}</span></p>
                {% elif "Column 12" in entry.label %}
                    <p class="evaluation-item"><span class="bold">Service:</span> <span class="entry-value">{{ entry.value }}</span></p>
                {% else %}
                    <span class="bold">{{ entry.label }}</span>
                    <p class="entry-value">{{ entry.value }}</p>
                {% endif %}
                </div>
            {% else %}
                {% if "Name" in entry.label %}
                    <p class="name-label">{{ entry.label }}<span class="name-value"> {{ entry.value }}</span></p>
                {% elif "Column" in entry.label %}
                    <p class="comments-label">Comments:</p>
                    <p class="comments-value">{{ entry.value }}</p>
                    <hr class="line-separator">
                {% else %}
                    <p class="block-label">{{ entry.label }}</p>
                    <p class="comments-value block-value">{{ entry.value }}</p>
                {% endif %}
            {% endif %}
        {% endfor %}
    </div>
{% endfor %}
</body>
</html>
"""

# Column headers used to look up the grouping/sorting fields of a row.
# NOTE(review): the name header carries a trailing space while the locality
# header does not; presumably both mirror the TSV headers exactly -- confirm.
_LOCALITY_COL = "Sending locality:"
_NAME_COL = "Name (last, first): "

# Known columns to skip when building the question/answer entries.
# Membership is tested against the raw (unstripped) header, so the name column
# looked up as "Name (last, first): " (trailing space) is NOT matched by the
# entry below; it flows into the entries, where the template's
# ``"Name" in entry.label`` branch renders it as the name line.
_SKIP_COLS = frozenset({
    'Timestamp',
    'Email Address',
    'Name (last, first):',
    'Sending locality:',
})

# Headers such as "Comments 1", "comments 12" (case-insensitive).
_COMMENTS_COL_RE = re.compile(r'^Comments\s\d+$', re.IGNORECASE)


def _cell(row, column):
    """Return ``row[column]`` stripped, or ``""`` when the cell is absent.

    ``csv.DictReader`` fills the missing cells of a short row with ``None``;
    treating those as empty avoids an ``AttributeError`` on ``.strip()``.
    """
    value = row.get(column)
    return value.strip() if isinstance(value, str) else ""


def _build_entries(row):
    """Convert one TSV row into the list of entry dicts the template renders.

    Each entry has a ``type`` ("comments", "inline" for numeric answers, or
    "block" for free text), a ``label`` and a ``value``. Skipped columns and
    empty cells produce no entry.
    """
    entries = []
    for col in row:
        # DictReader stores the surplus cells of an over-long row under the
        # key None; they have no heading to print, so they are ignored.
        if col is None or col in _SKIP_COLS:
            continue

        value = _cell(row, col)
        if not value:
            continue

        col_title = col.strip()
        if _COMMENTS_COL_RE.match(col_title):
            # e.g. "Comments 1" => rendered under an italic "Comments:" label
            entries.append({"type": "comments", "label": "Comments", "value": value})
        elif is_number(value):
            # Numeric => label and value on one line, e.g. "Truth: 4"
            entries.append({"type": "inline", "label": ensure_colon(col_title), "value": value})
        else:
            # Non-numeric => label on one line, answer on the next
            entries.append({"type": "block", "label": col_title, "value": value})
    return entries


def _pdf_stem(locality):
    """Return a filesystem-safe file stem for a locality name.

    Characters other than word characters, '-', '_', '.' and space become
    '_', and spaces become '_'. An empty result falls back to a fixed name so
    the output is never a bare ".pdf" file.
    """
    stem = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_")
    return stem or "unknown_locality"


def main():
    """Generate one PDF report per sending locality from a TSV file.

    Command-line arguments:
        -i / --input_tsv: path to the tab-separated input file.
        -o / --output_pdf_folder: folder that receives the PDFs; it is deleted
            and recreated on every run, and zipped to ``<folder>.zip``.
        --browser: Playwright browser engine to launch (default: chromium).

    Rows are sorted by locality and then by name, grouped by locality,
    rendered to HTML with Jinja2 and printed to PDF through Playwright.
    """
    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    # Normalise so that a trailing separator ("out/") does not turn the zip
    # name into "out/.zip", i.e. an archive written inside the zipped folder.
    output_dir = os.path.normpath(args.output_pdf_folder)
    zip_filename = f"{output_dir}.zip"

    # 1. Read the TSV data (newline="" is what the csv module expects so that
    #    line breaks embedded in quoted cells are parsed correctly).
    with open(args.input_tsv, 'r', encoding='utf-8-sig', newline='') as f:
        rows = list(csv.DictReader(f, delimiter='\t'))

    # 2. Process each row into a data structure for Jinja2
    processed_rows = [
        {
            "name": _cell(row, _NAME_COL),
            "locality": _cell(row, _LOCALITY_COL),
            "entries": _build_entries(row),
        }
        for row in rows
    ]

    # 3. Sort by sending locality first, then by name. groupby() only merges
    #    adjacent items, so the locality must be the primary sort key.
    processed_rows.sort(key=lambda r: (r["locality"], r["name"]))

    # Remove existing output zip file if it exists
    if os.path.exists(zip_filename):
        os.remove(zip_filename)

    # Create or reset the output folder
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # 4. Compile the template. Autoescaping is enabled because the values are
    #    free-text form answers; an unescaped '<' or '&' would otherwise be
    #    interpreted as markup and corrupt (or inject into) the page.
    template = jinja2.Environment(autoescape=True).from_string(_REPORT_TEMPLATE)

    # 5. Render and export one PDF per locality
    with sync_playwright() as p:
        # NOTE(review): Playwright documents page.pdf() as Chromium-only;
        # confirm whether the firefox/webkit choices can work at all.
        browser = getattr(p, args.browser).launch()
        try:
            context = browser.new_context()

            for locality, group in groupby(processed_rows, key=lambda r: r["locality"]):
                rendered_html = template.render(rows=list(group))
                pdf_path = os.path.join(output_dir, f"{_pdf_stem(locality)}.pdf")

                # delete=False: the file must outlive this `with` so the
                # browser can open it; it is removed explicitly below.
                with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
                    tmp_html_path = tmp_file.name
                    tmp_file.write(rendered_html.encode("utf-8"))

                page = context.new_page()
                try:
                    # as_uri() builds a valid file:// URL on every platform
                    # (drive letters, percent-encoding of special characters).
                    page.goto(Path(tmp_html_path).as_uri())
                    page.pdf(
                        path=pdf_path,
                        format="letter",
                        margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
                    )
                finally:
                    # Release the page and temp file even when rendering fails,
                    # so pages do not pile up across localities.
                    page.close()
                    os.remove(tmp_html_path)

                print(f"✅ Generated PDF: {pdf_path}")
        finally:
            browser.close()

    # 📦 Zip the output folder
    shutil.make_archive(output_dir, 'zip', output_dir)
    print(f"📁 Zipped folder created: {zip_filename}")
|
|
|
|
# Run the report generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|