# end_of_term_surveys/generate_pdf_emoji_support.py

import argparse
import csv
import os
import re
import shutil
import tempfile
import zipfile
from itertools import groupby
from pathlib import Path

import jinja2
from playwright.sync_api import sync_playwright

def is_number(s):
    """Return True if *s* can be interpreted as a float.

    Anything ``float()`` accepts counts as a number, including surrounding
    whitespace and the special spellings ``"nan"`` / ``"inf"``.  Values that
    ``float()`` cannot even attempt to convert (e.g. ``None`` for a missing
    cell) are reported as not-a-number instead of raising.
    """
    try:
        float(s)
    except (TypeError, ValueError):
        # ValueError: unparsable text; TypeError: non-string/non-numeric input.
        return False
    return True

def ensure_colon(heading: str) -> str:
    """Return the trimmed heading, guaranteed to end with exactly one added ':' if it had none."""
    trimmed = heading.strip()
    # Leave headings that already terminate with a colon untouched.
    return trimmed if trimmed.endswith(":") else trimmed + ":"

# Header names used to look up the two identifying columns of a survey row.
# The name key carries a trailing space, exactly as the lookups always did.
_LOCALITY_COLUMN = 'Sending locality:'
_NAME_COLUMN = 'Name (last, first): '

# Columns that are never turned into question/answer entries.
# NOTE(review): 'Name (last, first):' here has no trailing space while
# _NAME_COLUMN does, so a header that carries the trailing space is NOT
# skipped and is rendered through the template's "Name" branch.  The report
# relies on that, so confirm the intent before normalizing either string.
_SKIP_COLUMNS = frozenset({
    'Timestamp',
    'Email Address',
    'Name (last, first):',
    'Sending locality:',
})

# Matches follow-up columns titled "Comments 1", "Comments 2", ... (any case).
_COMMENTS_COLUMN_RE = re.compile(r'^Comments\s\d+$', re.IGNORECASE)

# Jinja2 template for one locality; page-break-after puts each row on a new
# PDF page.
_HTML_TEMPLATE = r"""
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <title>Data Report</title>
  <style>
    @page {
      size: Letter;
      margin: 50px;
    }

    body {
      font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, 'Helvetica Neue', Arial, sans-serif;
      font-size: 11pt;
      line-height: 1.5;
      margin: 0;
      padding: 0;
      color: #2c3e50;
    }

    .trainee-page {
      margin-bottom: 2em;
      page-break-after: always;
    }

    h2 {
      font-size: 14pt;
      margin: 0;
      color: #4a5568;
    }

    p {
      margin: 0.5em 0;
      white-space: pre-wrap;
    }

    .locality {
      margin-bottom: 0 !important;
    }

    .locality-label {
      font-size: 1.15rem;
      color: #4a5568;
      margin: 0;
    }

    .locality-value {
      font-size: 1.15rem;
      margin: 0;
      font-weight: bold;
      color: #4a5568;
    }

    .name-label {
      font-size: 1.5rem;
      color: #4a5568;
      margin: 0;
    }

    .name-value {
      font-size: 1.5rem;
      font-weight: bold;
      color: #165f86;
    }

    .comments-label {
      font-weight: 500;
      margin-top: 1em;
      margin-bottom: 0.2em;
      color: #7c3aed;
      font-size: 10.5pt;
    }

    .comments-value {
      margin-top: 0;
      color: #6c757d;
      font-style: italic;
    }

    .bold {
      font-weight: bold;
      color: #2d3748;
    }

    .evaluation-header {
      font-weight: bold;
      margin: 1em 0 0.5em 0;
      color: #2d3748;
      font-size: 11pt;
    }

    .evaluation-item {
      margin: 0.2em 0;
    }

    .evaluation-item .bold {
      color: #1a365d;
      font-weight: 600;
      min-width: 110px;
      display: inline-block;
      font-size: 10.5pt;
      text-transform: uppercase;
      letter-spacing: 0.3px;
    }

    .block-label {
      font-weight: bold;
      margin-top: 1em;
      margin-bottom: 0.3em;
    }

    .block-value {
      margin-bottom: 0.5em;
      color: #4a5568;
    }

    .entry-value {
      color: #059669;
      font-weight: 600;
      font-size: 12pt;
    }

    .line-separator {
      border: none;
      border-top: 1px solid rgba(0, 0, 0, 0.1);
      margin-bottom: 1em;
    }
  </style>
</head>

<body>
  {% for row in rows %}
  <div class="trainee-page">
    <p class="locality"><span class="locality-label">Sending Locality: </span><span class="locality-value">{{ row.locality }}</span></p>

    {% for entry in row.entries %}
      {% if entry.type == "comments" %}
        <p class="comments-label">Comments:</p>
        <p class="comments-value">{{ entry.value }}</p>

      {% elif entry.type == "inline" %}
        <div class="inline-answer">
          {% if "Column 4" in entry.label %}
            <p class="evaluation-header">Please evaluate your progress for this past term (1-5, 5 indicating the most progress):</p>
            <p class="evaluation-item"><span class="bold">Truth:</span> <span class="entry-value">{{ entry.value }}</span></p>
          {% elif "Column 6" in entry.label %}
            <p class="evaluation-item"><span class="bold">Life:</span> <span class="entry-value">{{ entry.value }}</span></p>
          {% elif "Column 8" in entry.label %}
            <p class="evaluation-item"><span class="bold">Gospel:</span> <span class="entry-value">{{ entry.value }}</span></p>
          {% elif "Column 10" in entry.label %}
            <p class="evaluation-item"><span class="bold">Character:</span> <span class="entry-value">{{ entry.value }}</span></p>
          {% elif "Column 12" in entry.label %}
            <p class="evaluation-item"><span class="bold">Service:</span> <span class="entry-value">{{ entry.value }}</span></p>
          {% else %}
            <span class="bold">{{ entry.label }}</span>
            <p class="entry-value">{{ entry.value }}</p>
          {% endif %}
        </div>
      {% else %}
        {% if "Name" in entry.label %}
          <p class="name-label">{{ entry.label }}<span class="name-value"> {{ entry.value }}</span></p>
        {% elif "Column" in entry.label %}
            <p class="comments-label">Comments:</p>
            <p class="comments-value">{{ entry.value }}</p>
            <hr class="line-separator">
        {% else %}
          <p class="block-label">{{ entry.label }}</p>
          <p class="comments-value block-value">{{ entry.value }}</p>
        {% endif %}
      {% endif %}
    {% endfor %}
  </div>
  {% endfor %}
</body>
</html>
"""


def _clean_cell(value) -> str:
    """Return a cell's text stripped of surrounding whitespace.

    ``csv.DictReader`` yields ``None`` for cells missing from a short row;
    those (and any other non-string value) are treated as empty.
    """
    return value.strip() if isinstance(value, str) else ""


def _build_entries(row: dict) -> list:
    """Convert one TSV row into the ordered entry dicts the template renders.

    Each entry has ``type`` ("comments", "inline" or "block"), ``label`` and
    ``value``.  Skipped columns and empty answers produce no entry.
    """
    entries = []
    for col, raw_value in row.items():
        # A non-string key is DictReader's bucket for surplus fields of an
        # over-long row; there is no header to label them with, so drop them.
        if not isinstance(col, str) or col in _SKIP_COLUMNS:
            continue

        value = _clean_cell(raw_value)
        if not value:
            continue

        col_title = col.strip()
        if _COMMENTS_COLUMN_RE.match(col_title):
            # e.g. "Comments 1" => rendered under a generic "Comments:" label
            entries.append({"type": "comments", "label": "Comments", "value": value})
        elif is_number(value):
            # Numeric answer => label and value share one line
            entries.append({"type": "inline", "label": ensure_colon(col_title), "value": value})
        else:
            # Free text => label on one line, answer on the next
            entries.append({"type": "block", "label": col_title, "value": value})
    return entries


def _unique_pdf_stem(locality: str, used_stems: set) -> str:
    """Return a filesystem-safe, not-yet-used file stem for *locality*.

    *used_stems* holds the case-folded stems handed out so far and is updated
    in place.  A blank locality falls back to ``unknown_locality`` and a
    clashing stem gets a numeric suffix, so no PDF silently overwrites another
    (also on case-insensitive filesystems).
    """
    stem = re.sub(r"[^\w\-_. ]", "_", locality).strip().replace(" ", "_") or "unknown_locality"
    candidate = stem
    suffix = 2
    while candidate.casefold() in used_stems:
        candidate = f"{stem}_{suffix}"
        suffix += 1
    used_stems.add(candidate.casefold())
    return candidate


def _export_pdf(context, html: str, pdf_path: str) -> None:
    """Print *html* to *pdf_path* using a fresh page of the browser *context*.

    The HTML is loaded from a temporary file; both the file and the page are
    released even when navigation or PDF generation fails.
    """
    with tempfile.NamedTemporaryFile(suffix=".html", delete=False) as tmp_file:
        tmp_html_path = tmp_file.name
        tmp_file.write(html.encode("utf-8"))

    page = None
    try:
        page = context.new_page()
        # as_uri() yields a correctly escaped file:// URL on every platform.
        page.goto(Path(tmp_html_path).as_uri())
        page.pdf(
            path=pdf_path,
            format="letter",
            margin={"top": "0.75in", "right": "0.75in", "bottom": "0.75in", "left": "0.75in"}
        )
    finally:
        if page is not None:
            page.close()
        os.remove(tmp_html_path)


def main():
    """Command-line entry point: build one PDF per sending locality and zip them.

    Reads the TSV given by ``--input_tsv``, orders the rows by locality and
    name, renders each locality's rows to HTML, prints that HTML to
    ``<output_pdf_folder>/<locality>.pdf`` and finally archives the folder as
    ``<output_pdf_folder>.zip``.

    NOTE: an existing output folder and zip of the same name are deleted
    before the new ones are written.
    """
    parser = argparse.ArgumentParser(description="Generate a file of PDF reports from a TSV input using Playwright.")
    parser.add_argument("-i", "--input_tsv", required=True, help="Path to the input TSV file.")
    parser.add_argument("-o", "--output_pdf_folder", required=True, help="Path to the output PDF folder.")
    parser.add_argument("--browser", choices=["chromium", "firefox", "webkit"], default="chromium",
                        help="Which browser engine to use (default: chromium).")
    args = parser.parse_args()

    # Playwright documents page.pdf() as Chromium-only; stop before the
    # previous output is deleted rather than after.
    if args.browser != "chromium":
        parser.error(f"PDF generation is only supported by chromium; --browser {args.browser} cannot be used.")

    input_tsv = args.input_tsv

    # Drop trailing path separators (e.g. shell-completed "out/") so the
    # archive becomes "out.zip" next to the folder, not "out/.zip" inside it.
    separators = os.sep + (os.altsep or "")
    output_dir = args.output_pdf_folder.rstrip(separators) or args.output_pdf_folder
    zip_filename = f"{output_dir}.zip"

    # 1. Read the TSV data (newline='' lets the csv module handle line endings)
    with open(input_tsv, 'r', encoding='utf-8-sig', newline='') as f:
        rows = list(csv.DictReader(f, delimiter='\t'))

    # 2. Sort rows by sending locality first, then by name.  This ordering is
    #    also what groupby() below needs, so no second sort is required.
    rows.sort(key=lambda r: (
        _clean_cell(r.get(_LOCALITY_COLUMN)),
        _clean_cell(r.get(_NAME_COLUMN)),
    ))

    # 3. Process each row into a data structure for Jinja2
    processed_rows = [
        {
            "name": _clean_cell(row.get(_NAME_COLUMN)),  # not referenced by the template
            "locality": _clean_cell(row.get(_LOCALITY_COLUMN)),
            "entries": _build_entries(row),
        }
        for row in rows
    ]

    # Remove existing output zip file if it exists
    if os.path.exists(zip_filename):
        os.remove(zip_filename)

    # Create or reset the output folder
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    os.makedirs(output_dir, exist_ok=True)

    # 4. Render and export one PDF per locality.  Autoescaping keeps survey
    #    answers containing '<' or '&' from being interpreted as markup.
    template = jinja2.Template(_HTML_TEMPLATE, autoescape=True)
    used_stems = set()

    with sync_playwright() as p:
        browser = getattr(p, args.browser).launch()
        try:
            context = browser.new_context()
            for locality, group in groupby(processed_rows, key=lambda r: r["locality"]):
                rendered_html = template.render(rows=list(group))
                pdf_filename = f"{_unique_pdf_stem(locality, used_stems)}.pdf"
                pdf_path = os.path.join(output_dir, pdf_filename)
                _export_pdf(context, rendered_html, pdf_path)
                print(f"✅ Generated PDF: {pdf_path}")
        finally:
            browser.close()

    # 📦 Zip the output folder
    shutil.make_archive(output_dir, 'zip', output_dir)
    print(f"📁 Zipped folder created: {zip_filename}")

if __name__ == "__main__":
    main()