mirror of
https://github.com/Klotzkette/claude-fuer-deutsches-recht
synced 2026-06-09 10:03:19 +00:00
v250 testakten als akten-dumps
This commit is contained in:
@@ -1,8 +1,8 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Baut für jede Testakte ein 'gesamt-pdf/<name>_gesamt.pdf', das alle
|
||||
Aktenstücke (MD/TXT/EML/CSV/XLSX/DOCX/Bilder/PDF) in ein einziges,
|
||||
sauber gerendertes Dokument mit kompaktem Aktenstart, Inhaltsverzeichnis und
|
||||
Seitenzahlen zusammenfasst.
|
||||
"""Baut für jede Testakte ein 'gesamt-pdf/<name>_gesamt.pdf', das die
|
||||
exportfaehigen Aktenstücke (MD/TXT/EML/CSV/XLSX/DOCX/Bilder/PDF) in ein
|
||||
einziges, sauber gerendertes Dokument mit Dateigrenzen und Seitenzahlen
|
||||
zusammenfasst.
|
||||
|
||||
Aufruf:
|
||||
python3 scripts/build-testakte-gesamt-pdf.py # alle Testakten
|
||||
@@ -41,6 +41,8 @@ from reportlab.pdfgen import canvas
|
||||
from reportlab.pdfbase import pdfmetrics
|
||||
from reportlab.pdfbase.ttfonts import TTFont
|
||||
|
||||
from testakte_file_filter import include_in_working_dump
|
||||
|
||||
# DOCX
|
||||
try:
|
||||
from docx import Document
|
||||
@@ -519,15 +521,11 @@ def no_header_footer(canv: canvas.Canvas, doc) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def build_cover(name: str, _readme_summary: str | None, h1: str | None = None) -> list:
    """Return the flowables that open the combined PDF.

    The combined PDF is meant to open like a case file, not like a README or
    a download page. Download, release and test-file notes stay in the
    README; the PDF starts with nothing but the file title.

    Args:
        name: Fallback title, used when ``h1`` is empty or ``None``.
        _readme_summary: Accepted but not used.
        h1: Preferred title.

    Returns:
        A two-element list: the escaped title paragraph and a small spacer.
    """
    heading = h1 or name
    return [Paragraph(escape(heading), s_cover_title), Spacer(1, 0.35 * cm)]
|
||||
def build_cover(_name: str, _readme_summary: str | None, h1: str | None = None) -> list:
    """Return an empty cover; none of the arguments is used."""
    # Historically this produced a title page. The combined PDF is now meant
    # to start directly with the first document of the file; the function is
    # kept only for compatibility with older call sites.
    return []
|
||||
|
||||
|
||||
def extract_readme_summary(readme_path: Path) -> tuple[str | None, str | None]:
|
||||
@@ -582,12 +580,7 @@ def extract_readme_summary(readme_path: Path) -> tuple[str | None, str | None]:
|
||||
def collect_files(testakte_dir: Path) -> dict[str, list[Path]]:
|
||||
files_by_type: dict[str, list[Path]] = {t: [] for t in TYPE_ORDER}
|
||||
for f in testakte_dir.rglob("*"):
|
||||
if not f.is_file():
|
||||
continue
|
||||
# README und Gesamt-PDF ausschliessen
|
||||
if f.name == "README.md" and f.parent == testakte_dir:
|
||||
continue
|
||||
if "gesamt-pdf" in f.parts:
|
||||
if not include_in_working_dump(f, testakte_dir):
|
||||
continue
|
||||
ext = f.suffix.lower().lstrip(".")
|
||||
if ext in IMAGE_EXTS:
|
||||
@@ -613,33 +606,14 @@ def build_text_pdf(testakte_dir: Path, files: dict[str, list[Path]], cover: list
|
||||
)
|
||||
flow = list(cover)
|
||||
|
||||
# Inhaltsverzeichnis (rudimentaer)
|
||||
toc_rows: list[list] = [["Teil", "Inhalt"]]
|
||||
teil_no = 1
|
||||
for t in TYPE_ORDER:
|
||||
if not files[t]:
|
||||
continue
|
||||
toc_rows.append([f"Teil {teil_no}", f"{TYPE_LABEL[t]} ({len(files[t])})"])
|
||||
teil_no += 1
|
||||
if len(toc_rows) > 1:
|
||||
flow.append(Paragraph("Inhaltsverzeichnis", s_h1))
|
||||
flow.append(Spacer(1, 8))
|
||||
flow.extend(_render_table(toc_rows, header=True))
|
||||
flow.append(PageBreak())
|
||||
|
||||
pdf_attachments: list[Path] = []
|
||||
teil_no = 1
|
||||
for t in TYPE_ORDER:
|
||||
if not files[t]:
|
||||
continue
|
||||
if t == "pdf":
|
||||
# PDFs werden separat angehaengt (Original-Layout bewahren)
|
||||
pdf_attachments = files[t]
|
||||
teil_no += 1
|
||||
continue
|
||||
flow.append(Paragraph(f"Teil {teil_no} — {TYPE_LABEL[t]}", s_partlabel))
|
||||
flow.append(Paragraph(TYPE_LABEL[t], s_h1))
|
||||
flow.append(Spacer(1, 4))
|
||||
for f in files[t]:
|
||||
rel = f.relative_to(testakte_dir)
|
||||
flow.append(Paragraph(f"<b>Datei:</b> {escape(str(rel))}", s_meta))
|
||||
@@ -662,15 +636,11 @@ def build_text_pdf(testakte_dir: Path, files: dict[str, list[Path]], cover: list
|
||||
except Exception as e:
|
||||
flow.append(Paragraph(f"<i>Inhalt konnte nicht gerendert werden: {escape(str(e))}</i>", s_meta))
|
||||
flow.append(Spacer(1, 14))
|
||||
flow.append(PageBreak())
|
||||
teil_no += 1
|
||||
if flow:
|
||||
flow.append(PageBreak())
|
||||
|
||||
if len(flow) == len(cover) + 1:
|
||||
# Nichts ausser Cover -> trotzdem bauen, aber Hinweis
|
||||
flow.append(Paragraph(
|
||||
"Diese Arbeitsakte enthält keine renderbaren Inhalte ausserhalb der angefuegten PDFs.",
|
||||
s_body,
|
||||
))
|
||||
if not flow:
|
||||
flow.append(Paragraph("Dateiablage: Original-PDFs folgen.", s_meta))
|
||||
|
||||
hf = header_footer_factory(testakte_dir.name)
|
||||
try:
|
||||
@@ -686,17 +656,17 @@ def append_pdf_with_separator(writer: PdfWriter, label: str, pdf_path: Path, tes
|
||||
c = canvas.Canvas(sep, pagesize=A4)
|
||||
c.setTitle(label)
|
||||
c.setAuthor("Kanzleiakte")
|
||||
c.setFont(FONT_BOLD, 16)
|
||||
c.setFont(FONT_BOLD, 14)
|
||||
c.setFillColor(TEAL)
|
||||
c.drawString(2 * cm, 25 * cm, label)
|
||||
c.drawString(2 * cm, 25 * cm, "Datei")
|
||||
c.setFont(FONT_REG, 9)
|
||||
c.setFillColor(MUTED)
|
||||
c.drawString(2 * cm, 24.2 * cm, f"Datei: {pdf_path.name}")
|
||||
c.drawString(2 * cm, 24.2 * cm, label)
|
||||
c.setStrokeColor(BORDER)
|
||||
c.setLineWidth(0.3)
|
||||
c.line(2 * cm, 1.6 * cm, 19 * cm, 1.6 * cm)
|
||||
c.setFont(FONT_REG, 8)
|
||||
c.drawString(2 * cm, 1.2 * cm, f"Arbeitsakte: {testakte_name}")
|
||||
c.drawString(2 * cm, 1.2 * cm, testakte_name)
|
||||
c.showPage()
|
||||
c.save()
|
||||
sep.seek(0)
|
||||
@@ -730,8 +700,7 @@ def build_gesamt_pdf(testakte_dir: Path) -> tuple[str, str]:
|
||||
if total_files == 0:
|
||||
return "skip", "keine Quelldateien"
|
||||
|
||||
h1, summary = extract_readme_summary(testakte_dir / "README.md")
|
||||
cover = build_cover(name, summary, h1)
|
||||
cover: list = []
|
||||
|
||||
tmp_text = Path(f"/tmp/_gesamt_text_{name}.pdf")
|
||||
ok, pdf_attachments = build_text_pdf(testakte_dir, files, cover, tmp_text)
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Baut die Testakten-ZIPs fuer Releases.
|
||||
|
||||
Die ZIPs enthalten die Arbeitsdateien und das Gesamt-PDF, aber keine
|
||||
repo-internen README-, Download- oder Vorfuehrseiten. Damit entspricht der
|
||||
Download eher einem echten Aktenordner als einer Demo-Mappe.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import sys
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
|
||||
from testakte_file_filter import include_in_working_dump
|
||||
|
||||
# Parent of the directory that contains this script.
REPO_ROOT = Path(__file__).resolve().parent.parent
# Directory whose immediate sub-directories are packed, one ZIP each, by main().
TESTAKTEN = REPO_ROOT / "testakten"
|
||||
|
||||
|
||||
def iter_export_files(testakte_dir: Path):
    """Yield every exportable file below ``testakte_dir`` in a stable order.

    Files are ordered case-insensitively by their path relative to
    ``testakte_dir``. The exact path is used as a tie-breaker so that names
    differing only in case do not fall back to filesystem enumeration order,
    which would make the resulting archives non-reproducible. The POSIX form
    of the path is used for sorting, matching the archive member names.

    Args:
        testakte_dir: Root directory of one test file.

    Yields:
        Paths accepted by ``include_in_working_dump`` with the combined PDF
        allowed (``include_gesamt_pdf=True``).
    """

    def sort_key(candidate: Path) -> tuple[str, str]:
        relative = candidate.relative_to(testakte_dir).as_posix()
        return relative.lower(), relative

    for path in sorted(testakte_dir.rglob("*"), key=sort_key):
        if include_in_working_dump(path, testakte_dir, include_gesamt_pdf=True):
            yield path
|
||||
|
||||
|
||||
def add_testakte(zipf: zipfile.ZipFile, testakte_dir: Path) -> int:
    """Write all exportable files of one test file into an open ZIP archive.

    Each member is stored under ``<directory name>/<relative path>`` with
    forward slashes.

    Args:
        zipf: Archive opened for writing.
        testakte_dir: Root directory of the test file to add.

    Returns:
        Number of files written to the archive.
    """
    archive_root = Path(testakte_dir.name)
    written = 0
    for written, source in enumerate(iter_export_files(testakte_dir), start=1):
        member = archive_root / source.relative_to(testakte_dir)
        zipf.write(source, member.as_posix())
    return written
|
||||
|
||||
|
||||
def build_single(testakte_dir: Path, dist: Path) -> tuple[Path, int]:
    """Create ``testakte-<name>.zip`` for a single test file inside ``dist``.

    Args:
        testakte_dir: Root directory of the test file.
        dist: Existing output directory for the archive.

    Returns:
        The archive path and the number of files it contains.
    """
    archive_path = dist / f"testakte-{testakte_dir.name}.zip"
    with zipfile.ZipFile(archive_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        file_count = add_testakte(archive, testakte_dir)
    return archive_path, file_count
|
||||
|
||||
|
||||
def main() -> None:
    """Build one ZIP per test file plus a combined ZIP of all test files.

    The output directory is taken from the first command-line argument and
    defaults to ``REPO_ROOT / "dist"``; it is created if missing.

    Raises:
        SystemExit: If a test file contains no exportable files. The empty
            archive written for it is removed first so that no useless
            artifact remains in the output directory.
    """
    dist = Path(sys.argv[1]) if len(sys.argv) > 1 else REPO_ROOT / "dist"
    dist.mkdir(parents=True, exist_ok=True)
    dirs = sorted(d for d in TESTAKTEN.iterdir() if d.is_dir())
    if not dirs:
        print("Keine Testakten gefunden.")
        return

    total_files = 0
    for d in dirs:
        out, count = build_single(d, dist)
        if count == 0:
            # build_single has already created the (empty) archive on disk.
            out.unlink(missing_ok=True)
            raise SystemExit(f"{d}: keine exportfaehigen Dateien")
        total_files += count
        print(f"Baue {out.name}: {count} Dateien")

    all_out = dist / "alle-testakten.zip"
    with zipfile.ZipFile(all_out, "w", compression=zipfile.ZIP_DEFLATED) as zipf:
        all_count = sum(add_testakte(zipf, d) for d in dirs)
    print(f"Baue {all_out.name}: {all_count} Dateien aus {len(dirs)} Testakten")
    print(f"Fertig: {len(dirs)} Einzel-ZIPs, {total_files} Dateien")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Gemeinsamer Filter fuer Testakten-Exportdateien.
|
||||
|
||||
Die Repo-README und redaktionelle Uebersichten sind fuer GitHub wichtig, sollen
|
||||
aber nicht in den Arbeitsmaterial-Dump gelangen. Gesamt-PDFs und Testakten-ZIPs
|
||||
muessen wie eine Anwaltsakte aufgehen: Aktenstuecke, Anlagen, Mails, Tabellen,
|
||||
Bilder und Original-PDFs, aber keine Vorfuehr- oder Download-Hinweise.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
# Suffixes (lower case, with dot) of files whose content is scanned for
# META_MARKERS by is_export_meta_file.
TEXT_EXTS = {".md", ".txt", ".csv", ".eml"}

# Phrases that mark a text file as demo/test/download material. They are
# matched as substrings against the lower-cased beginning of the file, so
# every entry must itself be lower case.
META_MARKERS = (
    "demonstrationsakte",
    "demonstrations-testakte",
    "demonstrationszweck",
    "plugin-test",
    "plugin-testing",
    "plugin-testsystem",
    "plugin demonstration",
    "plugin-demonstration",
    "demo-akte",
    "vorfuehrziel",
    "vorführziel",
    "testzweck",
    "ausschließlich testzwecken",
    "ausschliesslich testzwecken",
    "nur zu testzwecken",
    "diese akte eignet sich",
    "direkt-download",
    "download der akte",
    "github-release",
)

# Substrings of the lower-cased file stem that exclude a file regardless of
# its type or location.
META_NAME_PARTS = (
    "readme",
    "qualitaetsstandard",
    "qualitätsstandard",
    "direkt-download",
    "download",
)

# Substrings of the lower-cased file stem that identify an overview sheet;
# only applied to top-level files with a leading "00"/"01" ordinal.
INITIAL_OVERVIEW_PARTS = (
    "aktenuebersicht",
    "aktenübersicht",
    "akte-uebersicht",
    "akte-übersicht",
    "soforttriage",
)
|
||||
|
||||
|
||||
def _safe_text(path: Path, limit: int = 80_000) -> str:
    """Return the lower-cased beginning of a text file, or "" if unreadable.

    Only the first ``limit`` characters are read, so a large file is never
    loaded and decoded completely just to inspect its opening section.

    Args:
        path: File to read as UTF-8; undecodable bytes are dropped.
        limit: Maximum number of characters to return. Values below zero are
            treated as zero.

    Returns:
        At most ``limit`` characters of the file in lower case, or an empty
        string when the file cannot be opened or read.
    """
    try:
        with path.open(encoding="utf-8", errors="ignore") as handle:
            return handle.read(max(limit, 0)).lower()
    except (OSError, ValueError):
        # Best effort: an unreadable file simply contributes no markers.
        return ""
|
||||
|
||||
|
||||
def _is_initial_overview(path: Path, testakte_dir: Path) -> bool:
    """Tell whether ``path`` is an overview sheet placed at the front of a file.

    A file qualifies only if it lies directly in ``testakte_dir``, its name
    starts with a leading ``00``/``01`` ordinal followed by ``-``, ``_`` or
    ``.``, and its name contains one of ``INITIAL_OVERVIEW_PARTS``.

    Underscores are treated like hyphens for both checks, so
    ``01_akte_uebersicht`` is recognised the same way as
    ``01-akte-uebersicht``.

    Args:
        path: Candidate file.
        testakte_dir: Root directory of the test file.

    Returns:
        ``True`` for a top-level front overview, otherwise ``False`` (also
        when ``path`` is not located below ``testakte_dir``).
    """
    try:
        rel = path.relative_to(testakte_dir)
    except ValueError:
        return False
    if len(rel.parts) != 1:
        return False
    compact = path.stem.lower().replace("_", "-")
    if not compact.startswith(("00-", "01-", "00.", "01.")):
        return False
    return any(part in compact for part in INITIAL_OVERVIEW_PARTS)
|
||||
|
||||
|
||||
def is_export_meta_file(path: Path, testakte_dir: Path) -> bool:
    """Return True if the file does not belong in the PDF/ZIP working material.

    A file is treated as meta material when any of the following holds:
    it is named ``readme.md`` (case-insensitive), its stem contains one of
    ``META_NAME_PARTS``, it is a top-level front overview, or it is a text
    file (see ``TEXT_EXTS``) whose opening section contains one of
    ``META_MARKERS``.

    Args:
        path: Candidate file.
        testakte_dir: Root directory of the test file.
    """
    lowered_stem = path.stem.lower()
    named_like_meta = path.name.lower() == "readme.md" or any(
        fragment in lowered_stem for fragment in META_NAME_PARTS
    )
    if named_like_meta or _is_initial_overview(path, testakte_dir):
        return True
    # Only text formats are opened; other files are judged by name alone.
    if path.suffix.lower() not in TEXT_EXTS:
        return False
    content = _safe_text(path)
    return any(marker in content for marker in META_MARKERS)
|
||||
|
||||
|
||||
def include_in_working_dump(path: Path, testakte_dir: Path, *, include_gesamt_pdf: bool = False) -> bool:
    """Decide whether a single path inside a test file is exported.

    Args:
        path: Candidate path; anything that is not an existing regular file
            is rejected.
        testakte_dir: Root directory of the test file. Paths outside of it
            are rejected.
        include_gesamt_pdf: When true, the file
            ``gesamt-pdf/<testakte_dir.name>_gesamt.pdf`` is accepted.
            Everything else below a ``gesamt-pdf`` directory is always
            rejected.

    Returns:
        ``True`` if the file belongs in the working dump.
    """
    if not path.is_file():
        return False
    try:
        rel = path.relative_to(testakte_dir)
    except ValueError:
        return False
    # Hidden files and everything below a hidden directory are skipped; this
    # already covers ".DS_Store". Bytecode caches are skipped as well.
    if "__pycache__" in rel.parts or any(part.startswith(".") for part in rel.parts):
        return False
    if "gesamt-pdf" in rel.parts:
        return include_gesamt_pdf and path.name == f"{testakte_dir.name}_gesamt.pdf"
    return not is_export_meta_file(path, testakte_dir)
|
||||
Reference in New Issue
Block a user