#!/usr/bin/env python3
"""
Process one pending JSON file into the docs/ structure using Claude.

Usage:
    venv/bin/python python/process.py

Run multiple times in parallel to process faster.
"""

import fcntl
import re
import subprocess
import sys
from contextlib import contextmanager
from pathlib import Path

PROJECT_ROOT = Path(__file__).parent.parent
PROCESSED_MD = PROJECT_ROOT / "processed.md"
LOCK_FILE = PROJECT_ROOT / ".process_lock"

# A pending checklist entry looks like "- [ ] <name>.json" on its own line.
_PENDING_RE = re.compile(r"^- \[ \] (.+\.json)$", re.MULTILINE)

# NOTE(review): step 7 asks Claude to edit processed.md itself. That edit is
# not made under LOCK_FILE, so it can race with other workers; mark_completed()
# performs the same transition under the lock. Consider dropping step 7.
PROMPT_TEMPLATE = '''Process this ONE scraped JSON file into the docs/ folder structure.

File to process: {json_file}

Steps:
1. Read the JSON file from scraped_data/
2. Extract game entities (buildings, goods, etc.)
3. Translate German to English:
   - Latium = Latium (Roman region)
   - Albion = Albion (Celtic region)
   - Liberti = Liberti (Tier 1 Roman)
   - Plebejer = Plebeians (Tier 2 Roman)
   - Equites = Equites (Tier 3 Roman)
   - Patrizier = Patricians (Tier 4 Roman)
   - Wanderer = Waders (Tier 1 Celtic)
   - Schmiede = Smiths (Tier 2 Celtic)
   - Älteste = Elders (Tier 3 Celtic)
   - Mercatoren = Mercators (Tier 4 Celtic)
   - Edelmänner = Nobles (Tier 5 Celtic)
4. Determine the entity type and target folder:
   - anno-117-buildings_* → docs/buildings/
   - anno-117-goods_* → docs/goods/
   - anno-117-specialists* → docs/specialists/
   - Other files → skip (just mark as done)
5. Check if a markdown file already exists for this entity:
   - If YES: Read the existing file, MERGE new data with existing data.
     Keep existing values, add new fields, update "Unknown" values with actual data.
   - If NO: Create a new markdown file using the schema format.
6. Update the category _index.md:
   - If entity already listed: skip
   - If not listed: add a link to the entity in the appropriate section
7. Mark the file as processed in processed.md (change `- [~] {json_file}` to `- [x] {json_file}`)

IMPORTANT for merging:
- Preserve existing data that is more specific than "Unknown"
- Add any new fields from the JSON that are missing in the existing file
- Update "Unknown" values with actual values from the JSON
- Keep cross-references and links intact

Use the existing files in docs/ as examples for formatting.
Keep entries concise. Mark unknown values as "Unknown".
'''


@contextmanager
def _processed_md_lock():
    """Hold an exclusive flock on LOCK_FILE for the duration of the block.

    Every read-modify-write of processed.md must happen inside this context
    so that parallel workers cannot overwrite each other's updates. The lock
    is not re-entrant: do not nest it within the same process.
    """
    LOCK_FILE.touch(exist_ok=True)
    with open(LOCK_FILE, "r+") as lock:
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX)
        try:
            yield
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)


def _swap_marker(content: str, json_file: str, old: str, new: str) -> str:
    """Return *content* with `- [old] json_file` lines changed to `- [new] ...`.

    Only whole lines that match exactly are rewritten, so an entry whose name
    merely starts with *json_file* (e.g. `a.json.bak`) is left untouched.
    """
    entry = re.compile(
        rf"^- \[{re.escape(old)}\] {re.escape(json_file)}$", re.MULTILINE
    )
    # A callable replacement keeps backslashes in the file name literal.
    return entry.sub(lambda _m: f"- [{new}] {json_file}", content)


def _update_status(json_file: str, old: str, new: str) -> None:
    """Rewrite the checklist marker of *json_file* while holding the lock."""
    with _processed_md_lock():
        content = PROCESSED_MD.read_text(encoding="utf-8")
        updated = _swap_marker(content, json_file, old, new)
        if updated != content:
            PROCESSED_MD.write_text(updated, encoding="utf-8")


def claim_pending_file():
    """Atomically claim a pending file using file locking.

    Returns:
        The name of the first `- [ ]` entry in processed.md, which is marked
        `- [~]` (in progress) before the lock is released, or None when no
        pending entry is left.
    """
    with _processed_md_lock():
        content = PROCESSED_MD.read_text(encoding="utf-8")

        # Find first pending file
        match = _PENDING_RE.search(content)
        if not match:
            return None

        json_file = match.group(1)

        # Mark as in-progress with a special marker (~ means in-progress)
        PROCESSED_MD.write_text(
            _swap_marker(content, json_file, " ", "~"), encoding="utf-8"
        )
        return json_file


def mark_completed(json_file: str):
    """Mark file as completed (`- [~]` becomes `- [x]`)."""
    _update_status(json_file, "~", "x")


def mark_failed(json_file: str):
    """Revert file to pending if processing failed (`- [~]` becomes `- [ ]`)."""
    _update_status(json_file, "~", " ")


def process_file(json_file: str) -> bool:
    """Run Claude to process the file.

    Returns:
        True when the `claude` CLI exits with status 0; False on a non-zero
        exit status, a timeout, or any other error while launching it.
    """
    prompt = PROMPT_TEMPLATE.format(json_file=json_file)

    try:
        result = subprocess.run(
            ["claude", "-p", prompt, "--allowedTools", "Read,Write,Edit,Glob"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            timeout=300,  # 5 minute timeout
        )
    except subprocess.TimeoutExpired:
        print(f"Timeout processing {json_file}", file=sys.stderr)
        return False
    except Exception as e:
        # Top-level boundary: e.g. the `claude` executable is not installed.
        print(f"Error: {e}", file=sys.stderr)
        return False

    if result.returncode != 0:
        print(f"Claude error: {result.stderr}", file=sys.stderr)
        return False

    print(result.stdout)
    return True


def main():
    """Claim one pending file, process it, and record the outcome."""
    json_file = claim_pending_file()

    if not json_file:
        print("No pending files to process")
        return

    print(f"Processing: {json_file}")

    try:
        succeeded = process_file(json_file)
    except BaseException:
        # Ctrl-C / SystemExit bypass process_file's handlers; release the
        # claim so the entry does not stay "[~]" forever.
        mark_failed(json_file)
        raise

    if succeeded:
        mark_completed(json_file)
        print(f"Completed: {json_file}")
    else:
        mark_failed(json_file)
        print(f"Failed: {json_file}", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()