179 lines
5.5 KiB
Python
Executable File
179 lines
5.5 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
"""
|
|
Process pending JSON files into the docs/ structure using Claude.
|
|
|
|
Usage:
|
|
venv/bin/python python/process.py # Process 1 file (default)
|
|
venv/bin/python python/process.py -n 5 # Process 5 files
|
|
venv/bin/python python/process.py -n 9999 # Process all files
|
|
|
|
Run multiple instances in parallel to process faster.
|
|
"""
|
|
|
|
import argparse
|
|
import subprocess
|
|
import sys
|
|
import re
|
|
import fcntl
|
|
from pathlib import Path
|
|
|
|
# Repository root: two levels above this script. resolve() makes the path
# absolute first, so a relative __file__ (e.g. the script started from inside
# its own directory) cannot collapse ".parent.parent" to ".".
PROJECT_ROOT = Path(__file__).resolve().parent.parent
# Checklist of scraped files; each entry is "- [ ]" (pending),
# "- [~]" (in progress) or "- [x]" (done).
PROCESSED_MD = PROJECT_ROOT / "processed.md"
# File that is flock()ed to serialize access to PROCESSED_MD.
LOCK_FILE = PROJECT_ROOT / ".process_lock"
|
|
|
|
# Prompt sent to the Claude CLI for each claimed file. The only placeholder is
# {json_file}, filled in with str.format(); keep any other literal braces out
# of this text or format() will raise.
PROMPT_TEMPLATE = '''Process this ONE scraped JSON file into the docs/ folder structure.

File to process: {json_file}

Steps:
1. Read the JSON file from scraped_data/
2. Extract game entities (buildings, goods, etc.)
3. Translate German to English:
   - Latium = Latium (Roman region)
   - Albion = Albion (Celtic region)
   - Liberti = Liberti (Tier 1 Roman)
   - Plebejer = Plebeians (Tier 2 Roman)
   - Equites = Equites (Tier 3 Roman)
   - Patrizier = Patricians (Tier 4 Roman)
   - Wanderer = Waders (Tier 1 Celtic)
   - Schmiede = Smiths (Tier 2 Celtic)
   - Älteste = Elders (Tier 3 Celtic)
   - Mercatoren = Mercators (Tier 4 Celtic)
   - Edelmänner = Nobles (Tier 5 Celtic)
4. Determine the entity type and target folder:
   - anno-117-buildings_* → docs/buildings/
   - anno-117-goods_* → docs/goods/
   - anno-117-specialists* → docs/specialists/
   - Other files → skip (just mark as done)
5. Check if a markdown file already exists for this entity:
   - If YES: Read the existing file, MERGE new data with existing data.
     Keep existing values, add new fields, update "Unknown" values with actual data.
   - If NO: Create a new markdown file using the schema format.
6. Update the category _index.md:
   - If entity already listed: skip
   - If not listed: add a link to the entity in the appropriate section
7. Mark the file as processed in processed.md (change `- [~] {json_file}` to `- [x] {json_file}`)

IMPORTANT for merging:
- Preserve existing data that is more specific than "Unknown"
- Add any new fields from the JSON that are missing in the existing file
- Update "Unknown" values with actual values from the JSON
- Keep cross-references and links intact

Use the existing files in docs/ as examples for formatting.
Keep entries concise. Mark unknown values as "Unknown".
'''
|
|
|
|
|
|
def claim_pending_file() -> "str | None":
    """Claim the first pending entry in processed.md and mark it in progress.

    An exclusive flock on LOCK_FILE is held for the whole read-modify-write,
    so instances of this script running in parallel never claim the same
    entry.

    Returns:
        The claimed file name (the text after "- [ ] "), or None when the
        checklist has no pending ".json" entry.
    """
    LOCK_FILE.touch(exist_ok=True)

    with open(LOCK_FILE, "r+") as lock:
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX)
        try:
            content = PROCESSED_MD.read_text(encoding="utf-8")

            # First line of the exact form "- [ ] <name>.json".
            match = re.search(r"^- \[ \] (.+\.json)$", content, re.MULTILINE)
            if not match:
                return None

            json_file = match.group(1)

            # Rewrite only the matched line. A plain str.replace() would also
            # flip duplicates and longer entries that merely start with the
            # same text (e.g. "a.json" vs "a.json.bak.json"), marking files
            # in progress that nobody actually claimed.
            content = (
                content[:match.start()]
                + f"- [~] {json_file}"  # ~ means in-progress
                + content[match.end():]
            )
            PROCESSED_MD.write_text(content, encoding="utf-8")

            return json_file
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)
|
|
|
|
|
|
def mark_completed(json_file: str) -> None:
    """Flip an in-progress entry ("- [~]") in processed.md to done ("- [x]").

    The update runs under an exclusive flock on LOCK_FILE. Without it, this
    read-modify-write could interleave with another instance updating the
    checklist and one of the two writes would be lost.

    Args:
        json_file: File name exactly as it appears in the checklist entry.
            If no matching in-progress line exists the file is rewritten
            unchanged.
    """
    LOCK_FILE.touch(exist_ok=True)

    with open(LOCK_FILE, "r+") as lock:
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX)
        try:
            content = PROCESSED_MD.read_text(encoding="utf-8")
            # Anchor to the whole line so "a.json" cannot also hit an entry
            # such as "a.json.bak.json"; the callable replacement keeps any
            # backslashes in the name literal.
            content = re.sub(
                rf"^- \[~\] {re.escape(json_file)}$",
                lambda _match: f"- [x] {json_file}",
                content,
                flags=re.MULTILINE,
            )
            PROCESSED_MD.write_text(content, encoding="utf-8")
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)
|
|
|
|
|
|
def mark_failed(json_file: str) -> None:
    """Revert an in-progress entry ("- [~]") in processed.md to pending ("- [ ]").

    The update runs under an exclusive flock on LOCK_FILE. Without it, this
    read-modify-write could interleave with another instance updating the
    checklist and one of the two writes would be lost.

    Args:
        json_file: File name exactly as it appears in the checklist entry.
            If no matching in-progress line exists (for example because the
            entry was already marked done) the file is rewritten unchanged.
    """
    LOCK_FILE.touch(exist_ok=True)

    with open(LOCK_FILE, "r+") as lock:
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX)
        try:
            content = PROCESSED_MD.read_text(encoding="utf-8")
            # Anchor to the whole line so "a.json" cannot also hit an entry
            # such as "a.json.bak.json"; the callable replacement keeps any
            # backslashes in the name literal.
            content = re.sub(
                rf"^- \[~\] {re.escape(json_file)}$",
                lambda _match: f"- [ ] {json_file}",
                content,
                flags=re.MULTILINE,
            )
            PROCESSED_MD.write_text(content, encoding="utf-8")
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)
|
|
|
|
|
|
def process_file(json_file: str, timeout: float = 300) -> bool:
    """Run the Claude CLI on one claimed file.

    Args:
        json_file: Name of the scraped JSON file; substituted into
            PROMPT_TEMPLATE.
        timeout: Seconds to wait for the CLI before giving up
            (default: 300, i.e. 5 minutes).

    Returns:
        True when the CLI exits with status 0. False on a non-zero exit
        status, on timeout, or on any other exception raised while running
        it; the reason is printed to stderr.
    """
    prompt = PROMPT_TEMPLATE.format(json_file=json_file)

    try:
        result = subprocess.run(
            ["claude", "-p", prompt, "--allowedTools", "Read,Write,Edit,Glob"],
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            timeout=timeout,
        )

        if result.returncode != 0:
            print(f"Claude error: {result.stderr}", file=sys.stderr)
            return False

        print(result.stdout)
        return True

    except subprocess.TimeoutExpired:
        print(f"Timeout processing {json_file}", file=sys.stderr)
        return False
    except Exception as e:
        # Deliberately broad: any failure here (e.g. the CLI cannot be
        # launched) is reported as "this file failed" so the caller can put
        # the file back in the queue instead of crashing mid-run.
        print(f"Error: {e}", file=sys.stderr)
        return False
|
|
|
|
|
|
def main():
    """Command-line entry point: claim and process up to ``--count`` files.

    Each claimed file is handed to process_file(). Successful files are
    marked done. Failed files stay marked in progress until the run ends and
    are only then returned to pending, so the next claim moves on to a
    different file instead of immediately picking the same failing one again.

    Exits with status 1 if at least one file failed.
    """
    parser = argparse.ArgumentParser(description="Process pending JSON files into docs/")
    parser.add_argument(
        "-n", "--count",
        type=int,
        default=1,
        help="Number of files to process (default: 1)",
    )
    args = parser.parse_args()

    processed = 0
    failed = 0
    # Files this run has claimed but not completed. Released in the finally
    # block, which also covers Ctrl-C and unexpected exceptions, so an entry
    # is not left stuck at "[~]".
    held = []

    try:
        for i in range(args.count):
            json_file = claim_pending_file()

            if not json_file:
                if processed == 0:
                    print("No pending files to process")
                break

            held.append(json_file)
            print(f"[{i + 1}/{args.count}] Processing: {json_file}")

            if process_file(json_file):
                mark_completed(json_file)
                held.remove(json_file)
                print(f"Completed: {json_file}")
                processed += 1
            else:
                print(f"Failed: {json_file}", file=sys.stderr)
                failed += 1
    finally:
        # Return every unfinished claim to the pending state.
        for unfinished in held:
            mark_failed(unfinished)

    if processed > 0 or failed > 0:
        print(f"\nDone! Processed: {processed}, Failed: {failed}")

    if failed > 0:
        sys.exit(1)
|
|
|
|
|
|
# Run the CLI only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|