data update

This commit is contained in:
2025-12-30 16:13:55 +01:00
parent 1d6b86c74e
commit f3a1108f9e
221 changed files with 77649 additions and 396 deletions

View File

@@ -1,178 +0,0 @@
#!/usr/bin/env python3
"""
Process pending JSON files into the docs/ structure using Claude.
Usage:
venv/bin/python python/process.py # Process 1 file (default)
venv/bin/python python/process.py -n 5 # Process 5 files
venv/bin/python python/process.py -n 9999 # Process all files
Run multiple instances in parallel to process faster.
"""
import argparse
import subprocess
import sys
import re
import fcntl
from pathlib import Path
# Project root: the parent of the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent.parent
# Checklist of scraped JSON files. Each entry is a "- [?] <name>.json" line
# whose marker is " " (pending), "~" (in progress) or "x" (done).
PROCESSED_MD = PROJECT_ROOT / "processed.md"
# File that claim_pending_file() flock()s so that parallel instances of this
# script do not claim the same checklist entry.
LOCK_FILE = PROJECT_ROOT / ".process_lock"
# Prompt handed to the `claude` CLI for each claimed file (see process_file).
# It is rendered with str.format(), and {json_file} is the only placeholder,
# so any literal brace added to this text must be doubled ("{{" / "}}").
PROMPT_TEMPLATE = '''Process this ONE scraped JSON file into the docs/ folder structure.
File to process: {json_file}
Steps:
1. Read the JSON file from scraped_data/
2. Extract game entities (buildings, goods, etc.)
3. Translate German to English:
- Latium = Latium (Roman region)
- Albion = Albion (Celtic region)
- Liberti = Liberti (Tier 1 Roman)
- Plebejer = Plebeians (Tier 2 Roman)
- Equites = Equites (Tier 3 Roman)
- Patrizier = Patricians (Tier 4 Roman)
- Wanderer = Waders (Tier 1 Celtic)
- Schmiede = Smiths (Tier 2 Celtic)
- Älteste = Elders (Tier 3 Celtic)
- Mercatoren = Mercators (Tier 4 Celtic)
- Edelmänner = Nobles (Tier 5 Celtic)
4. Determine the entity type and target folder:
- anno-117-buildings_* → docs/buildings/
- anno-117-goods_* → docs/goods/
- anno-117-specialists* → docs/specialists/
- Other files → skip (just mark as done)
5. Check if a markdown file already exists for this entity:
- If YES: Read the existing file, MERGE new data with existing data.
Keep existing values, add new fields, update "Unknown" values with actual data.
- If NO: Create a new markdown file using the schema format.
6. Update the category _index.md:
- If entity already listed: skip
- If not listed: add a link to the entity in the appropriate section
7. Mark the file as processed in processed.md (change `- [~] {json_file}` to `- [x] {json_file}`)
IMPORTANT for merging:
- Preserve existing data that is more specific than "Unknown"
- Add any new fields from the JSON that are missing in the existing file
- Update "Unknown" values with actual values from the JSON
- Keep cross-references and links intact
Use the existing files in docs/ as examples for formatting.
Keep entries concise. Mark unknown values as "Unknown".
'''
def claim_pending_file():
    """Claim the first pending checklist entry while holding the lock file.

    Takes an exclusive ``flock`` on ``LOCK_FILE``, looks for the first
    ``- [ ] <name>.json`` line in ``PROCESSED_MD`` and flips its marker to
    ``[~]`` (in progress) before releasing the lock.

    Returns:
        The claimed file name, or ``None`` when no pending entry exists.
    """
    LOCK_FILE.touch(exist_ok=True)
    with open(LOCK_FILE, 'r+') as lock_handle:
        fcntl.flock(lock_handle.fileno(), fcntl.LOCK_EX)
        try:
            checklist = PROCESSED_MD.read_text(encoding="utf-8")
            # Only lines that are exactly "- [ ] <something>.json" count as pending.
            pending = re.search(r"^- \[ \] (.+\.json)$", checklist, re.MULTILINE)
            if pending is None:
                return None
            claimed = pending.group(1)
            # "~" marks the entry as in progress so other instances skip it.
            PROCESSED_MD.write_text(
                checklist.replace(f"- [ ] {claimed}", f"- [~] {claimed}"),
                encoding="utf-8",
            )
            return claimed
        finally:
            fcntl.flock(lock_handle.fileno(), fcntl.LOCK_UN)
def _swap_marker(json_file: str, old_marker: str, new_marker: str) -> None:
    """Replace ``- [old] json_file`` with ``- [new] json_file`` in processed.md.

    The read-modify-write runs under the same exclusive ``flock`` on
    ``LOCK_FILE`` that claim_pending_file() uses. Without it, a parallel
    instance could rewrite the checklist between our read and our write and
    one of the two updates would be lost.
    """
    LOCK_FILE.touch(exist_ok=True)
    with open(LOCK_FILE, "r+") as lock:
        fcntl.flock(lock.fileno(), fcntl.LOCK_EX)
        try:
            content = PROCESSED_MD.read_text(encoding="utf-8")
            updated = content.replace(
                f"- [{old_marker}] {json_file}",
                f"- [{new_marker}] {json_file}",
            )
            # Skip the write when nothing matched (e.g. the entry was already
            # updated by someone else) so the file is not rewritten needlessly.
            if updated != content:
                PROCESSED_MD.write_text(updated, encoding="utf-8")
        finally:
            fcntl.flock(lock.fileno(), fcntl.LOCK_UN)


def mark_completed(json_file: str) -> None:
    """Mark an in-progress entry (``[~]``) as done (``[x]``).

    Args:
        json_file: File name exactly as it appears in processed.md.
    """
    _swap_marker(json_file, "~", "x")


def mark_failed(json_file: str) -> None:
    """Revert an in-progress entry (``[~]``) to pending (``[ ]``).

    Args:
        json_file: File name exactly as it appears in processed.md.
    """
    _swap_marker(json_file, "~", " ")
def process_file(json_file: str) -> bool:
    """Invoke the ``claude`` CLI on one file and report whether it succeeded.

    The prompt is rendered from ``PROMPT_TEMPLATE`` and the command runs in
    ``PROJECT_ROOT`` with a five-minute limit.

    Args:
        json_file: Name of the scraped JSON file to process.

    Returns:
        ``True`` when the CLI exits with status 0 (its stdout is printed);
        ``False`` on a non-zero exit, a timeout, or any other error, each of
        which is reported on stderr.
    """
    command = [
        "claude",
        "-p",
        PROMPT_TEMPLATE.format(json_file=json_file),
        "--allowedTools",
        "Read,Write,Edit,Glob",
    ]
    try:
        outcome = subprocess.run(
            command,
            cwd=PROJECT_ROOT,
            capture_output=True,
            text=True,
            timeout=300,  # seconds
        )
        succeeded = outcome.returncode == 0
        if succeeded:
            print(outcome.stdout)
        else:
            print(f"Claude error: {outcome.stderr}", file=sys.stderr)
        return succeeded
    except subprocess.TimeoutExpired:
        print(f"Timeout processing {json_file}", file=sys.stderr)
    except Exception as e:
        print(f"Error: {e}", file=sys.stderr)
    # Reached only from one of the handlers above.
    return False
def main():
    """Claim and process up to ``--count`` pending files, then print totals.

    A file that fails stays marked in progress (``[~]``) until the run ends
    and is only then reverted to pending. Reverting it immediately would make
    the next claim_pending_file() call hand back the very same file, so one
    persistently failing file would block every other file in the run. The
    deferred revert happens in a ``finally`` block, so a file that was claimed
    when the run was interrupted (Ctrl-C, unexpected exception) is released
    as well instead of staying in progress forever.

    Exits with status 1 when at least one file failed.
    """
    parser = argparse.ArgumentParser(description="Process pending JSON files into docs/")
    parser.add_argument(
        "-n", "--count",
        type=int,
        default=1,
        help="Number of files to process (default: 1)"
    )
    args = parser.parse_args()

    processed = 0
    failed = 0
    # Files claimed by this run that were not completed (failed or interrupted).
    unfinished = []
    try:
        for i in range(args.count):
            json_file = claim_pending_file()
            if not json_file:
                if processed == 0 and failed == 0:
                    print("No pending files to process")
                break
            unfinished.append(json_file)
            print(f"[{i + 1}/{args.count}] Processing: {json_file}")
            if process_file(json_file):
                mark_completed(json_file)
                unfinished.pop()  # the file just completed is the last one added
                print(f"Completed: {json_file}")
                processed += 1
            else:
                print(f"Failed: {json_file}", file=sys.stderr)
                failed += 1
    finally:
        # Release every unfinished claim so a later run can retry it.
        for json_file in unfinished:
            mark_failed(json_file)

    if processed > 0 or failed > 0:
        print(f"\nDone! Processed: {processed}, Failed: {failed}")
    if failed > 0:
        sys.exit(1)


if __name__ == "__main__":
    main()