vibe-coding-cn/libs/external/prompts-library/scripts/reindex_rows.py

50 lines
1.6 KiB
Python

import json
import shutil
from collections import defaultdict
# Paths used by reindex_rows(). All three are relative, so they resolve
# against the current working directory of the process running the script.
#
# JSONL file that is read and, at the end of the run, overwritten in place.
input_file = "prompt_jsonl/prompt_docs_refactored.jsonl"
# Intermediate file the re-indexed records are written to before it is moved
# over input_file.
output_file = "prompt_jsonl/prompt_docs_refactored_reindexed.jsonl"
# Copy of input_file taken before any processing starts.
backup_file = "prompt_jsonl/prompt_docs_refactored_before_reindex.jsonl.bak"
def reindex_rows(input_path=None, output_path=None, backup_path=None):
    """Renumber the ``row`` field of every JSONL record, category by category.

    The input file is first copied to the backup path. Its non-blank lines
    are then parsed as JSON objects and grouped by their ``category`` value
    (``'Uncategorized'`` when the key is absent). Categories are emitted in
    sorted order; inside a category the records keep the relative order of
    their previous ``row`` values and receive new ``row`` numbers 1, 2, 3...
    The result is written to the output path, which is finally moved over
    the input file.

    Args:
        input_path: JSONL file to re-index in place. Defaults to the
            module-level ``input_file``.
        output_path: Intermediate file the result is written to before it
            replaces the input. Defaults to the module-level ``output_file``.
        backup_path: Where the untouched copy of the input is stored.
            Defaults to the module-level ``backup_file``.

    Raises:
        OSError: If a file cannot be read, written, copied or moved.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Fall back to the module constants only when a path is omitted, so the
    # function can be pointed at any file without editing the module.
    src = input_file if input_path is None else input_path
    staged = output_file if output_path is None else output_path
    backup = backup_file if backup_path is None else backup_path

    # 1. Backup
    shutil.copy(src, backup)
    print(f"Backup created: {backup}")

    # 2. Load and group
    items_by_cat = defaultdict(list)
    with open(src, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            item = json.loads(line)
            items_by_cat[item.get('category', 'Uncategorized')].append(item)

    def _old_row(item):
        # A JSON null row would make the sort raise TypeError when compared
        # with numeric rows; treat it the same as a missing row (0).
        row = item.get('row', 0)
        return 0 if row is None else row

    # 3. Sort and re-index
    total_items = 0
    with open(staged, 'w', encoding='utf-8') as f:
        # Sorted category order keeps the output file layout deterministic.
        for cat in sorted(items_by_cat):
            # sorted() is stable, so records sharing an old row keep the
            # order in which they appeared in the input.
            items = sorted(items_by_cat[cat], key=_old_row)
            for new_row, item in enumerate(items, start=1):
                item['row'] = new_row
                f.write(json.dumps(item, ensure_ascii=False) + '\n')
            total_items += len(items)
            print(f"Category '{cat}': re-indexed {len(items)} items.")

    print(f"Re-indexed file written: {staged}")
    print(f"Total items: {total_items}")

    # Overwrite the original only after the staged file is fully written
    # and closed, so a failure above leaves the input untouched.
    shutil.move(staged, src)
    print(f"Overwritten original file: {src}")
# Script entry point: run the re-index only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    reindex_rows()