# Source listing metadata: 50 lines, 1.6 KiB, Python
import json
|
|
import shutil
|
|
from collections import defaultdict
|
|
|
|
# All three paths are relative, so they resolve against the current working
# directory of the process running this script.

# JSONL file whose records are re-indexed; it is overwritten with the result.
input_file = "prompt_jsonl/prompt_docs_refactored.jsonl"
# Destination the re-indexed records are written to before being moved over
# the input file.
output_file = "prompt_jsonl/prompt_docs_refactored_reindexed.jsonl"
# Copy of the input file taken before anything is rewritten.
backup_file = "prompt_jsonl/prompt_docs_refactored_before_reindex.jsonl.bak"
|
|
|
|
def _row_sort_key(item):
    """Return the record's old ``row`` for ordering; missing or null counts as 0."""
    row = item.get('row')
    return 0 if row is None else row


def reindex_rows(input_path=None, output_path=None, backup_path=None):
    """Renumber the ``row`` field of every JSONL record, restarting at 1 per category.

    The input file is first copied to the backup path. Its non-blank lines are
    parsed as JSON objects and grouped by their ``category`` value; a missing
    or null category is grouped under ``'Uncategorized'`` (the record itself is
    not given a ``category`` key). Categories are written in sorted order, and
    within a category records keep the relative order of their old ``row``
    values (missing or null rows sort as 0; the sort is stable). Each record's
    ``row`` is then set to 1, 2, 3, ... The result is written to the output
    path and finally moved over the input path.

    Args:
        input_path: JSONL file to re-index. Defaults to the module-level
            ``input_file``.
        output_path: File the re-indexed records are written to before being
            moved over ``input_path``. Defaults to the module-level
            ``output_file``.
        backup_path: Where the untouched copy of the input is stored.
            Defaults to the module-level ``backup_file``.

    Raises:
        OSError: If a file cannot be read, written, copied or moved.
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    # Resolve defaults at call time so the module-level paths are only
    # consulted when the caller did not supply their own.
    if input_path is None:
        input_path = input_file
    if output_path is None:
        output_path = output_file
    if backup_path is None:
        backup_path = backup_file

    # 1. Backup
    shutil.copy(input_path, backup_path)
    print(f"Backup created: {backup_path}")

    # 2. Load and Group
    items_by_cat = defaultdict(list)
    with open(input_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            item = json.loads(line)
            cat = item.get('category')
            if cat is None:
                # An explicit null would otherwise become a None key, which
                # cannot be ordered against string categories below.
                cat = 'Uncategorized'
            items_by_cat[cat].append(item)

    # 3. Sort and Reindex
    total_items = 0
    with open(output_path, 'w', encoding='utf-8') as f:
        # Sort categories for consistent file order
        for cat in sorted(items_by_cat):
            # Sort items by their OLD row to preserve relative order
            items = sorted(items_by_cat[cat], key=_row_sort_key)

            # Reassign row numbers starting from 1
            for new_row, item in enumerate(items, start=1):
                item['row'] = new_row
                f.write(json.dumps(item, ensure_ascii=False) + '\n')
            total_items += len(items)

            print(f"Category '{cat}': re-indexed {len(items)} items.")

    print(f"Re-indexed file written: {output_path}")
    print(f"Total items: {total_items}")

    # Overwrite original
    shutil.move(output_path, input_path)
    print(f"Overwritten original file: {input_path}")
|
|
|
|
# Run the re-indexing only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    reindex_rows()
|