vibe-coding-cn/assets/repo/prompts-library/scripts/refactor_jsonl.py

155 lines
5.3 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import json
import os
# Lookup table from each original (fine-grained) category name to the
# consolidated category it is folded into. Names that are not listed here are
# handled by the default passed to `mapping.get` in process().
mapping = {
# Programming & technology
"软件工程glue_coding_用提示词": "编程技术",
"前端复刻流程": "编程技术",
"输入转单行JSON": "编程技术",
"序列图生成": "编程技术",
"流程图": "编程技术",
"函数化万物": "编程技术",
"编程知识库": "编程技术",
"网页UI逆向分析提示词": "编程技术",
"用户优化前端设计": "编程技术",
"图像特征提取": "编程技术",
"前端通用设计": "编程技术",
# Logic toolbox
"哲学工具箱": "逻辑工具箱",
"逻辑工具箱": "逻辑工具箱",
"批判性思维分析": "逻辑工具箱",
"思维模型": "逻辑工具箱",
"政治批判工具箱": "逻辑工具箱",
"未来视角": "逻辑工具箱",
"层级结构分析": "逻辑工具箱",
"问题分类识别": "逻辑工具箱",
"分析": "逻辑工具箱",
"终极本质分析": "逻辑工具箱",
"事实核查": "逻辑工具箱",
"关键词图谱": "逻辑工具箱",
"语言分析元prompt": "逻辑工具箱",
"逻辑分析": "逻辑工具箱",
"黄金圈解释": "逻辑工具箱",
"谋士": "逻辑工具箱",
"经验": "逻辑工具箱",
# NOTE(review): the next four keys read as empty strings here. Identical keys
# in a dict literal collapse into a single entry, and with an "" key any record
# without a `category` field (process() looks it up with a default of '') is
# assigned to this category instead of the catch-all. The original keys may
# have held characters that did not survive -- TODO confirm against the raw file.
"": "逻辑工具箱",
"": "逻辑工具箱",
"": "逻辑工具箱",
"": "逻辑工具箱",
"心经口诀创作提示词": "逻辑工具箱",
"临界知识": "逻辑工具箱",
"项目分析": "逻辑工具箱",
"对话提问": "逻辑工具箱",
"思维导图": "逻辑工具箱",
# Content creation
"文案逆向": "内容创作",
"x_prompt收集": "内容创作",
"x提示词收集": "内容创作",
"x爆款文案生成器": "内容创作",
"推文制作提示词": "内容创作",
"李继刚文选": "内容创作",
"解释提示词": "内容创作",
"一句话描述任何内容": "内容创作",
"子弹总结": "内容创作",
"文本转md语法电子书处理": "内容创作",
"排版和图片,视频转文本": "内容创作",
"艺术风格描述": "内容创作",
"视频生成提示词": "内容创作",
"图片逆向": "内容创作",
"排版": "内容创作",
"内容提炼": "内容创作",
"简讯提示词": "内容创作",
"艺术": "内容创作",
"人话写作": "内容创作",
"小红书": "内容创作",
"组织语言": "内容创作",
"正向人物生平报告官方文案": "内容创作",
"gemini字幕处理": "内容创作",
# Learning & education
"学习提示词": "学习教育",
"学习用提示词": "学习教育",
"ai学习用提示词": "学习教育",
"书籍结构化分析": "学习教育",
"典籍句子学习": "学习教育",
"anki卡片格式输出": "学习教育",
"notebookllm用提示词": "学习教育",
"英文学习": "学习教育",
"速成学习": "学习教育",
"论文解读": "学习教育",
"真传一句话": "学习教育",
"学习音频": "学习教育",
"豆包听书": "学习教育",
"最小知识框架": "学习教育",
# Business analysis
"grok商业金融分析提示词": "商业分析",
"投资调研": "商业分析",
"行业分析": "商业分析",
"需求对齐": "商业分析",
"需求结构化描述": "商业分析",
"麦肯锡行业分析": "商业分析",
"产品策略": "商业分析",
"行业咨询": "商业分析",
"需求解析": "商业分析",
"SOP制作": "商业分析",
# Prompt engineering
"元提示词": "提示词工程",
"提示词模块": "提示词工程",
"根据内容逆向提示词": "提示词工程",
"系统提示词": "提示词工程",
"AI使用思维": "提示词工程",
"使用ai的思维": "提示词工程",
"最小字数系统提示词": "提示词工程",
"ChatGPT": "提示词工程",
"Reddit提示词": "提示词工程",
"好prompt生成器": "提示词工程",
"思维协议": "提示词工程",
"grok抓取提示词": "提示词工程",
# Other: listed explicitly under the miscellaneous category
"AI_交易系统提示词": "综合杂项",
"面向CZ": "综合杂项",
}
# Numeric ID of each consolidated category. IDs are assigned in listing
# order, starting at 1, so the tuple below is the single source of truth.
id_map = {
    category_name: category_number
    for category_number, category_name in enumerate(
        (
            "编程技术",
            "逻辑工具箱",
            "内容创作",
            "学习教育",
            "商业分析",
            "提示词工程",
            "综合杂项",
        ),
        start=1,
    )
}
# JSONL file that process() reads; the path is relative, so it is resolved
# against the current working directory.
input_file = "prompt_jsonl/prompt_docs_2025_1222_004537.jsonl"
# JSONL file that process() writes; it is opened in 'w' mode, so any existing
# content is replaced.
output_file = "prompt_jsonl/prompt_docs_refactored.jsonl"
def process(src_path=None, dst_path=None, *, category_map=None, category_ids=None):
    """Rewrite the ``category`` of every JSONL record and add ``category_id``.

    Each non-blank line of the source file is parsed as one JSON object. Its
    ``category`` value is looked up in the category map; values that are not
    in the map (or that cannot be used as a dict key at all, such as a JSON
    list) fall back to "综合杂项". The record is then written to the
    destination file, one JSON document per line, with the new ``category``
    and the matching ``category_id``. A per-category record count is printed
    after the whole file has been processed.

    Args:
        src_path: JSONL file to read. Defaults to the module-level
            ``input_file``.
        dst_path: JSONL file to write; it is truncated first. Defaults to the
            module-level ``output_file``.
        category_map: Old-category -> new-category lookup. Defaults to the
            module-level ``mapping``.
        category_ids: New-category -> numeric ID lookup. Defaults to the
            module-level ``id_map``.

    Raises:
        ValueError: If ``src_path`` and ``dst_path`` resolve to the same file.
        json.JSONDecodeError: If a line is not valid JSON; the message names
            the offending input line.
    """
    # Resolve defaults lazily so the module globals are only touched when the
    # caller did not supply a value.
    src_path = input_file if src_path is None else src_path
    dst_path = output_file if dst_path is None else dst_path
    category_map = mapping if category_map is None else category_map
    category_ids = id_map if category_ids is None else category_ids

    # Opening the destination in 'w' mode truncates it, which would wipe the
    # source before a single line had been read.
    if os.path.realpath(src_path) == os.path.realpath(dst_path):
        raise ValueError(
            f"source and destination are the same file: {src_path}"
        )

    fallback_category = "综合杂项"
    fallback_id = 7  # ID used when the new category has no entry in category_ids

    stats = {}
    with open(src_path, 'r', encoding='utf-8') as fin, \
            open(dst_path, 'w', encoding='utf-8') as fout:
        for lineno, line in enumerate(fin, start=1):
            if not line.strip():
                continue
            try:
                data = json.loads(line)
            except json.JSONDecodeError as err:
                # Same exception type, but say which line of the file is bad;
                # the position json reports is relative to that single line.
                raise json.JSONDecodeError(
                    f"{err.msg} (input line {lineno} of {src_path})",
                    err.doc,
                    err.pos,
                ) from err

            old_cat = data.get('category', '')
            try:
                new_cat = category_map.get(old_cat, fallback_category)
            except TypeError:
                # Unhashable category value (JSON list/object): treat it like
                # any other unknown category instead of aborting the run.
                new_cat = fallback_category

            # Only `category` is replaced (the request was to adjust just
            # 'category'); the old value is not preserved anywhere else.
            data['category'] = new_cat
            data['category_id'] = category_ids.get(new_cat, fallback_id)
            fout.write(json.dumps(data, ensure_ascii=False) + '\n')
            stats[new_cat] = stats.get(new_cat, 0) + 1

    print("Refactor complete.")
    for cat, count in stats.items():
        print(f"{cat}: {count}")
# Run the conversion only when the file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    process()