vibe-coding-cn/assets/repo/prompts-library/main.py

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
r"""
main.py

Unified controller for prompt-library conversions.

支持的转换模式
==============
1. Excel → Docs         : 将 Excel 工作簿转换为 Markdown 文档目录
2. Docs → Excel         : 将 Markdown 文档目录还原为 Excel 工作簿
3. Docs → JSONL         : 将 Markdown 文档转换为 JSONL 格式（保留完整元信息）
4. JSONL → Excel        : 将 JSONL 转换为 Excel（单元格存储 JSON 对象）
5. Excel(JSONL) → JSONL : 将内部 JSONL 格式的 Excel 转换为 JSONL 文件（自动忽略"说明"工作表）

数据格式规范
============
Excel 结构:
  - 每个工作表(sheet) = 一个分类(category)
  - 行(row) = 不同提示词
  - 列(col) = 版本迭代

Excel(JSONL) 结构（内部 JSONL 格式）:
  - 每个工作表(sheet) = 一个分类(category)，"说明"工作表会被忽略
  - 每个单元格存储 JSON 对象: {"title": "...", "content": "..."}

Docs 结构:
  - prompts/(N)_分类名/           # N = category_id
  - prompts/(N)_分类名/(r,c)_标题.md  # r=row, c=col

JSONL 格式 (每行一个 JSON 对象):
  {
    "category_id": 2,        # 分类编号
    "category": "元提示词",   # 分类名称
    "row": 1,                # 原 Excel 行号
    "col": 1,                # 原 Excel 列号（版本号）
    "title": "...",          # 标题（截断80字符）
    "content": "..."         # 完整内容
  }

JSONL → Excel 单元格格式:
  {"title": "...", "content": "..."}  # 只保留 title 和 content

目录约定
========
- Excel 源文件: ./prompt_excel/
- Docs 源目录:  ./prompt_docs/
- JSONL 文件:   ./prompt_jsonl/
- 输出:
  - Excel→Docs: ./prompt_docs/prompt_docs_YYYY_MMDD_HHMMSS/
  - Docs→Excel: ./prompt_excel/prompt_excel_YYYY_MMDD_HHMMSS/rebuilt.xlsx
  - Docs→JSONL: ./prompt_jsonl/{docs_name}.jsonl
  - JSONL→Excel: ./prompt_excel/{jsonl_name}.xlsx
  - Excel(JSONL)→JSONL: ./prompt_jsonl/{excel_name}.jsonl

使用示例
========
  # 交互式选择
  python3 main.py

  # Excel → Docs
  python3 main.py --select "prompt_excel/prompt.xlsx"

  # Docs → Excel
  python3 main.py --select "prompt_docs/prompt_docs_2025_1222"

  # Docs → JSONL
  python3 main.py --select "prompt_docs/prompt_docs_2025_1222" --mode docs2jsonl

  # JSONL → Excel
  python3 main.py --select "prompt_jsonl/prompt_docs.jsonl"

  # Excel(JSONL) → JSONL（自动检测或显式指定）
  python3 main.py --select "prompt_excel/prompt_jsonl.xlsx"
  python3 main.py --select "prompt_excel/prompt_jsonl.xlsx" --mode jsonl_excel2jsonl
"""
from __future__ import annotations

import argparse
import os
import subprocess
import sys
from dataclasses import dataclass
from pathlib import Path
from typing import List, Optional, Sequence, Tuple

# Optional Rich UI imports (fallback to plain if unavailable)
try:
    from rich.console import Console
    from rich.layout import Layout
    from rich.panel import Panel
    from rich.table import Table
    from rich.text import Text
    from rich import box
    from rich.prompt import IntPrompt
    _RICH_AVAILABLE = True
except Exception:  # pragma: no cover
    _RICH_AVAILABLE = False

# Optional InquirerPy for arrow-key selection
try:
    from InquirerPy import inquirer as _inq
    _INQUIRER_AVAILABLE = True
except Exception:  # pragma: no cover
    _INQUIRER_AVAILABLE = False


@dataclass
class Candidate:
    index: int
    kind: str  # "excel" | "docs" | "docs2jsonl" | "jsonl"
    path: Path
    label: str


def get_repo_root() -> Path:
    return Path(__file__).resolve().parent


def list_excel_files(excel_dir: Path) -> List[Path]:
    if not excel_dir.exists():
        return []
    return sorted([p for p in excel_dir.iterdir() if p.is_file() and p.suffix.lower() == ".xlsx"], key=lambda p: p.stat().st_mtime)


def has_prompt_files(directory: Path) -> bool:
    if not directory.exists():
        return False
    # Detect files like "(r,c)_*.md" anywhere under the directory
    for file_path in directory.rglob("*.md"):
        name = file_path.name
        if name.startswith("(") and ")_" in name:
            return True
    return False


def list_doc_sets(docs_dir: Path) -> List[Path]:
    results: List[Path] = []
    if not docs_dir.exists():
        return results
    # If the docs_dir itself looks like a set, include it
    if has_prompt_files(docs_dir):
        results.append(docs_dir)
    # Also include any immediate children that look like a docs set
    for child in sorted(docs_dir.iterdir()):
        if child.is_dir() and has_prompt_files(child):
            results.append(child)
    return results


def run_start_convert(start_convert: Path, mode: str, project_root: Path, select_path: Optional[Path] = None, excel_dir: Optional[Path] = None, docs_dir: Optional[Path] = None) -> int:
    """Delegate to scripts/start_convert.py with appropriate flags."""
    python_exe = sys.executable
    cmd: List[str] = [python_exe, str(start_convert), "--mode", mode]
    if select_path is not None:
        # Always pass as repo-root-relative or absolute string
        cmd.extend(["--select", str(select_path)])
    if excel_dir is not None:
        cmd.extend(["--excel-dir", str(excel_dir)])
    if docs_dir is not None:
        cmd.extend(["--docs-dir", str(docs_dir)])

    # Execute in repo root to ensure relative defaults resolve correctly
    proc = subprocess.run(cmd, cwd=str(project_root))
    return proc.returncode


def run_docs_to_jsonl(docs_path: Path, project_root: Path) -> int:
    """Convert docs folder to JSONL format."""
    import json
    import re

    prompts_dir = docs_path / "prompts"
    if not prompts_dir.exists():
        print(f"❌ 找不到 prompts 目录: {prompts_dir}")
        return 1

    output_dir = project_root / "prompt_jsonl"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{docs_path.name}.jsonl"

    records = []
    for category_dir in sorted(prompts_dir.iterdir()):
        if not category_dir.is_dir():
            continue

        m = re.match(r'\((\d+)\)_(.+)', category_dir.name)
        cat_id, cat_name = (m.groups() if m else (0, category_dir.name))

        for md_file in sorted(category_dir.glob("*.md")):
            if md_file.name == "index.md":
                continue

            fm = re.match(r'\((\d+),(\d+)\)_(.+)\.md', md_file.name)
            if not fm:
                continue

            row, col, title = fm.groups()
            content = md_file.read_text(encoding='utf-8')

            records.append({
                "category_id": int(cat_id),
                "category": cat_name,
                "row": int(row),
                "col": int(col),
                "title": title[:80],
                "content": content
            })

    with open(output_file, 'w', encoding='utf-8') as f:
        for r in records:
            f.write(json.dumps(r, ensure_ascii=False) + '\n')

    print(f"✅ Docs→JSONL OK: {docs_path.name} → {output_file.relative_to(project_root)}")
    return 0


def list_jsonl_files(jsonl_dir: Path) -> List[Path]:
    if not jsonl_dir.exists():
        return []
    return sorted([p for p in jsonl_dir.iterdir() if p.is_file() and p.suffix.lower() == ".jsonl"], key=lambda p: p.stat().st_mtime)


def is_jsonl_excel(excel_path: Path) -> bool:
    """检测 Excel 是否为内部 JSONL 格式（单元格存储 JSON 对象）"""
    import json
    try:
        import pandas as pd
    except ImportError:
        return False

    try:
        xlsx = pd.ExcelFile(excel_path)
        for sheet in xlsx.sheet_names[:2]:  # 检查前两个工作表
            if sheet == '说明':
                continue
            df = pd.read_excel(xlsx, sheet_name=sheet, header=None, nrows=1)
            if df.empty:
                continue
            first_val = str(df.iloc[0, 0]).strip() if not pd.isna(df.iloc[0, 0]) else ""
            # 检查列名或第一个单元格是否为 JSON
            first_col = str(df.columns[0]).strip() if len(df.columns) > 0 else ""
            for val in [first_col, first_val]:
                if val.startswith('{') and val.endswith('}'):
                    try:
                        obj = json.loads(val)
                        if 'title' in obj and 'content' in obj:
                            return True
                    except:
                        pass
        return False
    except:
        return False


def run_jsonl_excel_to_jsonl(excel_path: Path, project_root: Path) -> int:
    """将内部 JSONL 格式的 Excel 转换为 JSONL 文件（忽略"说明"工作表）"""
    import json
    try:
        import pandas as pd
    except ImportError:
        print("❌ 需要 pandas: pip install pandas openpyxl")
        return 1

    xlsx = pd.ExcelFile(excel_path)
    output_lines = []
    cat_id = 0

    for sheet in xlsx.sheet_names:
        if sheet == '说明':
            continue

        cat_id += 1
        cat_name = sheet
        df = pd.read_excel(xlsx, sheet_name=sheet, header=None)

        # 检查列名是否是 JSON 数据
        for col_idx, col_name in enumerate(df.columns):
            col_str = str(col_name).strip()
            if col_str.startswith('{') and col_str.endswith('}'):
                try:
                    obj = json.loads(col_str)
                    if 'title' in obj and 'content' in obj:
                        output_lines.append(json.dumps({
                            "category_id": cat_id,
                            "category": cat_name,
                            "row": 1,
                            "col": col_idx + 1,
                            "title": obj["title"][:80],
                            "content": obj["content"]
                        }, ensure_ascii=False))
                except:
                    pass

        # 处理数据行
        for row_idx, row in df.iterrows():
            for col_idx, val in enumerate(row):
                if pd.isna(val):
                    continue
                val_str = str(val).strip()
                if val_str.startswith('{') and val_str.endswith('}'):
                    try:
                        obj = json.loads(val_str)
                        if 'title' in obj and 'content' in obj:
                            output_lines.append(json.dumps({
                                "category_id": cat_id,
                                "category": cat_name,
                                "row": row_idx + 2,
                                "col": col_idx + 1,
                                "title": obj["title"][:80],
                                "content": obj["content"]
                            }, ensure_ascii=False))
                    except:
                        pass

    if not output_lines:
        print(f"❌ 未找到有效的 JSONL 数据: {excel_path}")
        return 1

    from datetime import datetime
    timestamp = datetime.now().strftime("%Y_%m%d_%H%M%S")

    output_dir = project_root / "prompt_jsonl"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{excel_path.stem}_{timestamp}.jsonl"

    with open(output_file, 'w', encoding='utf-8') as f:
        f.write('\n'.join(output_lines))

    print(f"✅ Excel(JSONL)→JSONL OK: {excel_path.name} → {output_file.relative_to(project_root)} ({len(output_lines)} 条记录)")
    return 0


def run_jsonl_to_excel(jsonl_path: Path, project_root: Path) -> int:
    """Convert JSONL to Excel, each cell contains the full JSON object as string."""
    import json
    from collections import defaultdict
    try:
        import pandas as pd
    except ImportError:
        print("❌ 需要 pandas: pip install pandas openpyxl")
        return 1

    records = []
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():
                records.append(json.loads(line))

    if not records:
        print(f"❌ JSONL 文件为空: {jsonl_path}")
        return 1

    # category -> {row -> {col -> json_string}}
    sheets_data: dict = defaultdict(lambda: defaultdict(dict))
    cat_id_map = {}

    for r in records:
        cat_name = r["category"]
        cat_id_map[r["category_id"]] = cat_name
        # 单元格内容只保留 title 和 content
        cell_data = {"title": r["title"], "content": r["content"]}
        sheets_data[cat_name][r["row"]][r["col"]] = json.dumps(cell_data, ensure_ascii=False)

    output_dir = project_root / "prompt_excel"
    output_dir.mkdir(parents=True, exist_ok=True)
    output_file = output_dir / f"{jsonl_path.stem}.xlsx"

    sorted_cats = sorted(cat_id_map.items(), key=lambda x: x[0])

    with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
        for cat_id, cat_name in sorted_cats:
            row_data = sheets_data[cat_name]
            if not row_data:
                continue

            max_row = max(row_data.keys())
            max_col = max(c for cols in row_data.values() for c in cols.keys())

            data = []
            for row_idx in range(1, max_row + 1):
                row_list = []
                for col_idx in range(1, max_col + 1):
                    row_list.append(row_data.get(row_idx, {}).get(col_idx, ""))
                data.append(row_list)

            df = pd.DataFrame(data)
            sheet_name = cat_name[:31]
            df.to_excel(writer, sheet_name=sheet_name, index=False, header=False)

    print(f"✅ JSONL→Excel OK: {jsonl_path.name} → {output_file.relative_to(project_root)} ({len(sorted_cats)} 个工作表)")
    return 0


def build_candidates(project_root: Path, excel_dir: Path, docs_dir: Path) -> List[Candidate]:
    candidates: List[Candidate] = []
    idx = 1
    jsonl_dir = project_root / "prompt_jsonl"

    for path in list_excel_files(excel_dir):
        label = f"{path.name}"
        # 检测是否为内部 JSONL 格式的 Excel
        if is_jsonl_excel(path):
            candidates.append(Candidate(index=idx, kind="jsonl_excel", path=path, label=label))
        else:
            candidates.append(Candidate(index=idx, kind="excel", path=path, label=label))
        idx += 1
    for path in list_doc_sets(docs_dir):
        display = path.relative_to(project_root) if path.is_absolute() else path
        # Docs → Excel
        candidates.append(Candidate(index=idx, kind="docs", path=path, label=f"{display}"))
        idx += 1
        # Docs → JSONL
        candidates.append(Candidate(index=idx, kind="docs2jsonl", path=path, label=f"{display}"))
        idx += 1
    for path in list_jsonl_files(jsonl_dir):
        label = f"{path.name}"
        candidates.append(Candidate(index=idx, kind="jsonl", path=path, label=label))
        idx += 1
    return candidates


def select_interactively(candidates: Sequence[Candidate]) -> Optional[Candidate]:
    if not candidates:
        print("没有可用的 Excel 或 Docs 源。请将 .xlsx 放到 prompt_excel/ 或将文档放到 prompt_docs/ 下。")
        return None

    # Prefer arrow-key selection if available
    if _INQUIRER_AVAILABLE:
        try:
            choices = [
                {"name": f"[{c.kind.upper()}] {c.label}", "value": c.index}
                for c in candidates
            ]
            selection = _inq.select(
                message="选择要转换的源（上下箭头，回车确认，Ctrl+C 取消）:",
                choices=choices,
                default=choices[0]["value"],
            ).execute()
            match = next((c for c in candidates if c.index == selection), None)
            return match
        except KeyboardInterrupt:
            return None

    if _RICH_AVAILABLE:
        console = Console()
        layout = Layout()
        layout.split_column(
            Layout(name="header", size=3),
            Layout(name="list"),
            Layout(name="footer", size=3),
        )
        header = Panel(Text("提示词库转换器", style="bold cyan"), subtitle="选择一个源开始转换", box=box.ROUNDED)

        table = Table(box=box.SIMPLE_HEAVY)
        table.add_column("编号", style="bold yellow", justify="right", width=4)
        table.add_column("类型", style="magenta", width=16)
        table.add_column("路径/名称", style="white")
        kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel", "jsonl_excel": "Excel(JSONL)→JSONL"}
        for c in candidates:
            table.add_row(str(c.index), kind_labels.get(c.kind, c.kind), c.label)

        layout["header"].update(header)
        layout["list"].update(Panel(table, title="可选源", border_style="cyan"))
        layout["footer"].update(Panel(Text("输入编号并回车（0 退出）", style="bold"), box=box.ROUNDED))
        console.print(layout)

        while True:
            try:
                choice = IntPrompt.ask("编号", default=0)
            except Exception:
                return None
            if choice == 0:
                return None
            match = next((c for c in candidates if c.index == choice), None)
            if match is not None:
                return match
            console.print("[red]编号不存在，请重试[/red]")

    # Plain fallback
    kind_labels = {"excel": "Excel→Docs", "docs": "Docs→Excel", "docs2jsonl": "Docs→JSONL", "jsonl": "JSONL→Excel", "jsonl_excel": "Excel(JSONL)→JSONL"}
    print("请选择一个源进行转换：")
    for c in candidates:
        print(f"  {c.index:2d}. [{kind_labels.get(c.kind, c.kind)}] {c.label}")
    print("  0. 退出")
    while True:
        try:
            raw = input("输入编号后回车：").strip()
        except EOFError:
            return None
        if not raw:
            continue
        if raw == "0":
            return None
        if not raw.isdigit():
            print("请输入有效数字。")
            continue
        choice = int(raw)
        match = next((c for c in candidates if c.index == choice), None)
        if match is None:
            print("编号不存在，请重试。")
            continue
        return match


def parse_args() -> argparse.Namespace:
    p = argparse.ArgumentParser(description="prompt-library conversion controller")
    p.add_argument("--excel-dir", type=str, default="prompt_excel", help="Excel sources directory (default: prompt_excel)")
    p.add_argument("--docs-dir", type=str, default="prompt_docs", help="Docs sources directory (default: prompt_docs)")
    p.add_argument("--select", type=str, default=None, help="Path to a specific .xlsx file or a docs folder")
    p.add_argument("--mode", type=str, choices=["excel2docs", "docs2excel", "docs2jsonl", "jsonl2excel", "jsonl_excel2jsonl"], default=None, help="Conversion mode (auto-detect if not specified)")
    p.add_argument("--non-interactive", action="store_true", help="Do not prompt; require --select or exit")
    return p.parse_args()


def main() -> int:
    repo_root = get_repo_root()
    start_convert = repo_root / "scripts" / "start_convert.py"
    if not start_convert.exists():
        print("找不到 scripts/start_convert.py。")
        return 1

    args = parse_args()

    excel_dir = (repo_root / args.excel_dir).resolve() if not Path(args.excel_dir).is_absolute() else Path(args.excel_dir).resolve()
    docs_dir = (repo_root / args.docs_dir).resolve() if not Path(args.docs_dir).is_absolute() else Path(args.docs_dir).resolve()

    # Non-interactive path with explicit selection
    if args.non_interactive or args.select:
        if not args.select:
            print("--non-interactive 需要配合 --select 使用。")
            return 2
        selected = Path(args.select)
        if not selected.is_absolute():
            selected = (repo_root / selected).resolve()
        if not selected.exists():
            print(f"选择的路径不存在: {selected}")
            return 2
        if selected.is_file() and selected.suffix.lower() == ".xlsx":
            # 检测是否为内部 JSONL 格式或显式指定模式
            if args.mode == "jsonl_excel2jsonl" or is_jsonl_excel(selected):
                return run_jsonl_excel_to_jsonl(selected, repo_root)
            return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=selected, excel_dir=excel_dir)
        if selected.is_file() and selected.suffix.lower() == ".jsonl":
            return run_jsonl_to_excel(selected, repo_root)
        if selected.is_dir():
            # Check mode or default to docs2excel
            if args.mode == "docs2jsonl":
                return run_docs_to_jsonl(selected, repo_root)
            return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=selected, docs_dir=docs_dir)
        print("无法识别的选择类型。")
        return 2

    # Interactive selection
    candidates = build_candidates(repo_root, excel_dir, docs_dir)
    chosen = select_interactively(candidates)
    if chosen is None:
        return 0
    if chosen.kind == "excel":
        return run_start_convert(start_convert, mode="excel2docs", project_root=repo_root, select_path=chosen.path, excel_dir=excel_dir)
    elif chosen.kind == "jsonl_excel":
        return run_jsonl_excel_to_jsonl(chosen.path, repo_root)
    elif chosen.kind == "docs2jsonl":
        return run_docs_to_jsonl(chosen.path, repo_root)
    elif chosen.kind == "jsonl":
        return run_jsonl_to_excel(chosen.path, repo_root)
    else:
        return run_start_convert(start_convert, mode="docs2excel", project_root=repo_root, select_path=chosen.path, docs_dir=docs_dir)


if __name__ == "__main__":
    sys.exit(main())