vibe-coding-cn/backups/快速备份.py

266 lines
8.4 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
快速备份项目工具
读取 .gitignore 规则并打包项目文件(排除匹配的文件)
bash backups/一键备份.sh
文件位置:
backups/快速备份.py
工具清单(backups/ 目录):
• 快速备份.py - 核心备份引擎(7.3 KB)
• 一键备份.sh - 一键执行脚本(2.4 KB)
使用方法:
$ bash backups/一键备份.sh
$ python3 backups/快速备份.py
备份输出:
backups/gz/备份_YYYYMMDD_HHMMSS.tar.gz
适用项目:
任何包含 .gitignore 文件的项目(自动读取规则并排除匹配文件)
依赖:
无需额外安装包,仅使用 Python 内置模块
"""
import os
import tarfile
import fnmatch
from pathlib import Path
from datetime import datetime
import argparse
import sys
class GitignoreFilter:
    """Parse a ``.gitignore`` file and decide which paths a backup should skip.

    Rules are stored in file order in ``self.rules``. Each rule is a dict with
    the keys ``pattern`` (rule text without a leading ``!``), ``dir_only``
    (rule ended with ``/``), ``negate`` (rule started with ``!``) and
    ``has_slash`` (rule contains a ``/`` other than a trailing one).
    The last rule that matches a path decides whether it is excluded.
    """

    def __init__(self, gitignore_path: Path, project_root: Path):
        """Store *project_root* and load the rules found in *gitignore_path*."""
        self.project_root = project_root
        self.rules = []
        self.load_gitignore(gitignore_path)

    def load_gitignore(self, gitignore_path: Path):
        """Read *gitignore_path* and append one rule dict per pattern line.

        A missing file only prints a warning and leaves the rule list empty.
        A file that cannot be read or decoded prints an error and exits the
        process with status 1.
        """
        if not gitignore_path.exists():
            print(f"⚠️ 警告: {gitignore_path} 不存在,将不应用任何过滤规则")
            return
        try:
            with open(gitignore_path, 'r', encoding='utf-8') as f:
                for line in f:
                    line = line.strip()
                    # Blank lines and comment lines carry no rule.
                    if not line or line.startswith('#'):
                        continue
                    negate = line.startswith('!')
                    if negate:
                        line = line[1:].lstrip()
                        if not line:
                            continue
                    self.rules.append({
                        "pattern": line,
                        "dir_only": line.endswith('/'),
                        "negate": negate,
                        # A slash anywhere but the end ties the pattern to
                        # the project root instead of "any depth".
                        "has_slash": '/' in line.rstrip('/'),
                    })
            print(f"✓ 已加载 {len(self.rules)} 条规则(含取反)")
        except (OSError, UnicodeError) as e:
            print(f"❌ 读取 .gitignore 失败: {e}")
            sys.exit(1)

    @staticmethod
    def _segments_match(pattern_parts: list, path_parts: list) -> bool:
        """Match path components one by one; a ``**`` component spans any number."""
        if not pattern_parts:
            return not path_parts
        head, rest = pattern_parts[0], pattern_parts[1:]
        if head == '**':
            return any(
                GitignoreFilter._segments_match(rest, path_parts[skip:])
                for skip in range(len(path_parts) + 1)
            )
        if not path_parts:
            return False
        return (
            fnmatch.fnmatch(path_parts[0], head)
            and GitignoreFilter._segments_match(rest, path_parts[1:])
        )

    def _match_rule(self, rule: dict, relative_path_str: str, is_dir: bool) -> bool:
        """Return True when *rule* hits the path or one of its parent directories.

        Args:
            rule: One entry of ``self.rules``.
            relative_path_str: POSIX-style path relative to the project root.
            is_dir: Whether the path itself is a directory.
        """
        # Leading "/" (root anchor) and trailing "/" (directory marker) are
        # already captured by has_slash / dir_only, so drop them for matching.
        core = rule["pattern"].strip('/')
        if not core:
            return False

        path_parts = relative_path_str.split('/')
        total = len(path_parts)
        pattern_parts = core.split('/') if rule["has_slash"] else None

        # depth < total inspects a parent directory: once a directory is
        # ignored, everything beneath it is ignored as well.
        for depth in range(1, total + 1):
            if rule["dir_only"] and depth == total and not is_dir:
                # A directory-only rule must not hit a regular file.
                continue
            if pattern_parts is not None:
                # Slash patterns are relative to the root, so compare the
                # whole prefix component by component ("*" stays in one level).
                hit = self._segments_match(pattern_parts, path_parts[:depth])
            else:
                # Slash-less patterns apply to a name at any depth.
                hit = fnmatch.fnmatch(path_parts[depth - 1], core)
            if hit:
                return True
        return False

    def should_exclude(self, path: Path, is_dir: bool = False) -> bool:
        """Return True when *path* must be left out of the backup.

        Rules are evaluated top to bottom and the last matching rule wins, so
        a later ``!pattern`` can re-include a previously excluded path.
        Paths outside the project root are never excluded.
        """
        try:
            # Matching is always done on POSIX-style separators.
            relative_path_str = path.relative_to(self.project_root).as_posix()
        except ValueError:
            return False

        excluded = False
        for rule in self.rules:
            if self._match_rule(rule, relative_path_str, is_dir):
                excluded = not rule["negate"]
        return excluded
def create_backup(project_root: Path, output_file: Path, filter_obj: "GitignoreFilter"):
    """Pack the files under *project_root* into a gzip-compressed tar archive.

    Args:
        project_root: Directory to walk; archive member names are relative to it.
        output_file: Path of the ``.tar.gz`` file to create.
        filter_obj: Object whose ``should_exclude(path, is_dir=False)`` says
            whether a path must be left out.

    Returns:
        True on success, False when any exception occurred (the traceback is
        printed).
    """
    total_files = 0
    excluded_files = 0
    included_files = 0
    # Resolved once so the archive being written can be recognised in the walk.
    output_resolved = output_file.resolve()

    print(f"\n{'='*60}")
    print(f"开始备份项目: {project_root}")
    print(f"输出文件: {output_file}")
    print(f"{'='*60}\n")
    try:
        with tarfile.open(output_file, 'w:gz') as tar:
            # Top-down walk so ignored directories can be pruned before descent.
            for root, dirs, files in os.walk(project_root, topdown=True):
                root_path = Path(root)

                kept_dirs = []
                for d in dirs:
                    dir_path = root_path / d
                    if d == '.git' or filter_obj.should_exclude(dir_path, is_dir=True):
                        print(f" 排除目录: {dir_path.relative_to(project_root)}")
                        excluded_files += 1
                        continue
                    kept_dirs.append(d)
                # In-place assignment is what makes os.walk skip the pruned dirs.
                dirs[:] = kept_dirs

                for name in files:
                    path = root_path / name
                    arcname = path.relative_to(project_root)
                    total_files += 1
                    # The archive may live inside the project (the default
                    # location does); adding it to itself would store a
                    # truncated copy of the file that is still being written.
                    is_own_archive = (
                        name == output_file.name
                        and path.resolve() == output_resolved
                    )
                    # Check ".git" on the project-relative parts only, so a
                    # project located below a directory named ".git" still works.
                    if (
                        is_own_archive
                        or '.git' in arcname.parts
                        or filter_obj.should_exclude(path)
                    ):
                        excluded_files += 1
                        print(f" 排除: {arcname}")
                        continue
                    tar.add(path, arcname=arcname.as_posix())
                    included_files += 1
                    print(f" 备份: {arcname}")

        print(f"\n{'='*60}")
        print("备份完成!")
        print(f"{'='*60}")
        print(f"总文件数: {total_files}")
        print(f"已备份: {included_files} 个文件")
        print(f"已排除: {excluded_files} 个文件/目录")
        print(f"压缩包大小: {output_file.stat().st_size / 1024 / 1024:.2f} MB")
        print(f"{'='*60}")
        return True
    except Exception as e:
        # Top-level boundary: report the failure and let the caller pick the exit code.
        print(f"\n❌ 备份失败: {e}")
        import traceback
        traceback.print_exc()
        return False
def main():
    """Parse command-line options, run the backup and exit with its status.

    Exits with status 0 when the backup succeeded and 1 otherwise (including
    when the project directory is missing).
    """
    parser = argparse.ArgumentParser(
        description='快速备份项目(根据 .gitignore 排除文件)',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
使用示例:
# 基本用法(备份到 backups/gz/ 目录)
python backups/快速备份.py
# 指定输出文件
python backups/快速备份.py -o my_backup.tar.gz
# 指定项目根目录
python backups/快速备份.py -p /path/to/project
"""
    )
    parser.add_argument(
        '-p', '--project',
        type=str,
        default='.',
        help='项目根目录路径(默认: 当前目录)'
    )
    parser.add_argument(
        '-o', '--output',
        type=str,
        help='输出文件路径(默认: <项目根目录>/backups/gz/备份_YYYYMMDD_HHMMSS.tar.gz)'
    )
    parser.add_argument(
        '-g', '--gitignore',
        type=str,
        # None means "not given": the project's own .gitignore is used then.
        default=None,
        help='.gitignore 文件路径(默认: <项目根目录>/.gitignore)'
    )
    args = parser.parse_args()

    project_root = Path(args.project).resolve()
    if not project_root.is_dir():
        print(f"❌ 错误: 项目目录不存在: {project_root}")
        sys.exit(1)

    # Without -g the rules must come from the project being backed up, not
    # from whatever directory the script happens to be started in.
    if args.gitignore is None:
        gitignore_path = project_root / '.gitignore'
    else:
        gitignore_path = Path(args.gitignore).resolve()

    if args.output:
        output_file = Path(args.output).resolve()
    else:
        # Default location: <project_root>/backups/gz/ with a timestamped name.
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        output_file = project_root / 'backups' / 'gz' / f'备份_{timestamp}.tar.gz'
    # Covers both the default and the user-supplied location.
    output_file.parent.mkdir(parents=True, exist_ok=True)

    filter_obj = GitignoreFilter(gitignore_path, project_root)
    success = create_backup(project_root, output_file, filter_obj)
    sys.exit(0 if success else 1)
# Run the command-line interface only when executed as a script, not on import.
if __name__ == "__main__":
    main()