git-report-skill/scripts/generate_report.py
2026-04-21 18:17:28 +00:00

202 lines
No EOL
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Collect commit data from all branches of a repository, including the full diff.
Saves the result as JSON to /app/hermes_data/git_reports/<repo>_data.json
"""
import os
import re
import sys
import json
import argparse
import subprocess
import tempfile
from datetime import datetime
from collections import defaultdict
# Directory into which main() writes the "<repo>_data.json" report file.
REPORTS_BASE_DIR = "/app/hermes_data/git_reports"
def _run_git(clone_dir, *args):
    """Run ``git -C <clone_dir> <args...>`` and return its stdout as text.

    Output is decoded as UTF-8 with undecodable bytes replaced, because diffs
    may contain arbitrary file content and a strict decode would abort the
    whole report on the first non-UTF-8 byte.

    Raises:
        subprocess.CalledProcessError: if git exits with a non-zero status.
    """
    proc = subprocess.run(
        ['git', '-C', clone_dir, *args],
        capture_output=True, text=True, encoding='utf-8', errors='replace',
        check=True,
    )
    return proc.stdout


def _list_commits(clone_dir, since, until):
    """Return ``[(full_sha, commit_dict), ...]`` in ``git log --all`` order.

    Each commit_dict is pre-filled with sha (first 8 chars), author, date and
    message; the remaining fields hold empty defaults.

    Raises:
        subprocess.CalledProcessError: if ``git log`` fails.
    """
    # The ASCII unit separator cannot appear in ordinary names or subjects,
    # unlike '|', which would shift the fields of an author such as "A|B".
    sep = '\x1f'
    log_args = ['log', '--all', '--pretty=format:%H%x1f%an%x1f%ai%x1f%s']
    if since:
        log_args.append(f'--since={since.isoformat()}')
    if until:
        log_args.append(f'--until={until.isoformat()}')

    entries = []
    for line in _run_git(clone_dir, *log_args).strip().split('\n'):
        if not line.strip():
            continue
        parts = line.split(sep, 3)
        if len(parts) < 4:
            continue
        sha, author, date, subject = parts
        entries.append((sha, {
            'sha': sha[:8],
            'author': author,
            'date': date,
            'message': subject,
            'diff': '',
            'files_changed': [],
            'insertions': 0,
            'deletions': 0,
            'branches': [],
        }))
    return entries


def _branches_containing(clone_dir, sha):
    """Return the sorted names of remote and local branches containing *sha*.

    Returns an empty list if either ``git branch`` call fails.
    """
    try:
        remote_out = _run_git(clone_dir, 'branch', '-r', '--contains', sha)
        local_out = _run_git(clone_dir, 'branch', '--contains', sha)
    except subprocess.CalledProcessError:
        return []

    names = set()
    for raw in (remote_out + '\n' + local_out).split('\n'):
        # Drop the "* " marker git puts in front of the checked-out branch.
        name = raw.strip().lstrip('* ')
        # "origin/HEAD -> origin/main" is a symbolic alias, not a branch.
        if not name or ' -> ' in name:
            continue
        names.add(name)
    # Sorted so that repeated runs produce identical JSON.
    return sorted(names)


def _commit_diff(clone_dir, sha, max_diff_size):
    """Return the patch text of *sha*, truncated to *max_diff_size* characters.

    ``-m`` makes merge commits show a diff against each parent. On failure a
    parenthesised error message is returned in place of the diff.
    """
    try:
        diff_output = _run_git(
            clone_dir, 'show', '-m', '--patch', '--unified=3', sha)
    except subprocess.CalledProcessError as e:
        diff_output = f"(error getting diff: {e.stderr})"
    else:
        if not diff_output.strip():
            diff_output = "(no diff content possibly empty commit or merge commit without changes)"
    if len(diff_output) > max_diff_size:
        diff_output = diff_output[:max_diff_size] + "\n... (diff truncated)"
    return diff_output


def _commit_numstat(clone_dir, sha):
    """Return ``(insertions, deletions, files_changed)`` for *sha*.

    Parsed from ``git show --numstat``; counts reported as ``-`` (git's
    marker for binary files) are not added to the totals. Returns
    ``(0, 0, [])`` if git fails.
    """
    try:
        out = _run_git(clone_dir, 'show', '--numstat', '--pretty=format:', sha)
    except subprocess.CalledProcessError:
        return 0, 0, []

    insertions = 0
    deletions = 0
    files_changed = []
    for line in out.strip().split('\n'):
        fields = line.split('\t')
        if len(fields) < 3:
            continue
        added, removed, fname = fields[0], fields[1], fields[2]
        if added.isdigit():
            insertions += int(added)
        if removed.isdigit():
            deletions += int(removed)
        files_changed.append(fname)
    return insertions, deletions, files_changed


def get_all_commits_with_diff(repo_url, since=None, until=None, max_diff_size=5000, max_commits=500):
    """
    Clone a repository and collect its commits from all branches.

    The repository is cloned into a temporary directory that is always removed
    before returning, including on failure. Commits are listed with
    ``git log --all``.

    Args:
        repo_url: URL or path handed to ``git clone``.
        since: optional datetime; passed to git as ``--since=<isoformat>``.
        until: optional datetime; passed to git as ``--until=<isoformat>``.
        max_diff_size: maximum number of diff characters kept per commit.
        max_commits: maximum number of commits to process.

    Returns:
        A list of dicts with the keys sha, author, date, message, diff,
        files_changed, insertions, deletions, branches (list). An empty list
        is returned if cloning or ``git log`` fails, or if no commits match.
    """
    with tempfile.TemporaryDirectory(prefix='git_report_') as clone_dir:
        try:
            # "--" stops a repo_url starting with "-" being parsed as an option.
            subprocess.run(
                ['git', 'clone', '--', repo_url, clone_dir],
                check=True, capture_output=True, text=True,
                encoding='utf-8', errors='replace',
            )
        except subprocess.CalledProcessError as e:
            print(f"Clone failed: {e.stderr}", file=sys.stderr)
            return []
        except OSError as e:
            # git executable missing or not runnable.
            print(f"Clone failed: {e}", file=sys.stderr)
            return []

        # Fetch all branches and tags; best effort, the clone may suffice.
        try:
            _run_git(clone_dir, 'fetch', '--all')
        except subprocess.CalledProcessError as e:
            print(f"Fetch --all failed: {e.stderr}", file=sys.stderr)

        try:
            entries = _list_commits(clone_dir, since, until)
        except subprocess.CalledProcessError as e:
            print(f"git log --all failed: {e.stderr}", file=sys.stderr)
            return []
        if not entries:
            print("No commits found.", file=sys.stderr)
            return []

        limit = max(max_commits, 0)
        commits = []
        for sha, commit in entries[:limit]:
            commit['branches'] = _branches_containing(clone_dir, sha)
            commit['diff'] = _commit_diff(clone_dir, sha, max_diff_size)
            (commit['insertions'],
             commit['deletions'],
             commit['files_changed']) = _commit_numstat(clone_dir, sha)
            # Debug output
            print(f"Commit {commit['sha']}: diff size = {len(commit['diff'])} bytes, branches: {len(commit['branches'])}", file=sys.stderr)
            commits.append(commit)

        if len(entries) > limit:
            print(f"Reached max commits limit ({max_commits}), stopping.", file=sys.stderr)
        return commits
def _parse_iso_date(value):
    """argparse ``type=`` converter for ISO dates (e.g. YYYY-MM-DD).

    An empty string yields None, i.e. "no bound". Anything that
    ``datetime.fromisoformat`` rejects becomes a normal argparse usage error
    instead of an uncaught ValueError traceback.
    """
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}; expected YYYY-MM-DD") from None


def main(argv=None):
    """Command-line entry point: collect commit data and write the JSON report.

    Args:
        argv: optional list of command-line arguments; defaults to
            ``sys.argv[1:]`` when None.

    The report is written to ``REPORTS_BASE_DIR/<repo>_data.json`` and the
    path of that file is printed to stdout; progress goes to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--repo-url', required=True)
    parser.add_argument('--since', type=_parse_iso_date, help='YYYY-MM-DD')
    parser.add_argument('--until', type=_parse_iso_date, help='YYYY-MM-DD')
    parser.add_argument('--max-diff-size', type=int, default=5000,
                        help='Max characters of diff per commit (default 5000)')
    parser.add_argument('--max-commits', type=int, default=500,
                        help='Max commits to process (default 500)')
    args = parser.parse_args(argv)

    since = args.since
    until = args.until

    # Last path component of the URL, without a trailing ".git". ':' and '\\'
    # also act as separators so that scp-style URLs ("git@host:repo.git") and
    # Windows paths yield a name that is safe to use in the output filename.
    repo_name_match = re.search(r'([^/\\:]+?)(?:\.git)?$',
                                args.repo_url.rstrip('/\\'))
    repo_name = repo_name_match.group(1) if repo_name_match else "unknown"

    print(f"Analyzing {repo_name} (all branches)...", file=sys.stderr)
    commits = get_all_commits_with_diff(args.repo_url, since, until,
                                        args.max_diff_size, args.max_commits)

    data = {
        "repo": repo_name,
        "period": {
            "since": since.isoformat() if since else "full_history_start",
            "until": until.isoformat() if until else "full_history_end"
        },
        "commits": commits,
        "issues": []
    }

    os.makedirs(REPORTS_BASE_DIR, exist_ok=True)
    data_file = os.path.join(REPORTS_BASE_DIR, f"{repo_name}_data.json")
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    # The output path on stdout is the only thing written there.
    print(data_file)
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()