Add scripts
This commit is contained in:
parent
3e0cf88814
commit
30479e6c37
2 changed files with 205 additions and 0 deletions
202
scripts/generate_report.py
Normal file
202
scripts/generate_report.py
Normal file
|
|
@ -0,0 +1,202 @@
|
|||
#!/usr/bin/env python3
|
||||
"""
|
||||
Сбор данных о коммитах из всех веток репозитория с полным diff.
|
||||
Сохраняет JSON в /app/hermes_data/git_reports/<repo>_data.json
|
||||
"""
|
||||
|
||||
import os
|
||||
import re
|
||||
import sys
|
||||
import json
|
||||
import argparse
|
||||
import subprocess
|
||||
import tempfile
|
||||
from datetime import datetime
|
||||
from collections import defaultdict
|
||||
|
||||
REPORTS_BASE_DIR = "/app/hermes_data/git_reports"
|
||||
|
||||
# ASCII "unit separator": cannot realistically occur in author names or
# subjects, unlike '|', so it is a safe field delimiter for `git log` output.
_LOG_FIELD_SEP = '\x1f'


def _run_git(clone_dir, *args):
    """Run ``git -C <clone_dir> <args...>`` and return the CompletedProcess.

    Output is decoded as UTF-8 with undecodable bytes replaced, so diffs of
    files stored in legacy encodings cannot abort the run.
    Raises subprocess.CalledProcessError on a non-zero exit status.
    """
    return subprocess.run(
        ['git', '-C', clone_dir, *args],
        capture_output=True, check=True,
        encoding='utf-8', errors='replace',
    )


def _parse_branch_lines(output):
    """Extract branch names from ``git branch`` output.

    Drops the current-branch marker ('* ') and skips symbolic-ref alias lines
    such as ``origin/HEAD -> origin/main``, which are not branches themselves.
    """
    names = []
    for raw in output.split('\n'):
        name = raw.strip().lstrip('* ')
        # Ref names cannot contain spaces, so ' -> ' only appears in aliases.
        if name and ' -> ' not in name:
            names.append(name)
    return names


def _commit_branches(clone_dir, sha):
    """Return the sorted remote and local branch names that contain *sha*."""
    try:
        remote = _run_git(clone_dir, 'branch', '-r', '--contains', sha).stdout
        local = _run_git(clone_dir, 'branch', '--contains', sha).stdout
    except subprocess.CalledProcessError:
        return []
    # Sorted so that the generated report is stable between runs.
    return sorted(set(_parse_branch_lines(remote) + _parse_branch_lines(local)))


def _commit_diff(clone_dir, sha, max_diff_size):
    """Return the patch text of *sha*, truncated to *max_diff_size* characters."""
    try:
        # -m makes merge commits show a diff against each parent.
        diff_output = _run_git(
            clone_dir, 'show', '-m', '--patch', '--unified=3', sha
        ).stdout
        if not diff_output.strip():
            diff_output = "(no diff content – possibly empty commit or merge commit without changes)"
    except subprocess.CalledProcessError as e:
        diff_output = f"(error getting diff: {e.stderr})"

    if len(diff_output) > max_diff_size:
        diff_output = diff_output[:max_diff_size] + "\n... (diff truncated)"
    return diff_output


def _commit_stats(clone_dir, sha):
    """Return ``(files_changed, insertions, deletions)`` parsed from --numstat."""
    try:
        stat_output = _run_git(
            clone_dir, 'show', '--numstat', '--pretty=format:', sha
        ).stdout
    except subprocess.CalledProcessError:
        return [], 0, 0

    files_changed = []
    insertions = 0
    deletions = 0
    for stat_line in stat_output.strip().split('\n'):
        if not stat_line.strip():
            continue
        fields = stat_line.split('\t')
        if len(fields) < 3:
            continue
        added, removed, fname = fields[0], fields[1], fields[2]
        # Binary files report '-' instead of a line count; skip those counts.
        if added.isdigit():
            insertions += int(added)
        if removed.isdigit():
            deletions += int(removed)
        files_changed.append(fname)
    return files_changed, insertions, deletions


def get_all_commits_with_diff(repo_url, since=None, until=None, max_diff_size=5000, max_commits=500):
    """
    Clone a repository and collect commits from all branches via ``git log --all``.

    Args:
        repo_url: URL or path passed to ``git clone``.
        since: optional datetime; only commits after it are listed.
        until: optional datetime; only commits before it are listed.
        max_diff_size: maximum number of diff characters kept per commit.
        max_commits: maximum number of commits (in ``git log`` order) to process.

    Returns:
        A list of dicts with the keys sha (first 8 characters), author, date,
        message, diff, files_changed, insertions, deletions and branches (list).
        An empty list is returned when cloning or listing commits fails, or
        when there are no commits; the reason is printed to stderr.

    The temporary clone is always removed, including on failure paths.
    """
    with tempfile.TemporaryDirectory(prefix='git_report_') as clone_dir:
        try:
            # '--' stops a repo_url that starts with '-' being read as an option.
            subprocess.run(['git', 'clone', '--', repo_url, clone_dir],
                           check=True, capture_output=True,
                           encoding='utf-8', errors='replace')
        except subprocess.CalledProcessError as e:
            print(f"Clone failed: {e.stderr}", file=sys.stderr)
            return []
        except OSError as e:
            # e.g. the git executable is not installed.
            print(f"Clone failed: {e}", file=sys.stderr)
            return []

        # Load all branches and tags; best effort, the clone may already have them.
        try:
            _run_git(clone_dir, 'fetch', '--all')
        except subprocess.CalledProcessError as e:
            print(f"Fetch --all failed: {e.stderr}", file=sys.stderr)

        # List commits reachable from every ref (--all).
        log_args = ['log', '--all', '--pretty=format:%H%x1f%an%x1f%ai%x1f%s']
        if since:
            log_args.append(f'--since={since.isoformat()}')
        if until:
            log_args.append(f'--until={until.isoformat()}')

        try:
            log_output = _run_git(clone_dir, *log_args).stdout
        except subprocess.CalledProcessError as e:
            print(f"git log --all failed: {e.stderr}", file=sys.stderr)
            return []

        if not log_output.strip():
            print("No commits found.", file=sys.stderr)
            return []

        # Parse the log into (full_sha, commit_record) pairs, keeping log order.
        records = []
        for line in log_output.strip().split('\n'):
            if not line.strip():
                continue
            parts = line.split(_LOG_FIELD_SEP, 3)
            if len(parts) < 4:
                continue
            full_sha, author, date, subject = parts
            records.append((full_sha, {
                'sha': full_sha[:8],
                'author': author,
                'date': date,
                'message': subject,
                'diff': '',
                'files_changed': [],
                'insertions': 0,
                'deletions': 0,
                'branches': [],
            }))

        # Enrich at most max_commits commits with branches, diff and statistics.
        limit = max(max_commits, 0)
        commits = []
        for full_sha, commit in records[:limit]:
            commit['branches'] = _commit_branches(clone_dir, full_sha)
            commit['diff'] = _commit_diff(clone_dir, full_sha, max_diff_size)
            files_changed, insertions, deletions = _commit_stats(clone_dir, full_sha)
            commit['files_changed'] = files_changed
            commit['insertions'] = insertions
            commit['deletions'] = deletions

            # Progress/debug output.
            print(f"Commit {commit['sha']}: diff size = {len(commit['diff'])} bytes, branches: {len(commit['branches'])}", file=sys.stderr)
            commits.append(commit)

        if len(records) > limit:
            print(f"Reached max commits limit ({max_commits}), stopping.", file=sys.stderr)

        return commits
|
||||
|
||||
def _iso_datetime(value):
    """argparse ``type`` callable: parse an ISO date/datetime string.

    An empty string is treated as "no bound" (returns None). A malformed
    value is reported as a normal argparse usage error instead of a traceback.
    """
    if not value:
        return None
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        raise argparse.ArgumentTypeError(
            f"invalid date {value!r}, expected YYYY-MM-DD"
        ) from None


def main():
    """Command-line entry point.

    Parses the arguments, collects commit data for the repository and writes
    it as JSON to ``<REPORTS_BASE_DIR>/<repo>_data.json``. The path of the
    written file is printed to stdout; progress messages go to stderr.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--repo-url', required=True)
    parser.add_argument('--since', type=_iso_datetime, help='YYYY-MM-DD')
    parser.add_argument('--until', type=_iso_datetime, help='YYYY-MM-DD')
    parser.add_argument('--max-diff-size', type=int, default=5000,
                        help='Max characters of diff per commit (default 5000)')
    parser.add_argument('--max-commits', type=int, default=500,
                        help='Max commits to process (default 500)')
    args = parser.parse_args()

    since = args.since
    until = args.until

    # The repository name is the last path component of the URL, minus ".git".
    repo_name_match = re.search(r'([^/]+?)(?:\.git)?$', args.repo_url.rstrip('/'))
    repo_name = repo_name_match.group(1) if repo_name_match else "unknown"

    print(f"Analyzing {repo_name} (all branches)...", file=sys.stderr)
    commits = get_all_commits_with_diff(args.repo_url, since, until,
                                        args.max_diff_size, args.max_commits)

    data = {
        "repo": repo_name,
        "period": {
            "since": since.isoformat() if since else "full_history_start",
            "until": until.isoformat() if until else "full_history_end",
        },
        "commits": commits,
        "issues": [],
    }

    os.makedirs(REPORTS_BASE_DIR, exist_ok=True)
    data_file = os.path.join(REPORTS_BASE_DIR, f"{repo_name}_data.json")
    with open(data_file, 'w', encoding='utf-8') as f:
        json.dump(data, f, ensure_ascii=False, indent=2)
    print(data_file)


# Run the generator only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
|
||||
3
scripts/requirements.txt
Normal file
3
scripts/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
requests>=2.31.0
|
||||
GitPython>=3.1.40
|
||||
python-dateutil>=2.8.2
|
||||
Loading…
Add table
Add a link
Reference in a new issue