-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathupdate_github_names.py
More file actions
152 lines (127 loc) · 5.29 KB
/
update_github_names.py
File metadata and controls
152 lines (127 loc) · 5.29 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env python3
"""
Script to update github_name values in repositories.json
- Remove sikatech- prefix
- Use gitlab_path as github_name
- Identify duplicates for user to choose names
"""
import json
import re
import sys
from collections import defaultdict
def normalize_name(name):
    """
    Turn an arbitrary name into a GitHub-style slug.

    The result is lowercase, has hyphens where the input had underscores,
    contains only ``a-z``, ``0-9`` and ``-``, never has two hyphens in a
    row, and neither starts nor ends with a hyphen. The result may be an
    empty string when no allowed character survives.
    """
    # Lowercase first so the character filter only has to know about a-z.
    hyphenated = name.lower().replace('_', '-')
    # Drop everything that is not a lowercase letter, a digit or a hyphen.
    allowed_only = re.sub(r'[^a-z0-9-]', '', hyphenated)
    # Splitting on '-' and discarding the empty pieces collapses hyphen runs
    # and trims leading/trailing hyphens in a single step.
    return '-'.join(piece for piece in allowed_only.split('-') if piece)
def update_repositories_json(file_path):
    """
    Update github_name values in repositories.json.

    Every entry of the top-level ``repositories`` list that has a non-empty
    ``gitlab_path`` gets ``normalize_name(gitlab_path)`` as its candidate
    name. Then:

    - candidates shared by more than one repository are reported on stdout
      and those repositories are left untouched;
    - a path that normalizes to an empty string is reported and left
      untouched instead of being stored as an empty ``github_name``;
    - every other repository has ``github_name`` set to its candidate.

    The file is always written back in place (``indent=2``, non-ASCII kept).

    Args:
        file_path: Path of the JSON file to read and overwrite.

    Returns:
        A tuple ``(updated_count, duplicate_name_count, skipped)``.
        ``skipped`` is a list of dicts with the keys ``gitlab_id``,
        ``gitlab_path``, ``normalized_name`` and ``old_github_name``, one per
        repository that was left untouched (duplicates first, then entries
        whose normalized name is empty).

    Raises:
        Whatever ``open`` and ``json.load`` raise for a missing, unreadable
        or malformed file; nothing is caught here.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Normalize every path exactly once and reuse the result in both passes.
    candidates = []
    for repo in data.get('repositories', []):
        gitlab_path = repo.get('gitlab_path', '')
        if gitlab_path:
            candidates.append((repo, gitlab_path, normalize_name(gitlab_path)))

    # First pass: group repositories by candidate name to find collisions.
    # Empty candidates are not names at all, so they are handled separately
    # rather than being reported as "duplicates" of ''.
    name_to_repos = defaultdict(list)
    for repo, gitlab_path, normalized in candidates:
        if not normalized:
            continue
        name_to_repos[normalized].append({
            'gitlab_id': repo.get('gitlab_id'),
            'gitlab_path': gitlab_path,
            'gitlab_full_path': repo.get('gitlab_full_path', ''),
            'old_github_name': repo.get('github_name', ''),
        })

    duplicates = {
        name: repos for name, repos in name_to_repos.items() if len(repos) > 1
    }

    if duplicates:
        _print_duplicate_report(duplicates)
        # No prompt: non-duplicates are always updated.
        print("\nProceeding to update non-duplicate names...")
    else:
        print("No duplicates found! All names are unique.")

    # Second pass: apply the candidate name wherever it is usable and unique.
    updates = []
    skipped = []
    unnameable = []
    for repo, gitlab_path, normalized in candidates:
        old_name = repo.get('github_name', '')
        untouched_entry = {
            'gitlab_id': repo.get('gitlab_id'),
            'gitlab_path': gitlab_path,
            'normalized_name': normalized,
            'old_github_name': old_name,
        }
        if not normalized:
            # Nothing survived normalization; keep the existing github_name.
            unnameable.append(untouched_entry)
            continue
        if normalized in duplicates:
            skipped.append(untouched_entry)
            continue
        if old_name != normalized:
            updates.append({
                'gitlab_id': repo.get('gitlab_id'),
                'gitlab_path': gitlab_path,
                'old_name': old_name,
                'new_name': normalized,
            })
            repo['github_name'] = normalized

    # Write back to file
    with open(file_path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)

    # Print summary
    print(f"\nUpdated {len(updates)} repository names (non-duplicates):")
    for update in updates[:20]:  # Show first 20
        print(f" {update['gitlab_path']}: {update['old_name']} -> {update['new_name']}")
    if len(updates) > 20:
        print(f" ... and {len(updates) - 20} more")
    if skipped:
        print(f"\nSkipped {len(skipped)} repositories with duplicate names (see above for details)")
    if unnameable:
        print(f"\nSkipped {len(unnameable)} repositories whose gitlab_path normalizes to an empty name:")
        for entry in unnameable:
            print(f" ID: {entry['gitlab_id']} Path: {entry['gitlab_path']}")

    return len(updates), len(duplicates), skipped + unnameable


def _print_duplicate_report(duplicates):
    """Print every colliding normalized name with the repositories behind it."""
    print("=" * 80)
    print("DUPLICATE REPOSITORY NAMES FOUND:")
    print("=" * 80)
    print("\nThe following normalized names would result in duplicates:")
    print("(You'll need to choose unique names for these)\n")
    for normalized_name, repos in sorted(duplicates.items()):
        print(f"\nNormalized name: '{normalized_name}'")
        print(f" Found {len(repos)} repositories:")
        for i, repo in enumerate(repos, 1):
            print(f" {i}. ID: {repo['gitlab_id']}")
            print(f" Path: {repo['gitlab_path']}")
            print(f" Full Path: {repo['gitlab_full_path']}")
            print(f" Current GitHub Name: {repo['old_github_name']}")
    print("\n" + "=" * 80)
    print(f"Total duplicates found: {len(duplicates)} normalized names")
    print("=" * 80)
if __name__ == '__main__':
    # An optional first command-line argument overrides the default file.
    file_path = sys.argv[1] if len(sys.argv) > 1 else 'repositories.json'

    try:
        updates, dup_count, skipped = update_repositories_json(file_path)
        summary_lines = (
            "\nSummary:",
            f" - Updated: {updates} repositories",
            f" - Duplicates found: {dup_count} normalized names",
            f" - Skipped: {len(skipped)} repositories",
        )
        for summary_line in summary_lines:
            print(summary_line)
        sys.exit(0)
    except Exception as e:
        # Top-level boundary: report the failure with its traceback and
        # signal it to the shell through a non-zero exit status.
        print(f"Error: {e}", file=sys.stderr)
        import traceback
        traceback.print_exc()
        sys.exit(1)