-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathcleanup_archive.py
More file actions
executable file
·184 lines (155 loc) · 5.92 KB
/
cleanup_archive.py
File metadata and controls
executable file
·184 lines (155 loc) · 5.92 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
#!/usr/bin/env python3
"""
cleanup_archive.py - Post-process archived TNNT tournament pages
Usage: python cleanup_archive.py [year]
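This script post-processes the mirrored tournament pages in place to:
1. Fix logo link to point to / (main site)
2. Fix title link to point to index.html (archive home)
3. Disable login forms by commenting them out
4. Remove admin panel links
5. Add archive banner at the end of the header
6. Change CSS to archive gray color scheme
7. Add a UTF-8 charset meta tag where one is missing
8. Point the ARCHIVES nav link at /archives (main site)
9. Remove the MY CLAN nav link
If no year is given, the current year is used.
"""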
import os
import re
import sys
from pathlib import Path
def get_archive_dir(year):
    """Return the directory holding the mirrored pages for ``year``.

    The path is ``<directory of this script>/tnnt/static/archives/<year>``.
    ``year`` is converted with ``str()``, so an int or a string both work.
    The directory is not checked for existence here.
    """
    base = Path(__file__).parent
    return base.joinpath('tnnt', 'static', 'archives', str(year))
def process_css(css_path, year):
    """Switch the archived stylesheet to the gray archive color scheme.

    Two ``background-color`` declarations are rewritten in place:
    ``#003`` becomes ``#111`` and ``#237`` becomes ``#555``.

    Args:
        css_path: ``pathlib.Path`` of the stylesheet to modify.
        year: Archive year. Not used by this function; kept so the call
            signature parallels ``process_html``.

    Returns:
        None. A missing file is reported with a warning and skipped. The
        file is written back only when a declaration actually changed.

    Raises:
        UnicodeDecodeError: if the stylesheet is not valid UTF-8.
    """
    if not css_path.exists():
        print(f" Warning: CSS file not found: {css_path}")
        return

    print(f" Processing CSS: {css_path}")

    # Read/write explicitly as UTF-8 so the result does not depend on the
    # locale of the machine running the script (the HTML pages of the same
    # archive are declared UTF-8 by process_html).
    original = css_path.read_text(encoding='utf-8')

    # (pattern, replacement) pairs. The trailing \b keeps longer hex colors
    # such as #0033 from being matched by the 3-digit pattern.
    recolor_rules = (
        # Body background (per the original note): #003 -> #111
        (r'background-color:\s*#003\b', 'background-color: #111'),
        # Table header background (per the original note): #237 -> #555
        (r'background-color:\s*#237\b', 'background-color: #555'),
    )

    content = original
    for pattern, replacement in recolor_rules:
        content = re.sub(pattern, replacement, content)

    # Only touch the file (and only claim an update) when something changed,
    # matching how process_html writes only when modified.
    if content == original:
        print(" - No color changes needed")
        return

    css_path.write_text(content, encoding='utf-8')
    print(" - Updated color scheme to archive gray")
def process_html(html_path, year):
    """Apply the archive modifications to one mirrored HTML page, in place.

    Steps, in order (each is skipped when it does not apply):
      0.  add a UTF-8 ``<meta charset>`` tag right after ``<head>``;
      1.  point the ``titlelink`` anchor at ``index.html``;
      1b. point the logo link back at ``/``;
      1c. point ``archives.html`` links at ``/archives``;
      2.  insert the ``<year> ARCHIVE`` banner just before ``</header>``;
      3.  comment out POST forms on pages that carry a CSRF token;
      3b. collapse a doubled ``-->`` left after a commented-out form;
      4.  drop admin-panel links;
      5.  drop the "MY CLAN" link.

    Args:
        html_path: ``pathlib.Path`` of the HTML file to process.
        year: Archive year shown in the banner (int or str).

    Returns:
        None. The file is rewritten only when at least one step changed it.

    Raises:
        UnicodeDecodeError: if the page is not valid UTF-8.
    """
    print(f" Processing: {html_path.name}")

    # The page is declared UTF-8 in step 0, so read and write it as UTF-8
    # rather than with whatever the current locale's default encoding is.
    content = html_path.read_text(encoding='utf-8')
    modified = False

    # 0. Add UTF-8 charset meta tag if missing
    if '<meta charset=' not in content.lower() and '<head>' in content:
        content = content.replace('<head>', '<head>\n <meta charset="UTF-8">')
        modified = True
        print(" - Added UTF-8 charset")

    # 1. Fix title link to point to index.html (for archive home)
    # Change href="/" in titlelink to href="index.html"
    if '<a id="titlelink" href="/">' in content:
        content = content.replace(
            '<a id="titlelink" href="/">',
            '<a id="titlelink" href="index.html">'
        )
        modified = True
        print(" - Fixed title link")

    # 1b. Fix logo link to point to / (main site)
    # The logo in the #logo td should link back to the main site, not the
    # archive home. Per the original note, wget rewrites it to index.html,
    # so it is changed back to / here.
    logo_pattern = r'(<td id="logo"[^>]*>\s*<a href=")index\.html(")'
    content, logo_hits = re.subn(logo_pattern, r'\1/\2', content)
    if logo_hits:
        modified = True
        print(" - Fixed logo link to main site")

    # 1c. Fix ARCHIVES nav link to point to /archives (main site)
    # The archives link should go to the main site, not the archived copy.
    if 'href="archives.html"' in content:
        content = content.replace('href="archives.html"', 'href="/archives"')
        modified = True
        print(" - Fixed ARCHIVES link to main site")

    # 2. Add archive banner if not already present
    banner = f'<h2>{year} ARCHIVE</h2>'
    if banner not in content:
        # Insert the banner after a </table> that is directly followed by
        # </header> (only whitespace in between).
        pattern = r'(</table>\s*)(</header>)'
        replacement = r'\1 <tr>\n <td><br />' + banner + r'</td>\n </tr>\n\2'
        content, banner_hits = re.subn(pattern, replacement, content)
        if banner_hits:
            modified = True
            print(" - Added archive banner")

    # 3. Comment out login forms
    # Only pages containing both a POST form and a CSRF token are touched.
    if '<form method="post">' in content and 'csrfmiddlewaretoken' in content:
        # Avoid double commenting on a page that was already processed.
        if '<!-- <form method="post">' not in content:
            content, form_hits = re.subn(
                r'(<form method="post">.*?</form>)',
                r'<!-- \1 -->',
                content,
                flags=re.DOTALL
            )
            # Report success only if a complete <form>...</form> was found.
            if form_hits:
                modified = True
                print(" - Commented out login form")

    # 3b. Fix double --> from previous comment processing
    if '</form> --> -->' in content:
        content = content.replace('</form> --> -->', '</form> -->')
        modified = True
        print(" - Fixed stray comment closing tag")

    # 4. Remove admin panel link from navigation
    # The substring test is only a cheap pre-filter; the log line and the
    # modified flag depend on the regex actually removing a link.
    if 'admin-panel' in content:
        content, admin_hits = re.subn(
            r'<a href="[^"]*admin-panel[^"]*"[^>]*>[^<]*</a>\s*',
            '',
            content
        )
        if admin_hits:
            modified = True
            print(" - Removed admin panel link")

    # 5. Remove MY CLAN link from navigation (only shown when logged in)
    if 'clanmgmt' in content and 'MY CLAN' in content:
        content, clan_hits = re.subn(
            r'<a href="[^"]*clanmgmt[^"]*"[^>]*>MY CLAN</a>\s*',
            '',
            content
        )
        if clan_hits:
            modified = True
            print(" - Removed MY CLAN link")

    if modified:
        html_path.write_text(content, encoding='utf-8')
def process_archive(year):
    """Run the CSS and HTML clean-up over the whole archive for ``year``.

    Looks up the archive directory with ``get_archive_dir``, processes
    ``static/css/default.css`` beneath it, then processes every ``*.html``
    file found recursively. Prints an error and exits with status 1 when
    the archive directory does not exist.
    """
    root = get_archive_dir(year)
    if not root.exists():
        print(f"Error: Archive directory not found: {root}")
        sys.exit(1)

    # Header: two info lines followed by a blank line.
    print(
        f"Processing archive for year {year}",
        f"Directory: {root}",
        "",
        sep="\n",
    )

    # Stylesheet first.
    stylesheet = root.joinpath('static', 'css', 'default.css')
    process_css(stylesheet, year)
    print()

    # Then every HTML page; enumerate leaves the running total in
    # html_count (it stays 0 when no page is found).
    html_count = 0
    for html_count, page in enumerate(root.rglob('*.html'), start=1):
        process_html(page, year)

    print()
    print(f"Processed {html_count} HTML files")
    print("Archive cleanup complete!")
def main():
    """Command-line entry point.

    Uses the first command-line argument (kept as a string) as the year;
    without one, falls back to the current calendar year as an int. The
    year is then handed to ``process_archive``.
    """
    cli_args = sys.argv[1:]
    if cli_args:
        year = cli_args[0]
    else:
        # Imported lazily: only needed when no year was supplied.
        from datetime import datetime
        year = datetime.now().year
    process_archive(year)


# Run the clean-up only when executed as a script, not when imported.
if __name__ == '__main__':
    main()