-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathtest_urls.py
More file actions
155 lines (127 loc) · 5.66 KB
/
test_urls.py
File metadata and controls
155 lines (127 loc) · 5.66 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
#!/usr/bin/env python3
"""
URL Validation Script
Tests that GitHub URLs are being constructed correctly for the repository browser.
"""
import json
import urllib.request
import urllib.error
def _collect_sample_files(items, max_files=5, found=None):
    """Gather up to ``max_files`` file entries from a nested structure.

    Walks ``items`` depth-first in listing order, descending into entries
    whose ``type`` is ``'directory'`` and that carry a ``children`` list.

    Args:
        items: List of entry dicts, each with at least a ``type`` key.
        max_files: Upper bound on the number of entries returned.
        found: Accumulator shared across recursive calls; callers omit it.

    Returns:
        list: The collected file entries, at most ``max_files`` long.
    """
    if found is None:
        found = []
    for item in items:
        if len(found) >= max_files:
            break
        if item['type'] == 'file':
            found.append(item)
        elif item['type'] == 'directory' and 'children' in item:
            _collect_sample_files(item['children'], max_files, found)
    return found


def _probe_url(url, label):
    """Request ``url`` and print whether it answered with an accepted status.

    Args:
        url: The URL exactly as constructed from the repository data.
        label: Human-readable name used in the printed line ("View"/"Raw").

    Returns:
        bool: True when the response status is 200, 301 or 302.

    Raises:
        urllib.error.HTTPError, urllib.error.URLError: Propagated from
            ``urlopen`` so the caller can report them once per file.
    """
    from urllib.parse import quote

    # urllib refuses spaces and non-ASCII characters that a browser would
    # percent-encode on its own. Encode only those; reserved characters and
    # existing escapes are left alone so the request matches what the
    # browser would send for the same constructed URL.
    request_url = quote(url, safe="/:?#[]@!$&'()*+,;=%")
    req = urllib.request.Request(request_url)
    req.add_header('User-Agent', 'Mozilla/5.0 (Repository Browser Test)')
    with urllib.request.urlopen(req, timeout=10) as response:
        status = response.status

    if status in (200, 301, 302):
        print(f" ✅ {label} URL accessible (status: {status})")
        return True
    print(f" ⚠️ {label} URL returned status: {status}")
    return False


def test_github_urls():
    """Check that GitHub URLs built from repo_structure.json are accessible.

    Reads ``repo_structure.json`` from the current working directory, samples
    up to five file entries, builds the ``github.com/<repo>/blob/...`` view
    URL and the ``raw.githubusercontent.com/...`` raw URL for each, and
    requests both. A file counts as a success when its raw URL responds with
    an accepted status.

    Returns:
        bool: True when at least one sampled file was accessible. False when
        the structure file is missing or malformed, when it lists no files,
        or when none of the sampled files was accessible.
    """
    print("🔍 Testing GitHub URL construction...")

    # Load repository structure
    try:
        with open('repo_structure.json', 'r', encoding='utf-8') as f:
            repo_data = json.load(f)
    except FileNotFoundError:
        print("❌ repo_structure.json not found. Run 'python3 generate_structure.py' first.")
        return False
    except json.JSONDecodeError as e:
        print(f"❌ repo_structure.json is not valid JSON: {e}")
        return False

    try:
        repo_name = repo_data['repository']
        branch = repo_data['branch']
        structure = repo_data['structure']
    except (KeyError, TypeError) as e:
        print(f"❌ repo_structure.json has an unexpected layout: {e!r}")
        return False

    print(f"Repository: {repo_name}")
    print(f"Branch: {branch}")
    print()

    # Test a few sample files
    sample_files = _collect_sample_files(structure)

    # With nothing to test, "0 of 0 succeeded" must not be reported as a pass.
    if not sample_files:
        print("❌ No files found in repo_structure.json; nothing to test.")
        return False

    print(f"Testing {len(sample_files)} sample files:")
    print("=" * 50)

    success_count = 0
    for file_item in sample_files:
        file_path = file_item['path']
        file_name = file_item['name']

        # Construct URLs (same logic as browser)
        view_url = f"https://github.com/{repo_name}/blob/{branch}/{file_path}"
        raw_url = f"https://raw.githubusercontent.com/{repo_name}/{branch}/{file_path}"

        print(f"📄 {file_name}")
        print(f" View: {view_url}")
        print(f" Raw: {raw_url}")

        # Both requests share one try block: a failure on the view URL is
        # reported once and the raw URL for that file is not attempted.
        try:
            _probe_url(view_url, "View")
            if _probe_url(raw_url, "Raw"):
                success_count += 1
        except urllib.error.HTTPError as e:
            if e.code == 404:
                print(" ❌ File not found (404) - repository may be private or file doesn't exist")
            else:
                print(f" ❌ HTTP Error {e.code}: {e.reason}")
        except urllib.error.URLError as e:
            print(f" ❌ URL Error: {e.reason}")
        except Exception as e:
            # Best-effort reporting: one bad entry should not abort the run.
            print(f" ❌ Error: {e}")
        print()

    print("=" * 50)
    print(f"Summary: {success_count}/{len(sample_files)} files successfully accessible")

    if success_count == len(sample_files):
        print("🎉 All URLs are working correctly!")
        return True
    if success_count > 0:
        print("⚠️ Some URLs are working. If repository is private, this is expected.")
        return True
    print("❌ No URLs are accessible. Check repository name and network connection.")
    return False
def _normalize_repo(repo):
    """Reduce a GitHub remote to its ``owner/name`` slug.

    Strips a leading ``git@github.com:`` or ``https://github.com/`` and a
    trailing ``.git``. Only the suffix is removed, so names that merely
    contain ``.git`` (for example ``octocat.github.io``) stay intact.
    """
    for prefix in ('git@github.com:', 'https://github.com/'):
        if repo.startswith(prefix):
            repo = repo[len(prefix):]
            break
    if repo.endswith('.git'):
        repo = repo[:-len('.git')]
    return repo


def test_url_construction():
    """Check the view/raw URL construction logic against known-good URLs.

    For each built-in case the repository reference is normalised to an
    ``owner/name`` slug, the view and raw URLs are built from it, and both
    are compared with the expected values. Results are printed per case;
    the closing line reports success only when every comparison matched.
    No network access is performed.

    Returns:
        None
    """
    print("🧪 Testing URL construction logic...")

    test_cases = [
        {
            'repo': 'mydennislab/2025-khoe-san-novel',
            'branch': 'main',
            'file_path': 'galaxy_kraken/bacteria_summary.tsv',
            'expected_view': 'https://github.com/mydennislab/2025-khoe-san-novel/blob/main/galaxy_kraken/bacteria_summary.tsv',
            'expected_raw': 'https://raw.githubusercontent.com/mydennislab/2025-khoe-san-novel/main/galaxy_kraken/bacteria_summary.tsv',
        },
        # SSH remote form: prefix and ".git" suffix must both be stripped.
        {
            'repo': 'git@github.com:mydennislab/2025-khoe-san-novel.git',
            'branch': 'main',
            'file_path': 'galaxy_kraken/bacteria_summary.tsv',
            'expected_view': 'https://github.com/mydennislab/2025-khoe-san-novel/blob/main/galaxy_kraken/bacteria_summary.tsv',
            'expected_raw': 'https://raw.githubusercontent.com/mydennislab/2025-khoe-san-novel/main/galaxy_kraken/bacteria_summary.tsv',
        },
        # HTTPS remote whose name contains ".git" in the middle: only the
        # trailing ".git" may be removed.
        {
            'repo': 'https://github.com/octocat/octocat.github.io.git',
            'branch': 'main',
            'file_path': 'index.html',
            'expected_view': 'https://github.com/octocat/octocat.github.io/blob/main/index.html',
            'expected_raw': 'https://raw.githubusercontent.com/octocat/octocat.github.io/main/index.html',
        },
    ]

    mismatches = 0
    for test in test_cases:
        branch = test['branch']
        file_path = test['file_path']

        # Handle different repository formats (same as browser logic)
        repo = _normalize_repo(test['repo'])

        view_url = f"https://github.com/{repo}/blob/{branch}/{file_path}"
        raw_url = f"https://raw.githubusercontent.com/{repo}/{branch}/{file_path}"

        view_ok = view_url == test['expected_view']
        raw_ok = raw_url == test['expected_raw']
        mismatches += (not view_ok) + (not raw_ok)

        print(f"Repository: {test['repo']}")
        print(f"Generated view URL: {view_url}")
        print(f"Expected view URL: {test['expected_view']}")
        print(f"View URL match: {'✅' if view_ok else '❌'}")
        print()
        print(f"Generated raw URL: {raw_url}")
        print(f"Expected raw URL: {test['expected_raw']}")
        print(f"Raw URL match: {'✅' if raw_ok else '❌'}")
        print()

    # Only claim success when every comparison actually matched.
    if mismatches == 0:
        print("✅ URL construction logic verified!")
    else:
        print(f"❌ URL construction logic failed: {mismatches} mismatch(es)")
if __name__ == "__main__":
    # Script entry point: run the offline construction check first, then the
    # network accessibility check against the URLs in repo_structure.json.
    print("🚀 Repository Browser URL Validation")
    print("=" * 40)
    print()
    test_url_construction()
    print()
    # test_github_urls() reports its verdict as a bool; turn a failure into a
    # non-zero exit status so shells and CI jobs can detect it.
    if not test_github_urls():
        raise SystemExit(1)