-
Notifications
You must be signed in to change notification settings - Fork 1
/
collate_readmes.py
257 lines (215 loc) · 7.5 KB
/
collate_readmes.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
"""Collate the README.md files of all repos generated from a template, into a single Markdown file
that contains a section for each repo.
Each individual README is prepended with a header that includes the GitHub login, as inferred from the name of the
generated repo. If the README already begins with a header, the login is appended, or substituted if the header is
simply "About Me".
If a file Roster.csv is present in the current directory with the columns "GitHub Login" and "Last", plus one of
"Preferred", "English", or "First" for the given name, these names are used instead of the GitHub login.
Usage:
python collate_readmes.py
python collate_readmes.py | pandoc --from markdown --metadata pagetitle="About Me" -s -o about.html
"""
from datetime import datetime
from dateutil import tz
import re
import sys
import json
import os
import subprocess
from pathlib import Path
from string import Template
import numpy as np
import pandas as pd
from graphqlclient import GraphQLClient
def get_git_config(name):
    """Return the value of the git configuration variable ``name``.

    Runs ``git config <name>`` and returns its standard output with trailing
    whitespace removed.

    Raises:
        Exception: if ``git config`` exits with a non-zero status. The message
            is git's stderr output when there is any; otherwise it names the
            variable and the exit status, so the failure is never reported as
            an empty message (git can exit non-zero without printing anything,
            e.g. when the variable is not set).
    """
    result = subprocess.run(
        ["git", "config", name], capture_output=True, text=True
    )
    if result.returncode:
        message = result.stderr.strip() or (
            f"git config {name!r} failed with exit status {result.returncode}"
            " (is the variable set?)"
        )
        raise Exception(message)
    return result.stdout.rstrip()
# The access token is taken from the GITHUB_ACCESS_TOKEN environment variable
# when that is set and non-empty, otherwise from the `user.accesstoken` git
# configuration variable.
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")
if not GITHUB_ACCESS_TOKEN:
    GITHUB_ACCESS_TOKEN = get_git_config("user.accesstoken")

# Module-wide GraphQL client for the GitHub API endpoint; the token is handed
# to it once here and reused by every query.
GH_CLIENT = GraphQLClient("https://api.github.com/graphql")
GH_CLIENT.inject_token(f"token {GITHUB_ACCESS_TOKEN}")
def query(gql, variables=None):
    """Run a GraphQL query through ``GH_CLIENT`` and return its ``data`` payload.

    ``gql`` is treated as a ``string.Template``: every ``$name`` placeholder is
    replaced by the JSON encoding of ``variables[name]`` before the query is
    sent. The response is decoded from JSON.

    Raises:
        Exception: if the response contains an ``errors`` entry; the message
            joins the individual error messages, one per line.
    """
    variables = variables or {}
    encoded = {}
    for key, value in variables.items():
        encoded[key] = json.dumps(value)
    request = Template(gql).substitute(**encoded)
    response = json.loads(GH_CLIENT.execute(request, variables))
    if "errors" not in response:
        return response["data"]
    # TODO include err['locations'] = {'line', 'column'}
    messages = [err["message"] for err in response["errors"]]
    raise Exception("\n".join(messages))
# GraphQL query template used by get_generated_repos(). The `$organization_login`
# and `$cursor` placeholders are filled in by query() through string.Template
# substitution. Each page requests up to 100 repositories of the organization;
# for every repository it asks for the name, the text of README.md on "master",
# the `templateRepository` (if any), and up to 100 commits from the history of
# "master". `pageInfo` carries the cursor needed to request the next page.
ORG_REPOS_GQL = """
query {
organization(login: $organization_login) {
repositories(first: 100, after: $cursor) {
nodes {
name
nameWithOwner
readme: object(expression: "master:README.md") {
... on Blob {
text
}
}
templateRepository {
nameWithOwner
}
ref(qualifiedName: "master") {
target {
... on Commit {
history(first: 100) {
edges {
node {
oid
authoredDate
committedDate
pushedDate
author {
name
email
date
}
}
}
}
}
}
}
}
pageInfo {
endCursor
hasNextPage
}
}
}
}
"""
def get_generated_repos(name_with_owner):
    """Fetch a template repository and the repositories generated from it.

    ``name_with_owner`` is an ``owner/name`` string; the part before the first
    slash is used as the organization login. All repositories of that
    organization are fetched page by page with ``ORG_REPOS_GQL``.

    Returns:
        A ``(master, forks)`` tuple: ``master`` is the repository whose
        ``nameWithOwner`` equals ``name_with_owner``; ``forks`` lists the
        repositories whose ``templateRepository`` is that repository.
    """
    organization = name_with_owner.split("/")[0]
    all_repos = []
    cursor = None
    has_more = True
    while has_more:
        response = query(
            ORG_REPOS_GQL,
            {"organization_login": organization, "cursor": cursor},
        )
        page = response["organization"]["repositories"]
        all_repos.extend(page["nodes"])
        page_info = page["pageInfo"]
        has_more = page_info["hasNextPage"]
        if has_more:
            cursor = page_info["endCursor"]

    # The template itself is the first repository with the requested full name.
    candidates = (
        repo for repo in all_repos if repo["nameWithOwner"] == name_with_owner
    )
    master = next(candidates)

    forks = []
    for repo in all_repos:
        template = repo["templateRepository"]
        if template and template["nameWithOwner"] == name_with_owner:
            forks.append(repo)
    return master, forks
def longest_prefix(names):
    """Return the longest common prefix of the repository names.

    Args:
        names: a list of strings.

    Returns:
        The longest string that every name starts with. This is the empty
        string when ``names`` is empty or when the names share no prefix at
        all.
    """
    # os.path.commonprefix compares character by character (it is not
    # path-aware), which is the semantics wanted here, and it already copes
    # with an empty list and with names that have nothing in common.
    return os.path.commonprefix(list(names))
def annotate_repos(repos, roster):
    """Annotate each repo dict in place with ``login``, ``author`` and ``commits``.

    The login of the student who generated a repo is whatever remains of the
    repo name after removing the longest prefix common to all repo names.

    Args:
        repos: list of repository dicts shaped like the ``nodes`` requested by
            ``ORG_REPOS_GQL``.
        roster: dict mapping a login to a display name; logins missing from it
            are used as the display name themselves.

    Each repo gains:
        ``login``: the inferred login.
        ``author``: ``roster[login]`` if present, else the login.
        ``commits``: the commit nodes from the repo's history whose author
            e-mail differs from the instructor address below. This is an empty
            list when the repo has no ``ref`` / ``target`` / ``history``.
    """
    if not repos:
        # Nothing to annotate, and there is no common prefix to compute.
        return
    common_prefix = longest_prefix([r["name"] for r in repos])
    for r in repos:
        login = r["name"][len(common_prefix):]
        r["login"] = login
        r["author"] = roster.get(login, login)
        # `ref` is null when the repo has no "master" branch; treat that (and a
        # missing target or history) as "no commits" instead of crashing.
        ref = r.get("ref") or {}
        target = ref.get("target") or {}
        history = target.get("history") or {}
        edges = history.get("edges") or []
        # Annotate repo['commits'] with commits that Christian didn't author.
        # NOTE(review): the address literal below reads like an e-mail
        # obfuscation placeholder; confirm it is the intended address.
        r["commits"] = [
            edge["node"]
            for edge in edges
            if edge["node"]["author"]["email"] != "[email protected]"
        ]
def read_roster():
    """Read ``Roster.csv`` from the current directory into a login -> name dict.

    Returns an empty dict when the file does not exist. Otherwise the given
    name is taken from the first of the columns "Preferred", "English",
    "First" that the file has, and joined with the "Last" column by a single
    space. Rows whose combined name is not a string (for example because a
    cell is empty) are left out.
    """
    roster_file = Path("Roster.csv")
    if not roster_file.exists():
        return {}

    table = pd.read_csv(roster_file)

    # Pick the first given-name column that is present, in order of preference.
    given_names = None
    for column in ("Preferred", "English", "First"):
        if column in table:
            given_names = table[column]
            break

    full_names = given_names + " " + table["Last"]

    names_by_login = {}
    for login, full_name in zip(table["GitHub Login"], full_names):
        if isinstance(full_name, str):
            names_by_login[login] = full_name
    return names_by_login
def _parse_git_timestamp(timestamp):
    """Parse an ISO-8601 timestamp string into a timezone-aware datetime."""
    from datetime import timezone  # only `datetime` is imported at file level

    if timestamp.endswith("Z"):
        # datetime.fromisoformat() accepts a "Z" suffix only from Python 3.11.
        timestamp = timestamp[:-1] + "+00:00"
    parsed = datetime.fromisoformat(timestamp)
    if parsed.tzinfo is None:
        # No UTC offset given: assume UTC so that comparisons stay well-defined.
        parsed = parsed.replace(tzinfo=timezone.utc)
    return parsed


def is_late_commit(commit, deadline="2019-09-09T03:00:00+08:00"):
    """Return True if ``commit`` was authored strictly after ``deadline``.

    Args:
        commit: a commit node dict; its ``["author"]["date"]`` entry is an
            ISO-8601 timestamp string.
        deadline: ISO-8601 timestamp string of the submission deadline.

    The two timestamps are compared as points in time rather than as strings:
    a string comparison gives wrong answers as soon as the commit carries a
    different UTC offset than the deadline.
    """
    authored = _parse_git_timestamp(commit["author"]["date"])
    return authored > _parse_git_timestamp(deadline)
def print_late_commits(repos):
    """Print a report of repositories that were turned in late or not at all.

    First prints one summary line per category ("No commits", "Late", "Some
    late commits") listing the logins in sorted order; a repository is listed
    only under the first category it matches. Then, for every repository with
    late commits, prints its login followed by the distinct author timestamps
    of those commits in chronological order, shown in China time.

    Args:
        repos: list of repo dicts that already carry the ``login`` and
            ``commits`` entries.
    """
    categories = {
        "No commits": [r for r in repos if not r["commits"]],
        "Late": [r for r in repos if all(map(is_late_commit, r["commits"]))],
        "Some late commits": [
            r for r in repos if any(map(is_late_commit, r["commits"]))
        ],
    }
    # Report each repo only once, under the first category that contains it.
    # (all() is vacuously true for a repo without commits, so the ordering of
    # the categories matters.)
    reported = []
    for label, matching in categories.items():
        fresh = [r for r in matching if r not in reported]
        reported += fresh
        if fresh:
            print(f"{label}: {', '.join(sorted(r['login'] for r in fresh))}")

    # "China" is not an IANA zone name; tz.gettz() returns None for names it
    # cannot resolve, and astimezone(None) would silently fall back to the
    # machine's local zone. Use the canonical identifier instead.
    report_tz = tz.gettz("Asia/Shanghai")
    for r in repos:
        late = [c for c in r["commits"] if is_late_commit(c)]
        if not late:
            continue
        print(f" {r['login']}:")
        # Sort so that the output does not depend on set iteration order.
        timestamps = sorted(
            {c["author"]["date"] for c in late}, key=datetime.fromisoformat
        )
        for ts in timestamps:
            local = datetime.fromisoformat(ts).astimezone(report_tz)
            # "%-d" (day without zero padding) is a platform extension of
            # strftime; format the day number directly to stay portable.
            dt = local.strftime("%H:%M %a, %b ") + str(local.day)
            print(f" {dt}")
def increment_headings(markdown):
    r"""Demote every heading by one level if the text has a level-one heading.

    Heading lines are first normalized so that exactly one space follows the
    leading run of ``#`` characters (``"#\s*title"`` -> ``"# title"``); this
    happens whether or not a level-one heading is present. If the normalized
    text then contains a level-one heading, one ``#`` is added to every
    heading.

    Note: fenced code blocks are not recognized, so lines starting with ``#``
    inside them are treated as headings too.

    Args:
        markdown: the Markdown source as a string.

    Returns:
        The adjusted Markdown string.
    """
    # Normalize the '## ' spacing. Only spaces and tabs are consumed: `\s`
    # would also swallow the newline after a bare "#" line and merge that line
    # with the one below it.
    markdown = re.sub(r"^(#+)[ \t]*", r"\1 ", markdown, flags=re.M)
    # If there's an H1, increment all the Hn's. The extra '#' is put in front
    # of the existing run so the single space after it is left alone.
    if re.search(r"^# ", markdown, flags=re.M):
        markdown = re.sub(r"^(#+)", r"#\1", markdown, flags=re.M)
    return markdown
def print_collated_readme(repos):
    """Print each repo's README as one section of a combined Markdown document.

    For every repo the section title is the README's own leading level-one
    heading, if it has one, with the author's name appended in parentheses
    when the heading does not already contain it. A README without such a
    heading, or whose heading is just "# About Me", is titled with the
    author's name instead. Each section is passed through
    ``increment_headings`` and followed by a horizontal rule.

    Args:
        repos: list of repo dicts carrying ``author`` and ``readme`` entries.
            A repo whose ``readme`` (or its ``text``) is missing or null is
            printed as a title with an empty body.
    """
    for r in repos:
        name = r["author"]
        # `readme` is null when the query found no README.md blob, and `text`
        # may be null as well; fall back to an empty body instead of crashing.
        readme = r.get("readme") or {}
        about = (readme.get("text") or "").strip()
        title = None
        if about.startswith("# "):
            # partition() also copes with a README that consists of nothing
            # but the heading line (no newline to split on).
            title, _, about = about.partition("\n")
            # Drop trailing whitespace (e.g. "\r") so the comparison below works.
            title = title.rstrip()
        if not title or title == "# About Me":
            title = "# " + name
        if name not in title:
            title += f" ({name})"
        print(increment_headings(title + "\n" + about))
        print("\n---\n")
def main(
    template="application-lab/1-WELCOME-TO-APPLAB",
    *,
    report_late=False,
    collate=True,
):
    """Fetch the repos generated from ``template`` and print the chosen reports.

    Args:
        template: ``owner/name`` of the template repository.
        report_late: when true, print the late / missing submission report.
        collate: when true, print the collated README document.

    With the default arguments only the collated README is printed.
    """
    # The template repository itself is not needed for either report.
    _template_repo, repos = get_generated_repos(template)
    annotate_repos(repos, read_roster())
    repos.sort(key=lambda r: r["author"])
    if report_late:
        # Runs on the unfiltered list so that repos without any commits can
        # show up under "No commits".
        print_late_commits(repos)
    if collate:
        # Only repos with at least one student commit get a section.
        print_collated_readme([r for r in repos if r["commits"]])


if __name__ == "__main__":
    main()