-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathreplace_string_with_another.py
178 lines (148 loc) · 6.46 KB
/
replace_string_with_another.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
#!/usr/bin/env python3
"""
Usage:
python replace_string_with_another.py
Requires:
GITHUB_AUTH token in local environment
Description:
For each repo in your org, looks for a given string. If the string exists,
switches to a new branch, replaces the string with a new string, commits
changes, and opens a PR. Everything currently hard-coded.
Note:
swap_strings has hardcoded shell commands that are wonky due to OSX. If
you're not on OSX you should examine and change them.
"""
import datetime
import json
import logging
import os
import subprocess
import sys
import time

from github_helpers import *
from shell_helpers import *
# Send log output to stderr at INFO level.
# Switch the level to logging.DEBUG for additional debugging info.
logging.basicConfig(stream=sys.stderr, level=logging.INFO)
# Module-level logger, named after this module, used by main() below.
LOG = logging.getLogger(__name__)
def main(org, root_dir, old_string, new_string, exclude_private=False, interactive=False):
    """
    Open a string-replacement pull request against every matching repo in an org.

    Goes through all repos in an org, clones them (or switches to the default
    branch and then pulls latest changes), searches for the specified string,
    and if found makes a new branch, swaps the string for the new string,
    commits, and creates a pull request. The branch name, commit message and
    PR title/body are currently hard-coded below.

    When the loop finishes, the URLs of the created PRs and the details of
    any PRs that failed to be created are written as JSON to timestamped
    files under `output/` (relative to the current working directory).

    * org (str): GitHub organization
    * root_dir (str): path to directory to clone repos (on Mac, may look like
      `/Users/<uname>/path/to/dir`)
    * old_string: what string we're looking to see if each repo has
    * new_string: if old_string is found, what we should replace it with
    * exclude_private (bool): if True, script skips private repos (default
      False)
    * interactive (bool): if True, pauses before committing files upstream and
      awaits user confirmation
    """
    gh_headers = get_github_headers()
    branch_name = "tcril/fix-gh-org-url"
    commit_msg = "fix: fix github url strings (org edx -> openedx)"
    pr_details = {
        "title": "Fix github url strings (org edx -> openedx)",
        "body": (
            "## This PR was autogenerated\n\n"
            "This pr replaces the old GitHub organization, github.com/edx, "
            "with the new GitHub organization, github.com/openedx.\n\n"
            "Tagging @openedx/tcril-engineering for review, but others are "
            "welcome to provide review.\n\n"
            "Ref: https://github.com/openedx/tcril-engineering/issues/42"
        ),
    }

    prs = []
    pr_failed = []
    repos_skipped = []
    # Bound up front so the summary below still works when get_repos() yields
    # nothing; otherwise `count` would only ever be assigned inside the loop.
    count = 0

    for repo_data in get_repos(gh_headers, org, exclude_private):
        (rname, ssh_url, dbranch, _, count) = repo_data
        LOG.info("\n\n******* CHECKING REPO: %s (%s) ************", rname, count)
        repo_path = get_repo_path(rname, root_dir)
        # clone repo; if exists, checkout the default branch & pull latest
        clone_repo(root_dir, repo_path, ssh_url, dbranch)

        # Search for the string; fail fast if none exist
        if not found(old_string, repo_path):
            LOG.info("Did not find string %s", old_string)
            continue

        if not new_branch(repo_path, branch_name):
            # this branch already exists
            LOG.info("Skipping %s, branch already exists", rname)
            repos_skipped.append([rname, "branch exists"])
            continue

        # Swap old string for new string
        swap_strings(old_string, new_string, repo_path)

        if interactive:
            try:
                interactive_commit(repo_path)
            except RepoError:
                # move on to next repo
                continue

        make_commit(repo_path, commit_msg)

        try:
            pr_url = make_pr(gh_headers, org, rname, branch_name, dbranch, pr_details)
            prs.append(pr_url)
        except PrCreationError as pr_err:
            LOG.info("%s", pr_err)
            # info you need to retry
            pr_failed.append((org, rname, branch_name, dbranch, pr_details))

        # Without, you hit secondary rate limits if you have more than ~30
        # repos. I tried 3, too short. 5, got through 80. 30, totally worked.
        # There's a good number in between, I'm sure, but it wouldn't help
        # much. From GH docs: Requests that create content which triggers
        # notifications, such as issues, comments and pull requests, may be
        # further limited and will not include a Retry-After header in the
        # response. Please create this content at a reasonable pace to avoid
        # further limiting.
        time.sleep(30)

    # Same "YYYY-MM-DD HH:MM:SS" stamp as str(datetime.now())[:19].
    ts = datetime.datetime.now().isoformat(sep=" ", timespec="seconds")
    prs_path = f"output/prs_{ts}.json"
    failed_path = f"output/failed_{ts}.json"

    # Report the files that are actually written (the names are timestamped).
    LOG.info(
        "Processed %s repos; see %s (%s) and %s (%s)",
        count, prs_path, len(prs), failed_path, len(pr_failed),
    )
    LOG.info("Skipped these repos as branch was already defined: %s", repos_skipped)

    # Make sure the results directory exists: losing the list of created and
    # failed PRs to a FileNotFoundError after all the work is done is costly.
    os.makedirs("output", exist_ok=True)
    with open(prs_path, "w") as f:
        json.dump(prs, f)
    with open(failed_path, "w") as f2:
        json.dump(pr_failed, f2)
def found(old_string, repo_path):
    """
    Looks through the repo specified by `repo_path` to see if there are any
    occurrences of `old_string`.

    The search is recursive, treats `old_string` as a literal string (not a
    regular expression), and does not inspect any `.git/` directory, so a
    match is only reported for files that `swap_strings` would also consider.

    * old_string (str): the text to look for
    * repo_path (str): root directory of the repo; grep runs from here

    Returns bool: True if the string is found, else False. An empty
    `old_string` is reported as not found.
    """
    if not old_string:
        # An empty pattern matches every line of every file, which is never
        # what a "does this repo mention X" check wants.
        return False

    # `grep -rl` prints the names of the files that match. The arguments are
    # passed as a list (no shell) so spaces, quotes or shell metacharacters in
    # old_string cannot split the pattern or run commands; `-e` keeps a
    # pattern that starts with "-" from being read as an option.
    result = subprocess.run(
        ["grep", "-rlF", "--exclude-dir=.git", "-e", old_string, "."],
        cwd=repo_path,
        stdout=subprocess.PIPE,
        stderr=subprocess.DEVNULL,
        check=False,
    )
    # Decide on the output rather than the exit status: grep can exit
    # non-zero because of an unreadable file even though other files matched.
    return bool(result.stdout)
def swap_strings(old_string, new_string, repo_path):
    """
    Replaces all occurrences of `old_string` in the repo with `new_string`
    recursively starting in the root directory given by `repo_path`.

    Both strings are treated literally (no regex or sed metacharacters), and
    files are rewritten in place at the byte level, so content that is not
    valid UTF-8 is left intact apart from the replaced spans. The work is
    done in Python rather than through platform-specific grep/sed commands.

    Does not inspect any `.git/` directory. Symbolic links and non-regular
    files are left untouched. Files that cannot be read or written are
    skipped with a warning so one bad file does not abort the whole repo.

    * old_string (str): text to replace; if empty, nothing is done
    * new_string (str): replacement text
    * repo_path (str): root directory of the repo

    Returns None.
    """
    if not old_string:
        # Replacing the empty string would insert new_string between every
        # byte of every file.
        return

    log = logging.getLogger(__name__)
    needle = old_string.encode("utf-8")
    replacement = new_string.encode("utf-8")

    for dirpath, dirnames, filenames in os.walk(repo_path):
        # Prune in place so os.walk never descends into a .git directory.
        dirnames[:] = [name for name in dirnames if name != ".git"]

        for filename in filenames:
            path = os.path.join(dirpath, filename)
            # Only rewrite real files; writing through a symlink could
            # modify something outside the repo.
            if os.path.islink(path) or not os.path.isfile(path):
                continue
            try:
                with open(path, "rb") as src:
                    content = src.read()
                if needle not in content:
                    continue
                with open(path, "wb") as dst:
                    dst.write(content.replace(needle, replacement))
            except OSError as err:
                log.warning("Could not update %s: %s", path, err)
if __name__ == "__main__":
    # Hard-coded run configuration: the local directory the repos are cloned
    # into, and the string to search for together with its replacement.
    root_dir, old_string, new_string = (
        "/Users/sarinacanelake/openedx/",
        "github.com/edx",
        "github.com/openedx",
    )
    main(
        org="openedx",
        root_dir=root_dir,
        old_string=old_string,
        new_string=new_string,
        exclude_private=False,
        interactive=False,
    )