Skip to content

Commit

Permalink
Updating rsepedia to use pypi for getting deps (#16)
Browse files (browse the repository at this point in the history)
* preparing to update python repos with new parser
Signed-off-by: vsoch <[email protected]>
  • Loading branch information
vsoch authored Apr 24, 2022
1 parent ba52bbb commit 8f28e3e
Show file tree
Hide file tree
Showing 783 changed files with 49,743 additions and 47,114 deletions.
1 change: 0 additions & 1 deletion .github/workflows/update-analysis.yaml
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
name: update-analysis

on:
pull_request: []
schedule:
# Weekly on Wednesday (software updated Sunday night)
- cron: 0 0 * * 3
Expand Down
7 changes: 3 additions & 4 deletions 1.download.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@
import tempfile
import shutil
import argparse
import re
import sys
import os

Expand Down Expand Up @@ -125,7 +124,6 @@ def main():
if os.path.exists(destfile) and reqfile:
continue


print(f"{reponame}: {i} of {len(repos)}")
dest = clone(repo.url, tempdir)
if not dest:
Expand All @@ -150,7 +148,7 @@ def main():

if os.path.exists(destfile):
continue
try:
try:
cli = parser.RequirementsParser(filename=found, min_credit=0.001)
result = cli.gen(name=repo.uid, min_credit=0.001)
except:
Expand Down Expand Up @@ -196,6 +194,7 @@ def main():
shutil.rmtree(tempdir)
write_json(meta, meta_json)
utils.write_json(list(missing_requirements), "missing-requirements.json")



if __name__ == "__main__":
main()
112 changes: 39 additions & 73 deletions 2.analysis.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import tempfile
import shutil


def clone(url, dest):
dest = os.path.join(dest, os.path.basename(url))
cmd = Command("git clone --depth 1 %s %s" % (url, dest))
Expand Down Expand Up @@ -135,6 +136,8 @@ def main():
meta = read_json(meta_json)
language_json = os.path.join("docs", "language-counts.json")

# Collect list of data files
data_files = []
count = 0
for i, reponame in enumerate(repos):
repo = pedia.get(reponame[0])
Expand All @@ -143,55 +146,12 @@ def main():
destfile = os.path.join(destdir, "data.json")
if not os.path.exists(destfile):
continue

print(f"{reponame}: {i} of {len(repos)}")
data_files.append(destfile)
count += 1

# Find the metadata file in the repo
found = None
for filename in recursive_find(destdir, pattern="*"):
basename = os.path.basename(filename)
if basename in packages.filesystem_manager_names:
found = filename
break

# This should not happen
if not found:
tempdir = tempfile.mkdtemp()
dest = clone(repo.url, tempdir)
for filename in recursive_find(tempdir, pattern="*"):
basename = os.path.basename(filename)
if basename in packages.filesystem_manager_names:
found = filename
break

if not found:
print('WARNING %s not found!' % repo.uid)
continue

# copy found file into folder
reqfile = os.path.join(destdir, os.path.basename(found))
shutil.copyfile(found, reqfile)
shutil.rmtree(dest)

if not found:
print('WARNING %s not found!' % repo.uid)
continue

try:
global_cli.gen(repo.uid, filename=found, min_credit=0.001)
except:
continue

write_json(meta, meta_json)

# Write language counts
counts = {}
for repo, language in meta["language"].items():
if language not in counts:
counts[language] = 0
counts[language] += 1
write_json(counts, language_json)
# This is how to render the custom (loaded) data
roots = global_cli.load_datafiles(data_files)

# Write results, changing round to include most
global_cli.round_by = 100
Expand All @@ -201,25 +161,33 @@ def main():
"RSEPedia Top Dependencies",
"dependencies",
)
+ global_cli.render(start_end_blocks=False)
+ global_cli.render(start_end_blocks=False, data=roots)
)
write_json(meta, meta_json)

# Write language counts
counts = {}
for repo, language in meta["language"].items():
if language not in counts:
counts[language] = 0
counts[language] += 1
write_json(counts, language_json)

# Replace for jekyll site
write_file(
os.path.join("docs", "all-repos.md"), global_cli.render(start_end_blocks=False)
os.path.join("docs", "all-repos.md"),
global_cli.render(start_end_blocks=False, data=roots),
)
write_file(os.path.join("pages", "dependencies.md"), content)

# Get stats for different languages
# Keys available in the data: 'setup.py', 'package.json', 'npm', 'DESCRIPTION', 'cran', 'pypi', 'go.mod', 'go', 'requirements.txt'
python_deps = len(
set(global_cli.data["pypi"])
.union(global_cli.data["requirements.txt"])
.union(global_cli.data["pypi"])
set(roots["pypi"]).union(roots["requirements.txt"]).union(roots["pypi"])
)
r_deps = len(set(global_cli.data["DESCRIPTION"]).union(global_cli.data["cran"]))
js_deps = len(set(global_cli.data["npm"]).union(global_cli.data["package.json"]))
go_deps = len(set(global_cli.data["go"]).union(global_cli.data["go.mod"]))
r_deps = len(set(roots["DESCRIPTION"]).union(roots["cran"]))
js_deps = len(set(roots["npm"]).union(roots["package.json"]))
go_deps = len(set(roots["go"]).union(roots["go.mod"]))

stats = {
"python_deps": python_deps,
Expand All @@ -233,49 +201,47 @@ def main():
write_json(counts, os.path.join("_data", "language_counts.json"))

# Prepare scoped tables to languages
custom_cli = copy.deepcopy(global_cli)

# Keep count of repos / deps files for each
repos_counts = {}

# Python
custom_cli.data = custom_cli.prepare_custom_table(
["setup.py", "requirements.txt", "pypi"]
data = global_cli.load_datafiles(
data_files, includes=["setup.py", "requirements.txt", "pypi"]
)
content = (
header
% (
"RSEPedia Top Python Dependencies",
"python",
)
+ custom_cli.render(start_end_blocks=False)
+ global_cli.render(start_end_blocks=False, data=data)
)
write_file(os.path.join("pages", "python.md"), content)
repos_counts["Python"] = count_repos(custom_cli.data)
repos_counts["Python"] = count_repos(data)

# R
custom_cli.data = custom_cli.prepare_custom_table(["cran", "DESCRIPTION"])
content = header % ("RSEPedia Top R Dependencies", "R") + custom_cli.render(
start_end_blocks=False
data = global_cli.load_datafiles(data_files, includes=["cran", "DESCRIPTION"])
content = header % ("RSEPedia Top R Dependencies", "R") + global_cli.render(
start_end_blocks=False, data=data
)
write_file(os.path.join("pages", "r.md"), content)
repos_counts["R"] = count_repos(custom_cli.data)
repos_counts["R"] = count_repos(data)

# Javascript
custom_cli.data = custom_cli.prepare_custom_table(["package.json", "npm"])
content = header % ("RSEPedia Top Js Dependencies", "js") + custom_cli.render(
start_end_blocks=False
data = global_cli.load_datafiles(data_files, includes=["package.json", "npm"])
content = header % ("RSEPedia Top Js Dependencies", "js") + global_cli.render(
start_end_blocks=False, data=data
)
write_file(os.path.join("pages", "js.md"), content)
repos_counts["Js"] = count_repos(custom_cli.data)
repos_counts["Js"] = count_repos(data)

# Go
custom_cli.data = custom_cli.prepare_custom_table(["go.mod", "go"])
content = header % ("RSEPedia Top Go Dependencies", "go") + custom_cli.render(
start_end_blocks=False
)
data = global_cli.load_datafiles(data_files, includes=["go.mod", "go"])
content = header % ("RSEPedia Top Go Dependencies", "go") + global_cli.render(
start_end_blocks=False, data=data
)
write_file(os.path.join("pages", "go.md"), content)
repos_counts["Go"] = count_repos(custom_cli.data)
repos_counts["Go"] = count_repos(data)

# Emoji are problematic for jekyll data
for repo, desc in meta["description"].items():
Expand Down
12 changes: 4 additions & 8 deletions 3.parse_one.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,7 @@

from rse.main import Encyclopedia
from rse.utils.command import Command
from rse.utils.file import (
recursive_find,
write_json,
read_json,
)
from rse.utils.file import recursive_find

import citelang.utils as utils
import citelang.main.parser as parser
Expand All @@ -22,7 +18,6 @@
import tempfile
import shutil
import argparse
import re
import sys
import os

Expand Down Expand Up @@ -112,7 +107,7 @@ def main():
# copy found file into folder
shutil.copyfile(found, reqfile)

try:
try:
cli = parser.RequirementsParser(filename=found, min_credit=0.001)
result = cli.gen(name=repo.uid, min_credit=0.001)
except:
Expand Down Expand Up @@ -143,6 +138,7 @@ def main():
if os.path.exists(destdir) and len(os.listdir(destdir)) == 0:
shutil.rmtree(destdir)
shutil.rmtree(tempdir)



if __name__ == "__main__":
main()
Loading

0 comments on commit 8f28e3e

Please sign in to comment.