From 6afe9aca7909953b86a676cf1c7cc78052920dc9 Mon Sep 17 00:00:00 2001 From: JH Date: Mon, 24 Jun 2019 22:33:47 +0800 Subject: [PATCH 01/10] speedup --- that_is_me_on_github/lib/utils.py | 37 +++++++++++++++++++++---------- that_is_me_on_github/main.py | 23 ++++++++++++++----- 2 files changed, 43 insertions(+), 17 deletions(-) diff --git a/that_is_me_on_github/lib/utils.py b/that_is_me_on_github/lib/utils.py index 2f27264..4bf7f6d 100644 --- a/that_is_me_on_github/lib/utils.py +++ b/that_is_me_on_github/lib/utils.py @@ -1,3 +1,5 @@ +from concurrent.futures import ProcessPoolExecutor + from typing import List from typing import Dict from github.Commit import Commit @@ -20,10 +22,10 @@ def single_user(g: Github, username: str) -> NamedUser: # get repos owned by username def owned_repos(g: Github, username, is_public=True) -> List[Repository]: params = ["user:{}".format(username)] - + if is_public: params.append("is:public") - + return [ repo for repo in g.search_repositories(build_query(params), "stars", "desc") ] @@ -34,14 +36,14 @@ def commits(g: Github, username, is_public=True, orgs=[], repos=[]) -> List[Comm params = ["author:{}".format(username)] if is_public: params.append("is:public") - + if orgs or repos: for org in orgs: params.append("org:{}".format(org)) - + for repo in repos: params.append("repo:{}".format(repo)) - + return [ commit for commit in g.search_commits(build_query(params), "author-date", "desc") @@ -50,28 +52,39 @@ def commits(g: Github, username, is_public=True, orgs=[], repos=[]) -> List[Comm # get issues or prs authored by username and filtered by certain repos and organizations def issues_and_prs( - g: Github, username: str, is_public=True, type="", orgs=[], repos=[] + g: Github, username: str, is_public=True, type="", orgs=[], repos=[] ) -> Dict[str, List[Issue]]: params = ["author:{}".format(username)] - + if type: params.append("type:{}".format(type)) - + if is_public: params.append("is:public") - + if orgs or repos: for org in orgs: params.append("org:{}".format(org)) - + for repo in repos: params.append("repo:{}".format(repo)) - + issues_and_prs = {} for issue_or_pr in g.search_issues(build_query(params), "updated", "desc"): if issue_or_pr.repository.name not in issues_and_prs: issues_and_prs[issue_or_pr.repository.name] = [issue_or_pr] else: issues_and_prs[issue_or_pr.repository.name].append(issue_or_pr) - + return issues_and_prs + + +def handle_tasks(tasks): + with ProcessPoolExecutor() as executor: + futures = [] + for task in tasks: + fn, args, kwargs = task["func"], task["args"], task["kwargs"] + future = executor.submit(fn, args=args, kwargs=kwargs) + futures.append(future) + results = [future.result() for future in futures] + return results diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 7341d16..0a144e1 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -4,6 +4,7 @@ from lib.render import Render from lib.utils import * import os +from time import time @click.group() @@ -65,6 +66,7 @@ def generate( repo_filter: str, output: str, ): + start = time() path = os.path.expanduser(output) try: f = open(path, "w+") @@ -100,15 +102,26 @@ def generate( [item.strip() for item in repo_filter.split(",")] if repo_filter else [] ) + container = [ + {"func": owned_repos, "args": [g, username]}, + {"func": issues_and_prs, "args": [g, username], "kwargs": dict(type="pr", + orgs=org_filter, + repos=repo_filter)}, + {"func": issues_and_prs, "args": [g, username], "kwargs": dict(type="issue", + orgs=org_filter, + repos=repo_filter)} + ] + results = handle_tasks(container) + Render().render( user, - owned_repos(g, username), - issues_and_prs(g, username, type="pr", orgs=org_filter, repos=repo_filter), - issues_and_prs( - g, username, type="issue", orgs=org_filter, repos=repo_filter - ), + results[0], + results[1], + results[2], path, ) + end = time() + click.echo(f"waste time is {end-start} seconds") except RateLimitExceededException: click.echo( "Github rate limit reached, Please provide username, password or api_token (not support yet), and try again" From be691bb9ac0c080d8ccb3845d5a202697fdff9af Mon Sep 17 00:00:00 2001 From: JH Date: Mon, 24 Jun 2019 23:03:58 +0800 Subject: [PATCH 02/10] update --- that_is_me_on_github/lib/utils.py | 7 ++++--- that_is_me_on_github/main.py | 2 +- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/that_is_me_on_github/lib/utils.py b/that_is_me_on_github/lib/utils.py index 4bf7f6d..3f9d2e3 100644 --- a/that_is_me_on_github/lib/utils.py +++ b/that_is_me_on_github/lib/utils.py @@ -80,11 +80,12 @@ def issues_and_prs( def handle_tasks(tasks): - with ProcessPoolExecutor() as executor: + with ProcessPoolExecutor(max_workers=5) as executor: futures = [] for task in tasks: - fn, args, kwargs = task["func"], task["args"], task["kwargs"] - future = executor.submit(fn, args=args, kwargs=kwargs) + fn, args = task["func"], task["args"] + kwargs = task.get("kwargs", {}) # type: dict + future = executor.submit(fn, *args, **kwargs) futures.append(future) results = [future.result() for future in futures] return results diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 0a144e1..836db24 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -121,7 +121,7 @@ def generate( path, ) end = time() - click.echo(f"waste time is {end-start} seconds") + click.echo(f"cost time {end-start} seconds") except RateLimitExceededException: click.echo( "Github rate limit reached, Please provide username, password or api_token (not support yet), and try again" From 5321be0735dd518485024de55b64c862ea022b4b Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 09:47:29 +0800 Subject: [PATCH 03/10] add test cost time --- that_is_me_on_github/main.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 836db24..fb929a6 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -87,8 +87,10 @@ def generate( g = Github(auth_username, auth_password) else: g = Github() - + t1 = time() user = single_user(g, username) + t2 = time() + click.echo(f"request user cost time: {t2-t1}") if not user: click.echo("User {} Not Found.".format(username)) raise click.Abort() @@ -111,8 +113,12 @@ def generate( orgs=org_filter, repos=repo_filter)} ] + t3 = time() results = handle_tasks(container) - + t4 = time() + click.echo(f"request github cost time: {t4-t3}") + + t5 = time() Render().render( user, results[0], @@ -120,6 +126,8 @@ def generate( results[2], path, ) + t6 = time() + click.echo(f"render cost time: {t6 - t5}") end = time() click.echo(f"cost time {end-start} seconds") except RateLimitExceededException: From a2a226aefeb2d7d0eb8be00dffa732e347431f57 Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 10:03:38 +0800 Subject: [PATCH 04/10] update --- that_is_me_on_github/main.py | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index fb929a6..c7921ba 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -87,13 +87,6 @@ def generate( g = Github(auth_username, auth_password) else: g = Github() - t1 = time() - user = single_user(g, username) - t2 = time() - click.echo(f"request user cost time: {t2-t1}") - if not user: - click.echo("User {} Not Found.".format(username)) - raise click.Abort() click.echo("Please wait for a few seconds.") @@ -111,16 +104,21 @@ def generate( repos=repo_filter)}, {"func": issues_and_prs, "args": [g, username], "kwargs": dict(type="issue", orgs=org_filter, - repos=repo_filter)} + repos=repo_filter)}, + {"func": single_user, "args": [g, username]}, ] t3 = time() results = handle_tasks(container) + if not results[3]: + click.echo("User {} Not Found.".format(username)) + raise click.Abort() + t4 = time() click.echo(f"request github cost time: {t4-t3}") t5 = time() Render().render( - user, + results[3], results[0], results[1], results[2], From 844bae5fcb39d78df2108e217e1d80a87f69f1da Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 10:12:58 +0800 Subject: [PATCH 05/10] use thread --- that_is_me_on_github/lib/utils.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/that_is_me_on_github/lib/utils.py b/that_is_me_on_github/lib/utils.py index 3f9d2e3..e0fa35b 100644 --- a/that_is_me_on_github/lib/utils.py +++ b/that_is_me_on_github/lib/utils.py @@ -1,4 +1,4 @@ -from concurrent.futures import ProcessPoolExecutor +from concurrent.futures import ThreadPoolExecutor from typing import List from typing import Dict @@ -80,7 +80,7 @@ def issues_and_prs( def handle_tasks(tasks): - with ProcessPoolExecutor(max_workers=5) as executor: + with ThreadPoolExecutor(max_workers=5) as executor: futures = [] for task in tasks: fn, args = task["func"], task["args"] From 15685dd9d1e4da0fa91649793ea9159767f6c128 Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 11:00:24 +0800 Subject: [PATCH 06/10] update --- that_is_me_on_github/main.py | 42 ++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 21 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index c7921ba..2cc9d0c 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -58,13 +58,13 @@ def version(): help="The output markdown file path. default value is `./that_is_me_on_github.md`", ) def generate( - username: str, - do_auth: bool, - auth_username: str, - auth_password: str, - org_filter: str, - repo_filter: str, - output: str, + username: str, + do_auth: bool, + auth_username: str, + auth_password: str, + org_filter: str, + repo_filter: str, + output: str, ): start = time() path = os.path.expanduser(output) @@ -74,7 +74,7 @@ def generate( except IOError: click.echo("Error: output path not exist and not creatable.") raise click.Abort() - + try: if do_auth: if not auth_username: @@ -83,28 +83,28 @@ def generate( auth_password = click.prompt( "Your github password", type=str, hide_input=True ) - + g = Github(auth_username, auth_password) else: g = Github() - + click.echo("Please wait for a few seconds.") - + org_filter = ( [item.strip() for item in org_filter.split(",")] if org_filter else [] ) repo_filter = ( [item.strip() for item in repo_filter.split(",")] if repo_filter else [] ) - + container = [ {"func": owned_repos, "args": [g, username]}, - {"func": issues_and_prs, "args": [g, username], "kwargs": dict(type="pr", - orgs=org_filter, - repos=repo_filter)}, - {"func": issues_and_prs, "args": [g, username], "kwargs": dict(type="issue", - orgs=org_filter, - repos=repo_filter)}, + {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", + 'orgs': [], + 'repos': []}}, + {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "issue", + 'orgs': [], + 'repos': []}}, {"func": single_user, "args": [g, username]}, ] t3 = time() @@ -112,9 +112,9 @@ def generate( if not results[3]: click.echo("User {} Not Found.".format(username)) raise click.Abort() - + t4 = time() - click.echo(f"request github cost time: {t4-t3}") + click.echo(f"request github cost time: {t4 - t3}") t5 = time() Render().render( @@ -127,7 +127,7 @@ def generate( t6 = time() click.echo(f"render cost time: {t6 - t5}") end = time() - click.echo(f"cost time {end-start} seconds") + click.echo(f"cost time {end - start} seconds") except RateLimitExceededException: click.echo( "Github rate limit reached, Please provide username, password or api_token (not support yet), and try again" From 0fc03fd39f285dbba664ff79f003334be9cd364f Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 13:17:06 +0800 Subject: [PATCH 07/10] update --- that_is_me_on_github/main.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 2cc9d0c..9d01a4b 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -102,14 +102,15 @@ def generate( {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", 'orgs': [], 'repos': []}}, + {"func": single_user, "args": [g, username]}, {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "issue", 'orgs': [], 'repos': []}}, - {"func": single_user, "args": [g, username]}, + ] t3 = time() results = handle_tasks(container) - if not results[3]: + if not results[2]: click.echo("User {} Not Found.".format(username)) raise click.Abort() @@ -118,10 +119,10 @@ def generate( t5 = time() Render().render( - results[3], + results[2], results[0], results[1], - results[2], + results[3], path, ) t6 = time() From e136e25098226dfcdcc9fd6fd271979fa51906df Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 13:30:58 +0800 Subject: [PATCH 08/10] fix --- that_is_me_on_github/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 9d01a4b..45cdc72 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -100,12 +100,12 @@ def generate( container = [ {"func": owned_repos, "args": [g, username]}, {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", - 'orgs': [], - 'repos': []}}, + 'orgs': org_filter, + 'repos': repo_filter}}, {"func": single_user, "args": [g, username]}, {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "issue", - 'orgs': [], - 'repos': []}}, + 'orgs': org_filter, + 'repos': repo_filter}}, ] t3 = time() From 82a38e7c26bfd546282fe9aaccb78b7e349f9b28 Mon Sep 17 00:00:00 2001 From: JH Date: Tue, 25 Jun 2019 15:11:41 +0800 Subject: [PATCH 09/10] Adjust the order --- that_is_me_on_github/main.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 45cdc72..7c06de3 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -99,14 +99,14 @@ def generate( container = [ {"func": owned_repos, "args": [g, username]}, - {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", + {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "issue", 'orgs': org_filter, 'repos': repo_filter}}, {"func": single_user, "args": [g, username]}, - {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "issue", + {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", 'orgs': org_filter, 'repos': repo_filter}}, - + ] t3 = time() results = handle_tasks(container) @@ -121,8 +121,8 @@ def generate( Render().render( results[2], results[0], - results[1], results[3], + results[1], path, ) t6 = time() From 26a2d2a9749fe09cc58b2f824d5e67498a5cf255 Mon Sep 17 00:00:00 2001 From: JH Date: Thu, 27 Jun 2019 12:45:05 +0800 Subject: [PATCH 10/10] del cost time code --- that_is_me_on_github/lib/utils.py | 4 ++-- that_is_me_on_github/main.py | 13 ++----------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/that_is_me_on_github/lib/utils.py b/that_is_me_on_github/lib/utils.py index e0fa35b..11d39f8 100644 --- a/that_is_me_on_github/lib/utils.py +++ b/that_is_me_on_github/lib/utils.py @@ -1,5 +1,3 @@ -from concurrent.futures import ThreadPoolExecutor - from typing import List from typing import Dict from github.Commit import Commit @@ -8,6 +6,8 @@ from github.Repository import Repository from github import Github +from concurrent.futures import ThreadPoolExecutor + # build query by params def build_query(params: str) -> str: diff --git a/that_is_me_on_github/main.py b/that_is_me_on_github/main.py index 7c06de3..e68fbd9 100644 --- a/that_is_me_on_github/main.py +++ b/that_is_me_on_github/main.py @@ -66,7 +66,6 @@ def generate( repo_filter: str, output: str, ): - start = time() path = os.path.expanduser(output) try: f = open(path, "w+") @@ -106,18 +105,13 @@ def generate( {"func": issues_and_prs, "args": [g, username], "kwargs": {'type': "pr", 'orgs': org_filter, 'repos': repo_filter}}, - ] - t3 = time() + results = handle_tasks(container) if not results[2]: click.echo("User {} Not Found.".format(username)) raise click.Abort() - t4 = time() - click.echo(f"request github cost time: {t4 - t3}") - - t5 = time() Render().render( results[2], results[0], @@ -125,10 +119,7 @@ def generate( results[1], path, ) - t6 = time() - click.echo(f"render cost time: {t6 - t5}") - end = time() - click.echo(f"cost time {end - start} seconds") + except RateLimitExceededException: click.echo( "Github rate limit reached, Please provide username, password or api_token (not support yet), and try again"