Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[ENH] Add endpoint to list all existing data directories #3

Merged
merged 15 commits into from
Jan 30, 2024
12 changes: 9 additions & 3 deletions api/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,12 +97,18 @@ def get_owner_repo_provider(repo_url,provider_full_name=False):
repo = repo_url.split("/")[-1]
owner = repo_url.split("/")[-2]
provider = repo_url.split("/")[-3]
if provider not in ["github.com","gitlab.com"]:
if provider not in ["github.com","gitlab.com","www.github.com","www.gitlab.com"]:
abort(400, "Unrecognized repository provider.")

if provider == "www.github.com":
provider = "github.com"
if provider == "www.gitlab.com":
provider = "gitlab.com"

if not provider_full_name:
if provider == "github.com":
if (provider == "github.com"):
provider = "gh"
elif provider == "gitlab.com":
elif (provider == "gitlab.com"):
provider = "gl"

return [owner,repo,provider]
Expand Down
3 changes: 2 additions & 1 deletion api/github_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def gh_filter(input_str):
"""
Returns repository name in owner/repository_name format
"""
github_url_pattern = r'^https?://github\.com/([^/]+)/([^/]+)'
github_url_pattern = r'^https?://(?:www\.)?github\.com/([^/]+)/([^/]+)'
match = re.match(github_url_pattern, input_str)
if match:
owner = match.group(1)
Expand Down Expand Up @@ -148,6 +148,7 @@ def gh_get_project_name(github_client,target_repo):
folder as required by neurolibre.
"""
repo = github_client.get_repo(gh_filter(target_repo))
print(target_repo)
# This is a requirement
contents = repo.get_contents("binder/data_requirement.json")
data = json.loads(contents.decoded_content)
Expand Down
153 changes: 106 additions & 47 deletions api/neurolibre_celery_tasks.py

Large diffs are not rendered by default.

11 changes: 10 additions & 1 deletion api/neurolibre_common_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,4 +104,13 @@ def api_unlock_build(user, repo_url):
response = make_response(f"No build lock found for {repo_url}",404)

response.mimetype = "text/plain"
return response
return response

@common_api.route('/public/data', methods=['GET'])
@doc(description='List the name of folders under /DATA.', tags=['Data'])
def api_preview_list():
    """
    Report what is stored directly under the /DATA folder.

    Returns an HTTP 200 response whose JSON body is the list of entry
    names reported by os.listdir('/DATA'), in the order it yields them.
    """
    data_entries = os.listdir('/DATA')
    payload = jsonify(data_entries)
    return make_response(payload, 200)
68 changes: 54 additions & 14 deletions api/neurolibre_preview_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
import neurolibre_common_api
from flask import jsonify, make_response
from common import *
from schema import BuildSchema, BuildTestSchema
from schema import BuildSchema, BuildTestSchema, DownloadSchema
from flask_htpasswd import HtPasswdAuth
from dotenv import load_dotenv
from werkzeug.middleware.proxy_fix import ProxyFix
from flask_apispec import FlaskApiSpec, marshal_with, doc, use_kwargs
from apispec import APISpec
from apispec.ext.marshmallow import MarshmallowPlugin
from github_client import *
from neurolibre_celery_tasks import celery_app, sleep_task, preview_build_book_task, preview_build_book_test_task
from neurolibre_celery_tasks import celery_app, sleep_task, preview_build_book_task, preview_build_book_test_task, preview_download_data
from celery.events.state import State
from github import Github, UnknownObjectException

Expand Down Expand Up @@ -52,13 +52,13 @@

app.logger.info(f"Using {binderName}.{domainName} as BinderHub.")

serverContact = app.config["SERVER_CONTACT"]
serverContact = app.config["SERVER_CONTACT"]
serverName = app.config["SERVER_SLUG"]
serverDescription = app.config["SERVER_DESC"]
serverTOS = app.config["SERVER_TOS"]
serverAbout = app.config["SERVER_ABOUT"] + app.config["SERVER_LOGO"]

# API specifications displayed on the swagger UI
# API specifications displayed on the swagger UI
spec = APISpec(
title="Neurolibre preview & screening API",
version='v1',
Expand All @@ -84,6 +84,7 @@
docs.register(neurolibre_common_api.api_get_books,blueprint="common_api")
docs.register(neurolibre_common_api.api_heartbeat,blueprint="common_api")
docs.register(neurolibre_common_api.api_unlock_build,blueprint="common_api")
docs.register(neurolibre_common_api.api_preview_list,blueprint="common_api")

"""
Configuration END
Expand All @@ -97,6 +98,45 @@
API Endpoints START
"""

@app.route('/api/data/cache', methods=['POST'])
@htpasswd.required
@marshal_with(None,code=422,description="Cannot validate the payload, missing or invalid entries.")
@use_kwargs(DownloadSchema())
@doc(description='Endpoint for downloading data through repo2data.', tags=['Data'])
def api_download_data(user, id, repo_url, email, is_overwrite):
    """
    Queue a background data download for a preview and report progress
    on the review issue.

    A "pending" comment is posted on issue `id` of the review repository,
    then the download is handed to the `preview_download_data` Celery task.
    When a task id comes back, the comment is moved to "received" and a
    200 response is returned; otherwise the comment is marked "failure"
    and a 500 response is returned.
    """
    issue_id = id
    task_title = "Download data for preview."

    # The bot token is read from the environment on every request.
    bot_token = os.getenv('GH_BOT')
    github_client = Github(bot_token)

    # Post the initial comment; its id is reused for the later status updates.
    comment_id = gh_template_respond(github_client, "pending", task_title, reviewRepository, issue_id)

    celery_payload = {
        "repo_url": repo_url,
        "rate_limit": build_rate_limit,
        "binder_name": binderName,
        "domain_name": domainName,
        "comment_id": comment_id,
        "issue_id": issue_id,
        "review_repository": reviewRepository,
        "task_title": task_title,
        "overwrite": is_overwrite,
        "email": email,
    }

    task_result = preview_download_data.apply_async(args=[celery_payload])
    task_id = task_result.task_id

    if task_id is None:
        # If not successfully assigned, fail the status immediately and return 500
        gh_template_respond(
            github_client,
            "failure",
            task_title,
            reviewRepository,
            issue_id,
            task_id,
            comment_id,
            "Internal server error: NeuroLibre background task manager could not receive the request.",
        )
        return make_response(jsonify("Celery could not start the task."), 500)

    gh_template_respond(
        github_client,
        "received",
        task_title,
        reviewRepository,
        issue_id,
        task_id,
        comment_id,
        "",
    )
    return make_response(jsonify("Celery task assigned successfully."), 200)

docs.register(api_download_data)

@app.route('/api/book/build', methods=['POST'])
@htpasswd.required
Expand All @@ -115,16 +155,16 @@ def api_book_build(user, id, repo_url, commit_hash):
task_title = "Book Build (Preview)"
comment_id = gh_template_respond(github_client,"pending",task_title,reviewRepository,issue_id)

celery_payload = dict(repo_url=repo_url,
commit_hash=commit_hash,
celery_payload = dict(repo_url=repo_url,
commit_hash=commit_hash,
rate_limit=build_rate_limit,
binder_name=binderName,
binder_name=binderName,
domain_name = domainName,
comment_id=comment_id,
issue_id=issue_id,
review_repository=reviewRepository,
task_title=task_title)

task_result = preview_build_book_task.apply_async(args=[celery_payload])

if task_result.task_id is not None:
Expand Down Expand Up @@ -153,18 +193,18 @@ def api_book_build_test(user, repo_url, commit_hash, email):
[owner, repo, provider] = get_owner_repo_provider(repo_url)
mail_subject = f"NRP test build for {owner}/{repo}"
mail_body = f"We have received your request to build a NeuroLibre reproducible preprint from {repo_url} at {commit_hash}. \n Your request has been queued, we will inform you when the process starts."

send_email(email, mail_subject, mail_body)

celery_payload = dict(repo_url=repo_url,
commit_hash=commit_hash,
celery_payload = dict(repo_url=repo_url,
commit_hash=commit_hash,
rate_limit=build_rate_limit,
binder_name=binderName,
binder_name=binderName,
domain_name = domainName,
email = email,
review_repository=reviewRepository,
mail_subject=mail_subject)

task_result = preview_build_book_test_task.apply_async(args=[celery_payload])

if task_result.task_id is not None:
Expand All @@ -174,7 +214,7 @@ def api_book_build_test(user, repo_url, commit_hash, email):
# If not successfully assigned, fail the status immediately and return 500
mail_body = f"We could not start processing your NRP test request due to a technical issue on the server side. Please contact [email protected]."
response = make_response(jsonify("Celery could not start the task."),500)

send_email(email, mail_subject, mail_body)
return response

Expand Down
Loading
Loading