Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add find_file functionality from plotting service to api #424

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,12 @@ jobs:
run: |
pytest test/core/test_repositories_integration.py --random-order --random-order-bucket=global --cov --cov-report=xml

- name: Run test utilities
env:
CEPH_DIR: test/test_ceph
run: |
pytest test/core/test_utility.py --random-order --random-order-bucket=global --cov --cov-report=xml

- name: Upload coverage
uses: codecov/codecov-action@e28ff129e5465c2c0dcc6f003fc735cb6ae0c673 # v4.5.0
with:
Expand Down Expand Up @@ -132,7 +138,7 @@ jobs:
DATABASE_URL: postgresql://postgres:password@localhost:5432/test_db
run: |
pytest test/e2e --random-order --random-order-bucket=global --cov --cov-report=xml --ignore=test/e2e/test_job_maker.py

- name: Run test job maker
env:
DEV_MODE: True
Expand Down
113 changes: 113 additions & 0 deletions fia_api/core/utility.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
"""Collection of utility functions"""

import functools
import logging
import os
import sys
from collections.abc import Callable
from contextlib import suppress
from http import HTTPStatus
from pathlib import Path
from typing import Any, TypeVar, cast
Expand All @@ -12,6 +16,17 @@

FuncT = TypeVar("FuncT", bound=Callable[[str], Any])

stdout_handler = logging.StreamHandler(stream=sys.stdout)
logging.basicConfig(
handlers=[stdout_handler],
format="[%(asctime)s]-%(name)s-%(levelname)s: %(message)s",
level=logging.INFO,
)
logger = logging.getLogger(__name__)
logger.info("Starting Plotting Service")
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Lets import the already existent logger

CEPH_DIR = os.environ.get("CEPH_DIR", "../../test/test_ceph")
logger.info("Setting ceph directory to %s", CEPH_DIR)


def forbid_path_characters(func: FuncT) -> FuncT:
"""Decorator that prevents path characters {/, ., \\} from a functions args by raising UnsafePathError"""
Expand Down Expand Up @@ -59,3 +74,101 @@
raise HTTPException(
status_code=HTTPStatus.FORBIDDEN, detail="Invalid path being accessed and file not found."
) from err


def safe_check_filepath_plotting(filepath: Path, base_path: str) -> None:
"""
Check to ensure the path does contain the base path and that it does not resolve to some other directory
:param filepath: the filepath to check
:param base_path: base path to check against
:return:
"""
filepath.resolve(strict=True)
Dismissed Show dismissed Hide dismissed
if not filepath.is_relative_to(base_path):
raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Invalid path being accessed.")

Check warning on line 88 in fia_api/core/utility.py

View check run for this annotation

Codecov / codecov/patch

fia_api/core/utility.py#L88

Added line #L88 was not covered by tests


def find_file_instrument(ceph_dir: str, instrument: str, experiment_number: int, filename: str) -> Path | None:
"""
Find a file likely made by automated reduction of an experiment number
:param ceph_dir: base path of the filename path
:param instrument: name of the instrument to find the file in
:param experiment_number: experiment number of the file
:param filename: name of the file to find
:return: path to the filename or None
"""
# Run normal check
basic_path = Path(ceph_dir) / f"{instrument.upper()}/RBNumber/RB{experiment_number}/autoreduced/{filename}"

# Do a check as we are handling user entered data here
with suppress(OSError):
safe_check_filepath_plotting(filepath=basic_path, base_path=ceph_dir)

if basic_path.exists():
Dismissed Show dismissed Hide dismissed
return basic_path

# Attempt to find file in autoreduced folder
autoreduced_folder = Path(ceph_dir) / f"{instrument.upper()}/RBNumber/RB{experiment_number}/autoreduced"
return _safe_find_file_in_dir(dir_path=autoreduced_folder, base_path=ceph_dir, filename=filename)


def find_file_experiment_number(ceph_dir: str, experiment_number: int, filename: str) -> Path | None:
"""
Find the file for the given user_number
:param ceph_dir: base path of the path
:param experiment_number: experiment_number of the user who made the file and dir path
:param filename: filename to find
:return: Full path to the filename or None
"""
dir_path = Path(ceph_dir) / f"GENERIC/autoreduce/ExperimentNumbers/{experiment_number}/"
return _safe_find_file_in_dir(dir_path=dir_path, base_path=ceph_dir, filename=filename)


def find_file_user_number(ceph_dir: str, user_number: int, filename: str) -> Path | None:
"""
Find the file for the given user_number
:param ceph_dir: base path of the path
:param user_number: user number of the user who made the file and dir path
:param filename: filename to find
:return: Full path to the filename or None
"""
dir_path = Path(ceph_dir) / f"GENERIC/autoreduce/UserNumbers/{user_number}/"
return _safe_find_file_in_dir(dir_path=dir_path, base_path=ceph_dir, filename=filename)


def _safe_find_file_in_dir(dir_path: Path, base_path: str, filename: str) -> Path | None:
"""
Check that the directory path is safe and then search for filename in that directory and sub directories
:param dir_path: Path to check is safe and search in side of
:param base_path: the base directory of the path often just the /ceph dir on runners
:param filename: filename to find
:return: Path to the file or None
"""
# Do a check as we are handling user entered data here
try:
safe_check_filepath_plotting(filepath=dir_path, base_path=base_path)
except OSError:
raise HTTPException(status_code=HTTPStatus.FORBIDDEN, detail="Invalid path being accessed.") from None

if dir_path.exists():
Dismissed Show dismissed Hide dismissed
found_paths = list(dir_path.rglob(filename))
Dismissed Show dismissed Hide dismissed
if len(found_paths) > 0 and found_paths[0].exists():
return found_paths[0]

return None


def request_path_check(path: Path, base_dir: str) -> Path:
"""
Check if the path is not None, and remove the base dir from the path.
:param path: Path to check
:param base_dir: Base dir to remove if present
:return: Path without the base_dir
"""
if path is None:
logger.error("Could not find the file requested.")
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST)

Check warning on line 170 in fia_api/core/utility.py

View check run for this annotation

Codecov / codecov/patch

fia_api/core/utility.py#L168-L170

Added lines #L168 - L170 were not covered by tests
# Remove CEPH_DIR
if path.is_relative_to(base_dir):
path = path.relative_to(base_dir)
return path

Check warning on line 174 in fia_api/core/utility.py

View check run for this annotation

Codecov / codecov/patch

fia_api/core/utility.py#L172-L174

Added lines #L172 - L174 were not covered by tests
54 changes: 53 additions & 1 deletion fia_api/router.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,14 @@
get_job_by_instrument,
job_maker,
)
from fia_api.core.utility import safe_check_filepath
from fia_api.core.utility import (
CEPH_DIR,
find_file_experiment_number,
find_file_instrument,
find_file_user_number,
request_path_check,
safe_check_filepath,
)
from fia_api.scripts.acquisition import (
get_script_by_sha,
get_script_for_job,
Expand Down Expand Up @@ -349,3 +356,48 @@
await write_file_from_remote(file, file_directory)

return f"Successfully uploaded {filename}"


@ROUTER.get("/find_file/instrument/{instrument}/experiment_number/{experiment_number}", tags=["find_files"])
async def find_file_get_instrument(instrument: str, experiment_number: int, filename: str) -> str:
"""
Return the relative path to the env var CEPH_DIR that leads to the requested file if one exists.
:param instrument: Instrument the file belongs to.
:param experiment_number: Experiment number the file belongs to.
:param filename: Filename to find.
:return: The relative path to the file in the CEPH_DIR env var.
"""
path = find_file_instrument(

Check warning on line 370 in fia_api/router.py

View check run for this annotation

Codecov / codecov/patch

fia_api/router.py#L370

Added line #L370 was not covered by tests
ceph_dir=CEPH_DIR, instrument=instrument, experiment_number=experiment_number, filename=filename
)
if path is None:
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST)
return str(request_path_check(path=path, base_dir=CEPH_DIR))

Check warning on line 375 in fia_api/router.py

View check run for this annotation

Codecov / codecov/patch

fia_api/router.py#L373-L375

Added lines #L373 - L375 were not covered by tests


@ROUTER.get("/find_file/generic/experiment_number/{experiment_number}", tags=["find_files"])
async def find_file_generic_experiment_number(experiment_number: int, filename: str) -> str:
"""
Return the relative path to the env var CEPH_DIR that leads to the requested file if one exists.
:param experiment_number: Experiment number the file belongs to.
:param filename: Filename to find
:return: The relative path to the file in the CEPH_DIR env var.
"""
path = find_file_experiment_number(ceph_dir=CEPH_DIR, experiment_number=experiment_number, filename=filename)
if path is None:
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST)
return str(request_path_check(path=path, base_dir=CEPH_DIR))

Check warning on line 389 in fia_api/router.py

View check run for this annotation

Codecov / codecov/patch

fia_api/router.py#L386-L389

Added lines #L386 - L389 were not covered by tests


@ROUTER.get("/find_file/generic/user_number/{user_number}", tags=["find_files"])
async def find_file_generic_user_number(user_number: int, filename: str) -> str:
"""
Return the relative path to the env var CEPH_DIR that leads to the requested file if one exists.
:param user_number: Experiment number the file belongs to.
:param filename: Filename to find
:return: The relative path to the file in the CEPH_DIR env var.
"""
path = find_file_user_number(ceph_dir=CEPH_DIR, user_number=user_number, filename=filename)
if path is None:
raise HTTPException(status_code=HTTPStatus.BAD_REQUEST)
return str(request_path_check(path, base_dir=CEPH_DIR))

Check warning on line 403 in fia_api/router.py

View check run for this annotation

Codecov / codecov/patch

fia_api/router.py#L400-L403

Added lines #L400 - L403 were not covered by tests
148 changes: 147 additions & 1 deletion test/core/test_utility.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,34 @@
Tests for utility functions
"""

import shutil
from collections.abc import Callable
from pathlib import Path
from tempfile import TemporaryDirectory
from typing import Any

import pytest
from fastapi import HTTPException

from fia_api.core.exceptions import UnsafePathError
from fia_api.core.utility import filter_script_for_tokens, forbid_path_characters, safe_check_filepath
from fia_api.core.utility import (
filter_script_for_tokens,
find_file_experiment_number,
find_file_instrument,
find_file_user_number,
forbid_path_characters,
safe_check_filepath,
safe_check_filepath_plotting,
)

CEPH_DIR = Path(TemporaryDirectory().name)


@pytest.fixture(autouse=True)
def _setup_and_clean_temp_dir():
CEPH_DIR.mkdir(parents=True, exist_ok=True)
yield
shutil.rmtree(CEPH_DIR)


def dummy_string_arg_function(arg: str) -> str:
Expand Down Expand Up @@ -123,3 +144,128 @@ def test_non_existing_folder_path(tmp_path):
safe_check_filepath(file_path, base_path)
assert exc_info.errisinstance(HTTPException)
assert "Invalid path being accessed and file not found" in exc_info.exconly()


@pytest.mark.parametrize(
("filepath_to_check", "result"),
[
(Path(CEPH_DIR) / "good" / "path" / "here" / "file.txt", True),
(Path(CEPH_DIR) / "bad" / "path" / "here" / "file.txt", False),
(Path(CEPH_DIR) / ".." / ".." / ".." / "file.txt", False),
],
)
def test_safe_check_filepath_plotting(filepath_to_check: Path, result: bool):
if result:
filepath_to_check.parent.mkdir(parents=True, exist_ok=True)
filepath_to_check.write_text("Hello World!")
safe_check_filepath_plotting(filepath_to_check, str(CEPH_DIR))
else:
with pytest.raises((HTTPException, FileNotFoundError)):
safe_check_filepath_plotting(filepath_to_check, str(CEPH_DIR))


def test_find_instrument_most_likely_file():
with TemporaryDirectory() as tmpdir:
instrument_name = "FUN_INST"
experiment_number = 1231234
filename = "MAR1912991240_asa_dasd_123.nxspe"
path = Path(tmpdir) / instrument_name / "RBNumber" / f"RB{experiment_number}" / "autoreduced" / filename
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text("Hello World!")

found_file = find_file_instrument(tmpdir, instrument_name, experiment_number, filename)

assert found_file == path


@pytest.mark.parametrize(
("find_file_method", "method_inputs", "path_to_make"),
[
(
find_file_instrument,
{
"ceph_dir": CEPH_DIR,
"instrument": "FUN_INST",
"experiment_number": 1231234,
"filename": "MAR1912991240_asa_dasd_123.nxspe",
},
CEPH_DIR / "FUN_INST" / "RBNumber" / "RB1231234" / "autoreduced" / "MAR1912991240_asa_dasd_123.nxspe",
),
(
find_file_experiment_number,
{"ceph_dir": CEPH_DIR, "experiment_number": 1231234, "filename": "MAR1912991240_asa_dasd_123.nxspe"},
CEPH_DIR / "GENERIC" / "autoreduce" / "ExperimentNumbers" / "1231234" / "MAR1912991240_asa_dasd_123.nxspe",
),
(
find_file_user_number,
{"ceph_dir": CEPH_DIR, "user_number": 1231234, "filename": "MAR1912991240_asa_dasd_123.nxspe"},
CEPH_DIR / "GENERIC" / "autoreduce" / "UserNumbers" / "1231234" / "MAR1912991240_asa_dasd_123.nxspe",
),
],
)
def test_find_file_method_in_a_dir(find_file_method: Callable, method_inputs: dict[str, Any], path_to_make: Path):
with TemporaryDirectory() as tmpdir:
instrument_name = "FUN_INST"
experiment_number = 1231234
filename = "MAR1912991240_asa_dasd_123.nxspe"
path = (
Path(tmpdir)
/ instrument_name
/ "RBNumber"
/ f"RB{experiment_number}"
/ "autoreduced"
/ "run-123141"
/ filename
)
path.parent.mkdir(parents=True, exist_ok=True)
path.write_text("Hello World!")

found_file = find_file_instrument(tmpdir, instrument_name, experiment_number, filename)

assert found_file == path


@pytest.mark.parametrize(
("find_file_method", "method_inputs", "path_to_make"),
[
(
find_file_instrument,
{
"ceph_dir": CEPH_DIR,
"instrument": "FUN_INST",
"experiment_number": 1231234,
"filename": "MAR1912991240_asa_dasd_123.nxspe",
},
CEPH_DIR / "FUN_INST" / "RBNumber" / "RB1231234" / "autoreduced" / "MAR1912991240_asa_dasd_123.nxspe",
),
(
find_file_experiment_number,
{"ceph_dir": CEPH_DIR, "experiment_number": 1231234, "filename": "MAR1912991240_asa_dasd_123.nxspe"},
CEPH_DIR / "GENERIC" / "autoreduce" / "ExperimentNumbers" / "1231234" / "MAR1912991240_asa_dasd_123.nxspe",
),
(
find_file_user_number,
{"ceph_dir": CEPH_DIR, "user_number": 1231234, "filename": "MAR1912991240_asa_dasd_123.nxspe"},
CEPH_DIR / "GENERIC" / "autoreduce" / "UserNumbers" / "1231234" / "MAR1912991240_asa_dasd_123.nxspe",
),
],
)
def test_find_file_method_when_failed(find_file_method: Callable, method_inputs: dict[str, Any], path_to_make: Path):
path_to_make.parent.mkdir(parents=True, exist_ok=True)

found_file = find_file_method(**method_inputs)

assert found_file is None


@pytest.mark.parametrize(
("find_file_method", "method_inputs"),
[
(find_file_instrument, {CEPH_DIR, "~/.ssh", "id_rsa", "MAR1912991240_asa_dasd_123.nxspe"}),
(find_file_experiment_number, {CEPH_DIR, "~/.ssh", "id_rsa"}),
(find_file_user_number, {CEPH_DIR, "~/.ssh", "id_rsa"}),
],
)
def test_find_file_methods_does_not_allow_path_injection(find_file_method: Callable, method_inputs: dict[str, Any]):
with pytest.raises(HTTPException):
find_file_method(*method_inputs)
Binary file not shown.
Binary file not shown.
Loading