Skip to content

Commit 82a4aa5

Browse files
committed
refactor and rename check_package_source_url
1 parent cfe22b9 commit 82a4aa5

File tree

2 files changed

+71
-20
lines changed

2 files changed

+71
-20
lines changed

lisa/util/__init__.py

Lines changed: 67 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@
4848
# source -
4949
# https://github.com/django/django/blob/stable/1.3.x/django/core/validators.py#L45
5050
__url_pattern = re.compile(
51-
r"^(?:http|s?ftp)s?://" # http:// or https://
51+
r"^(?:http|https|sftp|ftp)://" # http:// or https://
5252
r"(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)"
5353
r"+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|" # ...domain
5454
r"localhost|" # localhost...
@@ -601,31 +601,51 @@ def is_valid_url(url: str, raise_error: bool = True) -> bool:
601601
return is_url
602602

603603

604-
def is_valid_source_code_package(
604+
def _raise_or_log_failure(log: "Logger", raise_error: bool, failure_msg: str) -> bool:
    """Report a failed check, either loudly or quietly.

    When ``raise_error`` is true a ``LisaException`` carrying ``failure_msg``
    is raised. Otherwise the message is written to ``log`` at debug level and
    ``False`` is returned, so the caller can use the result as its own
    "check failed" return value.
    """
    if not raise_error:
        # quiet mode: record the reason and signal failure to the caller
        log.debug(failure_msg)
        return False
    raise LisaException(failure_msg)
610+
611+
612+
# big function to check the parts of a url
613+
# allow raising exceptions or log and return a bool
614+
# allows checks for:
615+
# expected domains
616+
# protocols (require https, sftp, etc)
617+
# filenames (pattern matching)
618+
def check_url(
    log: "Logger",
    source_url: str,
    expected_filename_pattern: Optional[Pattern[str]] = None,
    allowed_protocols: Optional[List[str]] = None,
    expected_domains: Optional[List[str]] = None,
    raise_error: bool = False,
) -> bool:
    """Check that ``source_url`` is a URL and satisfies the optional constraints.

    The checks run in this order and stop at the first failure:

    1. ``is_valid_url`` accepts the string.
    2. ``urlparse`` can parse it.
    3. The last path segment (the filename) matches
       ``expected_filename_pattern`` in full, if a pattern is given.
    4. The net location is one of ``expected_domains`` or a subdomain of
       one of them, if any domains are given.
    5. The scheme is one of ``allowed_protocols`` and the net location is
       not empty.

    Args:
        log: logger that receives the failure reason at debug level when
            ``raise_error`` is false.
        source_url: the URL to validate.
        expected_filename_pattern: compiled pattern the whole filename must
            match. ``None`` skips the filename check.
        allowed_protocols: accepted URL schemes. Defaults to ``["https"]``
            when empty or ``None``.
        expected_domains: accepted domains. Empty or ``None`` skips the
            domain check.
        raise_error: when true, a failed check raises ``LisaException``
            (via ``_raise_or_log_failure``) instead of returning ``False``.

    Returns:
        ``True`` when every requested check passes, ``False`` otherwise
        (only reachable when ``raise_error`` is false).
    """
    # avoid using a mutable default parameter
    if not allowed_protocols:
        allowed_protocols = ["https"]

    # first, check if it's a url.
    if not is_valid_url(url=source_url, raise_error=False):
        _raise_or_log_failure(
            log,
            raise_error,
            f"{source_url} is not a valid URL, check your arguments.",
        )
        return False

    # NOTE: urllib might not work as you'd expect.
    # It doesn't throw on lots of things you wouldn't expect to be urls.
    # You must verify the parts on your own, some of them may be empty, some null.
    # check: https://docs.python.org/3/library/urllib.parse.html#url-parsing
    try:
        parts = urlparse(source_url)
    except ValueError:
        _raise_or_log_failure(
            log,
            raise_error,
            f"urlparse failed to parse url {source_url}, check your arguments.",
        )
        return False

    # ex: from https://www.com/path/to/file.tar
    # scheme : https
    # netloc : www.com
    # path   : /path/to/file.tar

    # get the filename from the path portion of the url
    file_name = parts.path.split("/")[-1]
    # fullmatch requires the whole filename to match, which avoids
    # accepting an accidental partial (prefix-only) match.
    if expected_filename_pattern is not None and not (
        expected_filename_pattern.fullmatch(file_name)
    ):
        _raise_or_log_failure(
            log,
            raise_error,
            f"File at {source_url} did not match pattern "
            f"{expected_filename_pattern.pattern}.",
        )
        return False

    # check the expected domain is correct if present
    if expected_domains:
        netloc = parts.netloc.lower()
        # the url's net location must BE an expected domain or a subdomain
        # of one; the leading "." stops "evilgithub.com" matching "github.com"
        domain_matches = any(
            netloc == domain.lower() or netloc.endswith("." + domain.lower())
            for domain in expected_domains
        )
        if not domain_matches:
            # message is only built here, where expected_domains is known
            # to be a non-empty list
            _raise_or_log_failure(
                log,
                raise_error,
                f"net location of url {source_url} did not match "
                f"expected domains {','.join(expected_domains)} ",
            )
            return False

    # Check the protocol (aka scheme) in the url
    # default is check access is via https
    if parts.scheme not in allowed_protocols or parts.netloc == "":
        _raise_or_log_failure(
            log,
            raise_error,
            f"URL {source_url} uses an invalid protocol "
            "or net location! Check url argument.",
        )
        return False

    return True
654704

655705

656706
def filter_ansi_escape(content: str) -> str:

microsoft/testsuites/dpdk/rdma_core.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from lisa import Node
1010
from lisa.operating_system import Debian, Fedora, Suse
1111
from lisa.tools import Git, Make, Pkgconfig, Tar, Wget
12-
from lisa.util import LisaException, SkippedException, is_valid_source_code_package
12+
from lisa.util import LisaException, SkippedException, check_url
1313

1414

1515
class RdmaCoreManager:
@@ -77,9 +77,10 @@ def _check_source_install(self) -> None:
7777
)
7878

7979
# finally, validate what we have looks reasonable and cool
80-
is_valid_package = is_valid_source_code_package(
80+
is_valid_package = check_url(
81+
self.node.log,
8182
source_url=self._rdma_core_source,
82-
expected_package_name_pattern=self._source_pattern,
83+
expected_path_pattern=self._source_pattern,
8384
allowed_protocols=["https"],
8485
expected_domains=[
8586
"visualstudio.com",

0 commit comments

Comments
 (0)