diff --git a/.github/workflows/format.yml b/.github/workflows/format.yml new file mode 100644 index 000000000..23b89c81d --- /dev/null +++ b/.github/workflows/format.yml @@ -0,0 +1,22 @@ +name: Format Code + +on: [pull_request] + +jobs: + black-check: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v3 + + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.x" + + - name: Install Black + run: pip install black + + - name: Run Black + run: black --check . diff --git a/devel/site-list.py b/devel/site-list.py index 047b2264f..bf59dfcc6 100644 --- a/devel/site-list.py +++ b/devel/site-list.py @@ -13,7 +13,7 @@ # Removes schema-specific keywords for proper processing social_networks: dict = dict(data) -social_networks.pop('$schema', None) +social_networks.pop("$schema", None) # Sort the social networks in alphanumeric order social_networks: list = sorted(social_networks.items()) @@ -23,7 +23,9 @@ # Write the list of supported sites to sites.md with open("output/sites.mdx", "w") as site_file: - site_file.write("---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n") + site_file.write( + "---\ntitle: 'List of supported sites'\nsidebarTitle: 'Supported sites'\nicon: 'globe'\ndescription: 'Sherlock currently supports **400+** sites'\n---\n\n" + ) for social_network, info in social_networks: url_main = info["urlMain"] is_nsfw = "**(NSFW)**" if info.get("isNSFW") else "" @@ -36,4 +38,3 @@ data_file.write("\n") print("Finished updating supported site listing!") - diff --git a/sherlock_project/__init__.py b/sherlock_project/__init__.py index 52307cd76..9027a1bfc 100644 --- a/sherlock_project/__init__.py +++ b/sherlock_project/__init__.py @@ -1,4 +1,4 @@ -""" Sherlock Module +"""Sherlock Module This module contains the main logic to search for usernames at social networks. @@ -8,8 +8,10 @@ # This variable is only used to check for ImportErrors induced by users running as script rather than as module or package import_error_test_var = None -__shortname__ = "Sherlock" -__longname__ = "Sherlock: Find Usernames Across Social Networks" -__version__ = "0.15.0" +__shortname__ = "Sherlock" +__longname__ = "Sherlock: Find Usernames Across Social Networks" +__version__ = "0.15.0" -forge_api_latest_release = "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +forge_api_latest_release = ( + "https://api.github.com/repos/sherlock-project/sherlock/releases/latest" +) diff --git a/sherlock_project/__main__.py b/sherlock_project/__main__.py index a252de0fc..47b65a76d 100644 --- a/sherlock_project/__main__.py +++ b/sherlock_project/__main__.py @@ -15,8 +15,11 @@ python_version = sys.version.split()[0] if sys.version_info < (3, 9): - print(f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock.") + print( + f"Sherlock requires Python 3.9+\nYou are using Python {python_version}, which is not supported by Sherlock." + ) sys.exit(1) from sherlock_project import sherlock + sherlock.main() diff --git a/sherlock_project/notify.py b/sherlock_project/notify.py index f6c785d63..a00d3b925 100644 --- a/sherlock_project/notify.py +++ b/sherlock_project/notify.py @@ -3,6 +3,7 @@ This module defines the objects for notifying the caller about the results of queries. """ + from sherlock_project.result import QueryStatus from colorama import Fore, Style import webbrowser @@ -155,13 +156,21 @@ def start(self, message): title = "Checking username" - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + f"] {title}" + - Fore.WHITE + f" {message}" + - Fore.GREEN + " on:") + print( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + f"] {title}" + + Fore.WHITE + + f" {message}" + + Fore.GREEN + + " on:" + ) # An empty line between first line and the result(more clear output) - print('\r') + print("\r") return @@ -201,52 +210,92 @@ def update(self, result): # Output to the terminal is desired. if result.status == QueryStatus.CLAIMED: self.countResults() - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.GREEN + "+" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + - f" {self.result.site_name}: " + - Style.RESET_ALL + - f"{self.result.site_url_user}") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.GREEN + + "+" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}: " + + Style.RESET_ALL + + f"{self.result.site_url_user}" + ) if self.browse: webbrowser.open(self.result.site_url_user, 2) elif result.status == QueryStatus.AVAILABLE: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - response_time_text + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + " Not Found!") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + response_time_text + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + " Not Found!" + ) elif result.status == QueryStatus.UNKNOWN: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + f" {self.result.context}" + - Fore.YELLOW + " ") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + f" {self.result.context}" + + Fore.YELLOW + + " " + ) elif result.status == QueryStatus.ILLEGAL: if self.print_all: msg = "Illegal Username Format For This Site!" - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.YELLOW + f" {msg}") - + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.YELLOW + + f" {msg}" + ) + elif result.status == QueryStatus.WAF: if self.print_all: - print(Style.BRIGHT + Fore.WHITE + "[" + - Fore.RED + "-" + - Fore.WHITE + "]" + - Fore.GREEN + f" {self.result.site_name}:" + - Fore.RED + " Blocked by bot detection" + - Fore.YELLOW + " (proxy may help)") + print( + Style.BRIGHT + + Fore.WHITE + + "[" + + Fore.RED + + "-" + + Fore.WHITE + + "]" + + Fore.GREEN + + f" {self.result.site_name}:" + + Fore.RED + + " Blocked by bot detection" + + Fore.YELLOW + + " (proxy may help)" + ) else: # It should be impossible to ever get here... @@ -267,12 +316,20 @@ def finish(self, message="The processing has been finished."): """ NumberOfResults = self.countResults() - 1 - print(Style.BRIGHT + Fore.GREEN + "[" + - Fore.YELLOW + "*" + - Fore.GREEN + "] Search completed with" + - Fore.WHITE + f" {NumberOfResults} " + - Fore.GREEN + "results" + Style.RESET_ALL - ) + print( + Style.BRIGHT + + Fore.GREEN + + "[" + + Fore.YELLOW + + "*" + + Fore.GREEN + + "] Search completed with" + + Fore.WHITE + + f" {NumberOfResults} " + + Fore.GREEN + + "results" + + Style.RESET_ALL + ) def __str__(self): """Convert Object To String. diff --git a/sherlock_project/result.py b/sherlock_project/result.py index c4d68b1c8..4e5b6c92f 100644 --- a/sherlock_project/result.py +++ b/sherlock_project/result.py @@ -2,6 +2,7 @@ This module defines various objects for recording the results of queries. """ + from enum import Enum @@ -10,11 +11,12 @@ class QueryStatus(Enum): Describes status of query about a given username. """ - CLAIMED = "Claimed" # Username Detected - AVAILABLE = "Available" # Username Not Detected - UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username - ILLEGAL = "Illegal" # Username Not Allowable For This Site - WAF = "WAF" # Request blocked by WAF (i.e. Cloudflare) + + CLAIMED = "Claimed" # Username Detected + AVAILABLE = "Available" # Username Not Detected + UNKNOWN = "Unknown" # Error Occurred While Trying To Detect Username + ILLEGAL = "Illegal" # Username Not Allowable For This Site + WAF = "WAF" # Request blocked by WAF (i.e. Cloudflare) def __str__(self): """Convert Object To String. @@ -27,13 +29,16 @@ def __str__(self): """ return self.value -class QueryResult(): + +class QueryResult: """Query Result Object. Describes result of query about a given username. """ - def __init__(self, username, site_name, site_url_user, status, - query_time=None, context=None): + + def __init__( + self, username, site_name, site_url_user, status, query_time=None, context=None + ): """Create Query Result Object. Contains information about a specific method of detecting usernames on @@ -62,12 +67,12 @@ def __init__(self, username, site_name, site_url_user, status, Nothing. """ - self.username = username - self.site_name = site_name + self.username = username + self.site_name = site_name self.site_url_user = site_url_user - self.status = status - self.query_time = query_time - self.context = context + self.status = status + self.query_time = query_time + self.context = context return diff --git a/sherlock_project/sherlock.py b/sherlock_project/sherlock.py index 4e80d31c3..e5b75b1c2 100644 --- a/sherlock_project/sherlock.py +++ b/sherlock_project/sherlock.py @@ -10,10 +10,12 @@ import sys try: - from sherlock_project.__init__ import import_error_test_var # noqa: F401 + from sherlock_project.__init__ import import_error_test_var # noqa: F401 except ImportError: print("Did you run Sherlock with `python3 sherlock/sherlock.py ...`?") - print("This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions.") + print( + "This is an outdated method. Please see https://sherlockproject.xyz/installation for up to date instructions." + ) sys.exit(1) import csv @@ -216,13 +218,21 @@ def sherlock( from torrequest import TorRequest # noqa: E402 except ImportError: print("Important!") - print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") - print("> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`.") - print("> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n") + print( + "> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock." + ) + print( + "> If you've installed Sherlock via pip, you can include the optional dependency via `pip install 'sherlock-project[tor]'`." + ) + print( + "> Other packages should refer to their documentation, or install it separately with `pip install torrequest`.\n" + ) sys.exit(query_notify.finish()) print("Important!") - print("> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock.") + print( + "> --tor and --unique-tor are now DEPRECATED, and may be removed in a future release of Sherlock." + ) # Requests using Tor obfuscation try: @@ -270,7 +280,7 @@ def sherlock( headers.update(net_info["headers"]) # URL of user on site (if it exists) - url = interpolate_string(net_info["url"], username.replace(' ', '%20')) + url = interpolate_string(net_info["url"], username.replace(" ", "%20")) # Don't make request if username is invalid for the site regex_check = net_info.get("regexCheck") @@ -413,10 +423,10 @@ def sherlock( # be highly targetted. Comment at the end of each fingerprint to # indicate target and date fingerprinted. WAFHitMsgs = [ - r'.loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark', # 2024-05-13 Cloudflare - r'', # 2024-11-11 Cloudflare error page - r'AwsWafIntegration.forceRefreshToken', # 2024-11-11 Cloudfront (AWS) - r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:' # 2024-04-09 PerimeterX / Human Security + r".loading-spinner{visibility:hidden}body.no-js .challenge-running{display:none}body.dark{background-color:#222;color:#d9d9d9}body.dark a{color:#fff}body.dark a:hover{color:#ee730a;text-decoration:underline}body.dark .lds-ring div{border-color:#999 transparent transparent}body.dark .font-red{color:#b20f03}body.dark", # 2024-05-13 Cloudflare + r'', # 2024-11-11 Cloudflare error page + r"AwsWafIntegration.forceRefreshToken", # 2024-11-11 Cloudfront (AWS) + r'{return l.onPageView}}),Object.defineProperty(r,"perimeterxIdentifiers",{enumerable:', # 2024-04-09 PerimeterX / Human Security ] if error_text is not None: diff --git a/sherlock_project/sites.py b/sherlock_project/sites.py index 847d15769..f2be19092 100644 --- a/sherlock_project/sites.py +++ b/sherlock_project/sites.py @@ -3,13 +3,23 @@ This module supports storing information about websites. This is the raw data that will be used to search for usernames. """ + import json import requests import secrets + class SiteInformation: - def __init__(self, name, url_home, url_username_format, username_claimed, - information, is_nsfw, username_unclaimed=secrets.token_urlsafe(10)): + def __init__( + self, + name, + url_home, + url_username_format, + username_claimed, + information, + is_nsfw, + username_unclaimed=secrets.token_urlsafe(10), + ): """Create Site Information Object. Contains information about a specific website. @@ -54,7 +64,7 @@ def __init__(self, name, url_home, url_username_format, username_claimed, self.username_claimed = username_claimed self.username_unclaimed = secrets.token_urlsafe(32) self.information = information - self.is_nsfw = is_nsfw + self.is_nsfw = is_nsfw return @@ -67,7 +77,7 @@ def __str__(self): Return Value: Nicely formatted string to get information about this object. """ - + return f"{self.name} ({self.url_home})" @@ -114,7 +124,9 @@ def __init__(self, data_file_path=None): # Ensure that specified data file has correct extension. if not data_file_path.lower().endswith(".json"): - raise FileNotFoundError(f"Incorrect JSON file extension for data file '{data_file_path}'.") + raise FileNotFoundError( + f"Incorrect JSON file extension for data file '{data_file_path}'." + ) # if "http://" == data_file_path[:7].lower() or "https://" == data_file_path[:8].lower(): if data_file_path.lower().startswith("http"): @@ -127,9 +139,10 @@ def __init__(self, data_file_path=None): ) if response.status_code != 200: - raise FileNotFoundError(f"Bad response while accessing " - f"data file URL '{data_file_path}'." - ) + raise FileNotFoundError( + f"Bad response while accessing " + f"data file URL '{data_file_path}'." + ) try: site_data = response.json() except Exception as error: @@ -149,11 +162,12 @@ def __init__(self, data_file_path=None): ) except FileNotFoundError: - raise FileNotFoundError(f"Problem while attempting to access " - f"data file '{data_file_path}'." - ) - - site_data.pop('$schema', None) + raise FileNotFoundError( + f"Problem while attempting to access " + f"data file '{data_file_path}'." + ) + + site_data.pop("$schema", None) self.sites = {} @@ -161,21 +175,22 @@ def __init__(self, data_file_path=None): for site_name in site_data: try: - self.sites[site_name] = \ - SiteInformation(site_name, - site_data[site_name]["urlMain"], - site_data[site_name]["url"], - site_data[site_name]["username_claimed"], - site_data[site_name], - site_data[site_name].get("isNSFW",False) - - ) + self.sites[site_name] = SiteInformation( + site_name, + site_data[site_name]["urlMain"], + site_data[site_name]["url"], + site_data[site_name]["username_claimed"], + site_data[site_name], + site_data[site_name].get("isNSFW", False), + ) except KeyError as error: raise ValueError( f"Problem parsing json contents at '{data_file_path}': Missing attribute {error}." ) except TypeError: - print(f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n") + print( + f"Encountered TypeError parsing json contents for target '{site_name}' at {data_file_path}\nSkipping target.\n" + ) return @@ -194,8 +209,8 @@ def remove_nsfw_sites(self, do_not_remove: list = []): for site in self.sites: if self.sites[site].is_nsfw and site.casefold() not in do_not_remove: continue - sites[site] = self.sites[site] - self.sites = sites + sites[site] = self.sites[site] + self.sites = sites def site_name_list(self): """Get Site Name List. diff --git a/tests/conftest.py b/tests/conftest.py index 51c908146..776698f55 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -4,20 +4,33 @@ import pytest from sherlock_project.sites import SitesInformation + @pytest.fixture() def sites_obj(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ) + ) yield sites_obj + @pytest.fixture(scope="session") def sites_info(): - sites_obj = SitesInformation(data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock_project/resources/data.json")) + sites_obj = SitesInformation( + data_file_path=os.path.join( + os.path.dirname(__file__), "../sherlock_project/resources/data.json" + ) + ) sites_iterable = {site.name: site.information for site in sites_obj} yield sites_iterable + @pytest.fixture(scope="session") def remote_schema(): - schema_url: str = 'https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json' + schema_url: str = ( + "https://raw.githubusercontent.com/sherlock-project/sherlock/master/sherlock_project/resources/data.schema.json" + ) with urllib.request.urlopen(schema_url) as remoteschema: schemadat = json.load(remoteschema) yield schemadat diff --git a/tests/few_test_basic.py b/tests/few_test_basic.py index f70403226..9ee48f879 100644 --- a/tests/few_test_basic.py +++ b/tests/few_test_basic.py @@ -1,7 +1,8 @@ import sherlock_project -#from sherlock.sites import SitesInformation -#local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") +# from sherlock.sites import SitesInformation +# local_manifest = data_file_path=os.path.join(os.path.dirname(__file__), "../sherlock/resources/data.json") + def test_username_via_message(): sherlock_project.__main__("--version") diff --git a/tests/sherlock_interactives.py b/tests/sherlock_interactives.py index c28b9dc06..b484a68a6 100644 --- a/tests/sherlock_interactives.py +++ b/tests/sherlock_interactives.py @@ -3,36 +3,39 @@ import re import subprocess + class Interactives: - def run_cli(args:str = "") -> str: + def run_cli(args: str = "") -> str: """Pass arguments to Sherlock as a normal user on the command line""" # Adapt for platform differences (Windows likes to be special) if platform.system() == "Windows": - command:str = f"py -m sherlock_project {args}" + command: str = f"py -m sherlock_project {args}" else: - command:str = f"sherlock {args}" + command: str = f"sherlock {args}" - proc_out:str = "" + proc_out: str = "" try: - proc_out = subprocess.check_output(command, shell=True, stderr=subprocess.STDOUT) + proc_out = subprocess.check_output( + command, shell=True, stderr=subprocess.STDOUT + ) return proc_out.decode() except subprocess.CalledProcessError as e: raise InteractivesSubprocessError(e.output.decode()) - def walk_sherlock_for_files_with(pattern: str) -> list[str]: """Check all files within the Sherlock package for matching patterns""" - pattern:re.Pattern = re.compile(pattern) - matching_files:list[str] = [] + pattern: re.Pattern = re.compile(pattern) + matching_files: list[str] = [] for root, dirs, files in os.walk("sherlock_project"): for file in files: - file_path = os.path.join(root,file) + file_path = os.path.join(root, file) if "__pycache__" in file_path: continue - with open(file_path, 'r', errors='ignore') as f: + with open(file_path, "r", errors="ignore") as f: if pattern.search(f.read()): matching_files.append(file_path) return matching_files + class InteractivesSubprocessError(Exception): pass diff --git a/tests/test_manifest.py b/tests/test_manifest.py index 5c47fbb84..b40bfa897 100644 --- a/tests/test_manifest.py +++ b/tests/test_manifest.py @@ -3,17 +3,18 @@ import pytest from jsonschema import validate + def test_validate_manifest_against_local_schema(): """Ensures that the manifest matches the local schema, for situations where the schema is being changed.""" - json_relative: str = '../sherlock_project/resources/data.json' - schema_relative: str = '../sherlock_project/resources/data.schema.json' - + json_relative: str = "../sherlock_project/resources/data.json" + schema_relative: str = "../sherlock_project/resources/data.schema.json" + json_path: str = os.path.join(os.path.dirname(__file__), json_relative) schema_path: str = os.path.join(os.path.dirname(__file__), schema_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) - with open(schema_path, 'r') as f: + with open(schema_path, "r") as f: schemadat = json.load(f) validate(instance=jsondat, schema=schemadat) @@ -22,18 +23,22 @@ def test_validate_manifest_against_local_schema(): @pytest.mark.online def test_validate_manifest_against_remote_schema(remote_schema): """Ensures that the manifest matches the remote schema, so as to not unexpectedly break clients.""" - json_relative: str = '../sherlock_project/resources/data.json' + json_relative: str = "../sherlock_project/resources/data.json" json_path: str = os.path.join(os.path.dirname(__file__), json_relative) - with open(json_path, 'r') as f: + with open(json_path, "r") as f: jsondat = json.load(f) validate(instance=jsondat, schema=remote_schema) + # Ensure that the expected values are beind returned by the site list -@pytest.mark.parametrize("target_name,target_expected_err_type", [ - ('GitHub', 'status_code'), - ('GitLab', 'message'), -]) -def test_site_list_iterability (sites_info, target_name, target_expected_err_type): - assert sites_info[target_name]['errorType'] == target_expected_err_type +@pytest.mark.parametrize( + "target_name,target_expected_err_type", + [ + ("GitHub", "status_code"), + ("GitLab", "message"), + ], +) +def test_site_list_iterability(sites_info, target_name, target_expected_err_type): + assert sites_info[target_name]["errorType"] == target_expected_err_type diff --git a/tests/test_probes.py b/tests/test_probes.py index 11fc8f837..a344cb862 100644 --- a/tests/test_probes.py +++ b/tests/test_probes.py @@ -5,7 +5,8 @@ from sherlock_project.sherlock import sherlock from sherlock_project.notify import QueryNotify from sherlock_project.result import QueryStatus -#from sherlock_interactives import Interactives + +# from sherlock_interactives import Interactives def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: @@ -16,90 +17,115 @@ def simple_query(sites_info: dict, site: str, username: str) -> QueryStatus: username=username, site_data=site_data, query_notify=query_notify, - )[site]['status'].status + )[ + site + ]["status"].status @pytest.mark.online class TestLiveTargets: """Actively test probes against live and trusted targets""" + # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitLab', 'ppfeister'), - ('AllMyLinks', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitLab", "ppfeister"), + ("AllMyLinks", "blue"), + ], + ) def test_known_positives_via_message(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('GitHub', 'ppfeister'), - ('GitHub', 'sherlock-project'), - ('Docker Hub', 'ppfeister'), - ('Docker Hub', 'sherlock'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("GitHub", "ppfeister"), + ("GitHub", "sherlock-project"), + ("Docker Hub", "ppfeister"), + ("Docker Hub", "sherlock"), + ], + ) def test_known_positives_via_status_code(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Known positives should only use sites trusted to be reliable and unchanging - @pytest.mark.parametrize('site,username',[ - ('Keybase', 'blue'), - ('devRant', 'blue'), - ]) + @pytest.mark.parametrize( + "site,username", + [ + ("Keybase", "blue"), + ("devRant", "blue"), + ], + ) def test_known_positives_via_response_url(self, sites_info, site, username): - assert simple_query(sites_info=sites_info, site=site, username=username) is QueryStatus.CLAIMED - + assert ( + simple_query(sites_info=sites_info, site=site, username=username) + is QueryStatus.CLAIMED + ) # Randomly generate usernames of high length and test for positive availability # Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. - @pytest.mark.parametrize('site,random_len',[ - ('GitLab', 255), - ('Codecademy', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitLab", 255), ("Codecademy", 30)]) def test_likely_negatives_via_message(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." - + assert ( + status is QueryStatus.AVAILABLE + ), f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." # Randomly generate usernames of high length and test for positive availability # Randomly generated usernames should be simple alnum for simplicity and high # compatibility. Several attempts may be made ~just in case~ a real username is # generated. - @pytest.mark.parametrize('site,random_len',[ - ('GitHub', 39), - ('Docker Hub', 30) - ]) + @pytest.mark.parametrize("site,random_len", [("GitHub", 39), ("Docker Hub", 30)]) def test_likely_negatives_via_status_code(self, sites_info, site, random_len): num_attempts: int = 3 attempted_usernames: list[str] = [] status: QueryStatus = QueryStatus.CLAIMED for i in range(num_attempts): acceptable_types = string.ascii_letters + string.digits - random_handle = ''.join(random.choice(acceptable_types) for _ in range (random_len)) + random_handle = "".join( + random.choice(acceptable_types) for _ in range(random_len) + ) attempted_usernames.append(random_handle) - status = simple_query(sites_info=sites_info, site=site, username=random_handle) + status = simple_query( + sites_info=sites_info, site=site, username=random_handle + ) if status is QueryStatus.AVAILABLE: break - assert status is QueryStatus.AVAILABLE, f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." + assert ( + status is QueryStatus.AVAILABLE + ), f"Could not validate available username after {num_attempts} attempts with randomly generated usernames {attempted_usernames}." def test_username_illegal_regex(sites_info): - site: str = 'BitBucket' - invalid_handle: str = '*#$Y&*JRE' - pattern = re.compile(sites_info[site]['regexCheck']) + site: str = "BitBucket" + invalid_handle: str = "*#$Y&*JRE" + pattern = re.compile(sites_info[site]["regexCheck"]) # Ensure that the username actually fails regex before testing sherlock assert pattern.match(invalid_handle) is None - assert simple_query(sites_info=sites_info, site=site, username=invalid_handle) is QueryStatus.ILLEGAL - + assert ( + simple_query(sites_info=sites_info, site=site, username=invalid_handle) + is QueryStatus.ILLEGAL + ) diff --git a/tests/test_ux.py b/tests/test_ux.py index 3c62463b5..84d7ed414 100644 --- a/tests/test_ux.py +++ b/tests/test_ux.py @@ -3,41 +3,56 @@ from sherlock_interactives import Interactives from sherlock_interactives import InteractivesSubprocessError + def test_remove_nsfw(sites_obj): - nsfw_target: str = 'Pornhub' + nsfw_target: str = "Pornhub" assert nsfw_target in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites() assert nsfw_target not in {site.name: site.information for site in sites_obj} # Parametrized sites should *not* include Motherless, which is acting as the control -@pytest.mark.parametrize('nsfwsites', [ - ['Pornhub'], - ['Pornhub', 'Xvideos'], -]) +@pytest.mark.parametrize( + "nsfwsites", + [ + ["Pornhub"], + ["Pornhub", "Xvideos"], + ], +) def test_nsfw_explicit_selection(sites_obj, nsfwsites): for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} sites_obj.remove_nsfw_sites(do_not_remove=nsfwsites) for site in nsfwsites: assert site in {site.name: site.information for site in sites_obj} - assert 'Motherless' not in {site.name: site.information for site in sites_obj} + assert "Motherless" not in {site.name: site.information for site in sites_obj} + def test_wildcard_username_expansion(): - assert sherlock.check_for_parameter('test{?}test') is True - assert sherlock.check_for_parameter('test{.}test') is False - assert sherlock.check_for_parameter('test{}test') is False - assert sherlock.check_for_parameter('testtest') is False - assert sherlock.check_for_parameter('test{?test') is False - assert sherlock.check_for_parameter('test?}test') is False - assert sherlock.multiple_usernames('test{?}test') == ["test_test" , "test-test" , "test.test"] - - -@pytest.mark.parametrize('cliargs', [ - '', - '--site urghrtuight --egiotr', - '--', -]) + assert sherlock.check_for_parameter("test{?}test") is True + assert sherlock.check_for_parameter("test{.}test") is False + assert sherlock.check_for_parameter("test{}test") is False + assert sherlock.check_for_parameter("testtest") is False + assert sherlock.check_for_parameter("test{?test") is False + assert sherlock.check_for_parameter("test?}test") is False + assert sherlock.multiple_usernames("test{?}test") == [ + "test_test", + "test-test", + "test.test", + ] + + +@pytest.mark.parametrize( + "cliargs", + [ + "", + "--site urghrtuight --egiotr", + "--", + ], +) def test_no_usernames_provided(cliargs): - with pytest.raises(InteractivesSubprocessError, match=r"error: the following arguments are required: USERNAMES"): + with pytest.raises( + InteractivesSubprocessError, + match=r"error: the following arguments are required: USERNAMES", + ): Interactives.run_cli(cliargs) diff --git a/tests/test_version.py b/tests/test_version.py index 2de64dddd..ba3add659 100644 --- a/tests/test_version.py +++ b/tests/test_version.py @@ -2,13 +2,14 @@ from sherlock_interactives import Interactives import sherlock_project + def test_versioning() -> None: # Ensure __version__ matches version presented to the user assert sherlock_project.__version__ in Interactives.run_cli("--version") # Ensure __init__ is single source of truth for __version__ in package # Temporarily allows sherlock.py so as to not trigger early upgrades - found:list = Interactives.walk_sherlock_for_files_with(r'__version__ *= *') - expected:list = [ + found: list = Interactives.walk_sherlock_for_files_with(r"__version__ *= *") + expected: list = [ # Normalization is REQUIRED for Windows ( / vs \ ) os.path.normpath("sherlock_project/__init__.py"), ]