From 2f93963a0a7f1fb667170f39027012bf18bf54b1 Mon Sep 17 00:00:00 2001
From: Soxoj <31013580+soxoj@users.noreply.github.com>
Date: Sun, 1 Dec 2024 11:41:41 +0100
Subject: [PATCH] Refactored sites module, updated documentation (#1918)

---
 Makefile                    |   4 +-
 docs/source/development.rst |   5 +-
 maigret/checking.py         |   3 +-
 maigret/sites.py            | 126 ++++++++++++++++++++----------------
 poetry.lock                 |  79 +++++++++++++++++++++-
 pyproject.toml              |  10 +++
 sites.md                    |  48 +++++++-------
 utils/update_site_data.py   |   2 +-
 8 files changed, 191 insertions(+), 86 deletions(-)

diff --git a/Makefile b/Makefile
index d091c83a..534ddf9b 100644
--- a/Makefile
+++ b/Makefile
@@ -16,10 +16,10 @@ lint:
 	flake8 --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --ignore=E731,W503,E501 ${LINT_FILES}
 
 	@echo 'mypy'
-	mypy ${LINT_FILES}
+	mypy --check-untyped-defs ${LINT_FILES}
 
 speed:
-	time python3 ./maigret.py --version
+	time python3 -m maigret --version
 	python3 -c "import timeit; t = timeit.Timer('import maigret'); print(t.timeit(number = 1000000))"
 	python3 -X importtime -c "import maigret" 2> maigret-import.log
 	python3 -m tuna maigret-import.log
diff --git a/docs/source/development.rst b/docs/source/development.rst
index cffcfe18..2ee6a40e 100644
--- a/docs/source/development.rst
+++ b/docs/source/development.rst
@@ -33,7 +33,7 @@ Install test requirements:
 
 .. code-block:: console
 
-  pip install -r test-requirements.txt
+  poetry install --with dev
 
 
 Use the following commands to check Maigret:
@@ -54,6 +54,9 @@ Use the following commands to check Maigret:
   # open html report
   open htmlcov/index.html
 
+  # profile import times and view them as a flame chart to estimate startup time
+  make speed
+
 
 How to fix false-positives
 -----------------------------------------------
diff --git a/maigret/checking.py b/maigret/checking.py
index 1583c027..ea9aa0e0 100644
--- a/maigret/checking.py
+++ b/maigret/checking.py
@@ -11,7 +11,6 @@
 
 # Third party imports
 import aiodns
-import alive_progress
 from alive_progress import alive_bar
 from aiohttp import ClientSession, TCPConnector, http_exceptions
 from aiohttp.client_exceptions import ClientConnectorError, ServerDisconnectedError
@@ -127,7 +126,7 @@ async def check(self) -> Tuple[str, int, Optional[CheckError]]:
         async with ClientSession(
             connector=connector,
             trust_env=True,
-            cookie_jar=self.cookie_jar.copy() if self.cookie_jar else None
+            cookie_jar=self.cookie_jar.copy() if self.cookie_jar else None,
         ) as session:
             html_text, status_code, error = await self._make_request(
                 session,
diff --git a/maigret/sites.py b/maigret/sites.py
index dc4cb50e..9e8dfabe 100644
--- a/maigret/sites.py
+++ b/maigret/sites.py
@@ -21,6 +21,7 @@ def json(self):
 
 
 class MaigretSite:
+    # Fields that should not be serialized when converting site to JSON
     NOT_SERIALIZABLE_FIELDS = [
         "name",
         "engineData",
@@ -31,37 +32,65 @@ class MaigretSite:
         "urlRegexp",
     ]
 
+    # Username known to exist on the site
     username_claimed = ""
+    # Username known to not exist on the site
     username_unclaimed = ""
+    # Additional URL path component, e.g. /forum in https://example.com/forum/users/{username}
     url_subpath = ""
+    # Main site URL (the main page)
     url_main = ""
+    # Full URL pattern for username page, e.g. https://example.com/forum/users/{username}
     url = ""
+    # Whether the site is disabled; disabled sites are skipped unless the --use-disabled argument is given
     disabled = False
+    # Whether a positive result indicates accounts with similar usernames rather than exact matches
     similar_search = False
+    # Whether to ignore 403 status codes
     ignore403 = False
+    # Site category tags
     tags: List[str] = []
 
+    # Type of identifier (username, gaia_id etc); see SUPPORTED_IDS in checking.py
     type = "username"
+    # Custom HTTP headers
     headers: Dict[str, str] = {}
+    # Error message substrings
     errors: Dict[str, str] = {}
+    # Site activation requirements
     activation: Dict[str, Any] = {}
+    # Regular expression for username validation
     regex_check = None
+    # URL to probe site status
     url_probe = None
+    # Type of check to perform
     check_type = ""
+    # Whether to only send HEAD requests (GET by default)
     request_head_only = ""
+    # GET parameters to include in requests
     get_params: Dict[str, Any] = {}
 
+    # Substrings in HTML response that indicate profile exists
     presense_strs: List[str] = []
+    # Substrings in HTML response that indicate profile doesn't exist
     absence_strs: List[str] = []
+    # Site statistics
     stats: Dict[str, Any] = {}
 
+    # Site engine name
     engine = None
+    # Engine-specific configuration
     engine_data: Dict[str, Any] = {}
+    # Engine instance
     engine_obj: Optional["MaigretEngine"] = None
+    # Future for async requests
     request_future = None
+    # Alexa traffic rank
     alexa_rank = None
+    # Source (in case a site is a mirror of another site)
     source = None
 
+    # URL protocol (http/https)
     protocol = ''
 
     def __init__(self, name, information):
@@ -96,20 +125,21 @@ def __is_equal_by_url_or_name(self, url_or_name_str: str):
     def __eq__(self, other):
         if isinstance(other, MaigretSite):
             # Compare only relevant attributes, not internal state like request_future
-            attrs_to_compare = ['name', 'url_main', 'url_subpath', 'type', 'headers',
-                              'errors', 'activation', 'regex_check', 'url_probe',
-                              'check_type', 'request_head_only', 'get_params',
-                              'presense_strs', 'absence_strs', 'stats', 'engine',
-                              'engine_data', 'alexa_rank', 'source', 'protocol']
+            attrs_to_compare = [
+                'name', 'url_main', 'url_subpath', 'type', 'headers',
+                'errors', 'activation', 'regex_check', 'url_probe',
+                'check_type', 'request_head_only', 'get_params',
+                'presense_strs', 'absence_strs', 'stats', 'engine',
+                'engine_data', 'alexa_rank', 'source', 'protocol'
+            ]
 
             return all(getattr(self, attr) == getattr(other, attr)
-                      for attr in attrs_to_compare)
+                         for attr in attrs_to_compare)
         elif isinstance(other, str):
             # Compare only by name (exactly) or url_main (partial similarity)
             return self.__is_equal_by_url_or_name(other)
         return False
 
-
     def update_detectors(self):
         if "url" in self.__dict__:
             url = self.url
@@ -474,78 +504,64 @@ def extract_ids_from_url(self, url: str) -> dict:
         return results
 
     def get_db_stats(self, is_markdown=False):
+        # Initialize counters
         sites_dict = self.sites_dict
-
         urls = {}
         tags = {}
-        output = ""
         disabled_count = 0
-        total_count = len(sites_dict)
-
-        message_checks = 0
         message_checks_one_factor = 0
-
         status_checks = 0
 
-        for _, site in sites_dict.items():
+        # Collect statistics
+        for site in sites_dict.values():
+            # Count disabled sites
             if site.disabled:
                 disabled_count += 1
 
+            # Count URL types
             url_type = site.get_url_template()
             urls[url_type] = urls.get(url_type, 0) + 1
 
-            if site.check_type == 'message' and not site.disabled:
-                message_checks += 1
-                if site.absence_strs and site.presense_strs:
-                    continue
-                message_checks_one_factor += 1
-
-            if site.check_type == 'status_code':
-                status_checks += 1
+            # Count check types for enabled sites
+            if not site.disabled:
+                if site.check_type == 'message':
+                    if not (site.absence_strs and site.presense_strs):
+                        message_checks_one_factor += 1
+                elif site.check_type == 'status_code':
+                    status_checks += 1
 
+            # Count tags
             if not site.tags:
                 tags["NO_TAGS"] = tags.get("NO_TAGS", 0) + 1
-
             for tag in filter(lambda x: not is_country_tag(x), site.tags):
                 tags[tag] = tags.get(tag, 0) + 1
 
+        # Calculate percentages
+        total_count = len(sites_dict)
         enabled_count = total_count - disabled_count
         enabled_perc = round(100 * enabled_count / total_count, 2)
-        output += (
-            f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%\n\n"
-        )
-
         checks_perc = round(100 * message_checks_one_factor / enabled_count, 2)
-        output += f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)\n\n"
-
         status_checks_perc = round(100 * status_checks / enabled_count, 2)
-        output += f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)\n\n"
 
-        output += (
-            f"False positive risk (total): {checks_perc+status_checks_perc:.2f}%\n\n"
-        )
-
-        top_urls_count = 20
-        output += f"Top {top_urls_count} profile URLs:\n"
-        for url, count in sorted(urls.items(), key=lambda x: x[1], reverse=True)[
-            :top_urls_count
-        ]:
+        # Format output
+        separator = "\n\n"
+        output = [
+            f"Enabled/total sites: {enabled_count}/{total_count} = {enabled_perc}%",
+            f"Incomplete message checks: {message_checks_one_factor}/{enabled_count} = {checks_perc}% (false positive risks)",
+            f"Status code checks: {status_checks}/{enabled_count} = {status_checks_perc}% (false positive risks)",
+            f"False positive risk (total): {checks_perc + status_checks_perc:.2f}%",
+            self._format_top_items("profile URLs", urls, 20, is_markdown),
+            self._format_top_items("tags", tags, 20, is_markdown, self._tags),
+        ]
+
+        return separator.join(output)
+
+    def _format_top_items(self, title, items_dict, limit, is_markdown, valid_items=None):
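+        """Format up to `limit` highest-count items as a text list, stopping at items seen only once; items not in `valid_items` (if given) are marked non-standard"""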
+        output = f"Top {limit} {title}:\n"
+        for item, count in sorted(items_dict.items(), key=lambda x: x[1], reverse=True)[:limit]:
             if count == 1:
                 break
-            output += f"- ({count})\t`{url}`\n" if is_markdown else f"{count}\t{url}\n"
-
-        top_tags_count = 20
-        output += f"\nTop {top_tags_count} tags:\n"
-        for tag, count in sorted(tags.items(), key=lambda x: x[1], reverse=True)[
-            :top_tags_count
-        ]:
-            mark = ""
-            if tag not in self._tags:
-                mark = " (non-standard)"
-            output += (
-                f"- ({count})\t`{tag}`{mark}\n"
-                if is_markdown
-                else f"{count}\t{tag}{mark}\n"
-            )
-
+            mark = " (non-standard)" if valid_items is not None and item not in valid_items else ""
+            output += f"- ({count})\t`{item}`{mark}\n" if is_markdown else f"{count}\t{item}{mark}\n"
         return output
diff --git a/poetry.lock b/poetry.lock
index 3c228b3a..09bedace 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "about-time"
@@ -1377,6 +1377,70 @@ files = [
 [package.dependencies]
 typing-extensions = {version = ">=4.1.0", markers = "python_version < \"3.11\""}
 
+[[package]]
+name = "mypy"
+version = "1.13.0"
+description = "Optional static typing for Python"
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "mypy-1.13.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:6607e0f1dd1fb7f0aca14d936d13fd19eba5e17e1cd2a14f808fa5f8f6d8f60a"},
+    {file = "mypy-1.13.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:8a21be69bd26fa81b1f80a61ee7ab05b076c674d9b18fb56239d72e21d9f4c80"},
+    {file = "mypy-1.13.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7b2353a44d2179846a096e25691d54d59904559f4232519d420d64da6828a3a7"},
+    {file = "mypy-1.13.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0730d1c6a2739d4511dc4253f8274cdd140c55c32dfb0a4cf8b7a43f40abfa6f"},
+    {file = "mypy-1.13.0-cp310-cp310-win_amd64.whl", hash = "sha256:c5fc54dbb712ff5e5a0fca797e6e0aa25726c7e72c6a5850cfd2adbc1eb0a372"},
+    {file = "mypy-1.13.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:581665e6f3a8a9078f28d5502f4c334c0c8d802ef55ea0e7276a6e409bc0d82d"},
+    {file = "mypy-1.13.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:3ddb5b9bf82e05cc9a627e84707b528e5c7caaa1c55c69e175abb15a761cec2d"},
+    {file = "mypy-1.13.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:20c7ee0bc0d5a9595c46f38beb04201f2620065a93755704e141fcac9f59db2b"},
+    {file = "mypy-1.13.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:3790ded76f0b34bc9c8ba4def8f919dd6a46db0f5a6610fb994fe8efdd447f73"},
+    {file = "mypy-1.13.0-cp311-cp311-win_amd64.whl", hash = "sha256:51f869f4b6b538229c1d1bcc1dd7d119817206e2bc54e8e374b3dfa202defcca"},
+    {file = "mypy-1.13.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:5c7051a3461ae84dfb5dd15eff5094640c61c5f22257c8b766794e6dd85e72d5"},
+    {file = "mypy-1.13.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:39bb21c69a5d6342f4ce526e4584bc5c197fd20a60d14a8624d8743fffb9472e"},
+    {file = "mypy-1.13.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:164f28cb9d6367439031f4c81e84d3ccaa1e19232d9d05d37cb0bd880d3f93c2"},
+    {file = "mypy-1.13.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a4c1bfcdbce96ff5d96fc9b08e3831acb30dc44ab02671eca5953eadad07d6d0"},
+    {file = "mypy-1.13.0-cp312-cp312-win_amd64.whl", hash = "sha256:a0affb3a79a256b4183ba09811e3577c5163ed06685e4d4b46429a271ba174d2"},
+    {file = "mypy-1.13.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:a7b44178c9760ce1a43f544e595d35ed61ac2c3de306599fa59b38a6048e1aa7"},
+    {file = "mypy-1.13.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:5d5092efb8516d08440e36626f0153b5006d4088c1d663d88bf79625af3d1d62"},
+    {file = "mypy-1.13.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:de2904956dac40ced10931ac967ae63c5089bd498542194b436eb097a9f77bc8"},
+    {file = "mypy-1.13.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:7bfd8836970d33c2105562650656b6846149374dc8ed77d98424b40b09340ba7"},
+    {file = "mypy-1.13.0-cp313-cp313-win_amd64.whl", hash = "sha256:9f73dba9ec77acb86457a8fc04b5239822df0c14a082564737833d2963677dbc"},
+    {file = "mypy-1.13.0-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:100fac22ce82925f676a734af0db922ecfea991e1d7ec0ceb1e115ebe501301a"},
+    {file = "mypy-1.13.0-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:7bcb0bb7f42a978bb323a7c88f1081d1b5dee77ca86f4100735a6f541299d8fb"},
+    {file = "mypy-1.13.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:bde31fc887c213e223bbfc34328070996061b0833b0a4cfec53745ed61f3519b"},
+    {file = "mypy-1.13.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:07de989f89786f62b937851295ed62e51774722e5444a27cecca993fc3f9cd74"},
+    {file = "mypy-1.13.0-cp38-cp38-win_amd64.whl", hash = "sha256:4bde84334fbe19bad704b3f5b78c4abd35ff1026f8ba72b29de70dda0916beb6"},
+    {file = "mypy-1.13.0-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0246bcb1b5de7f08f2826451abd947bf656945209b140d16ed317f65a17dc7dc"},
+    {file = "mypy-1.13.0-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:7f5b7deae912cf8b77e990b9280f170381fdfbddf61b4ef80927edd813163732"},
+    {file = "mypy-1.13.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:7029881ec6ffb8bc233a4fa364736789582c738217b133f1b55967115288a2bc"},
+    {file = "mypy-1.13.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:3e38b980e5681f28f033f3be86b099a247b13c491f14bb8b1e1e134d23bb599d"},
+    {file = "mypy-1.13.0-cp39-cp39-win_amd64.whl", hash = "sha256:a6789be98a2017c912ae6ccb77ea553bbaf13d27605d2ca20a76dfbced631b24"},
+    {file = "mypy-1.13.0-py3-none-any.whl", hash = "sha256:9c250883f9fd81d212e0952c92dbfcc96fc237f4b7c92f56ac81fd48460b3e5a"},
+    {file = "mypy-1.13.0.tar.gz", hash = "sha256:0291a61b6fbf3e6673e3405cfcc0e7650bebc7939659fdca2702958038bd835e"},
+]
+
+[package.dependencies]
+mypy-extensions = ">=1.0.0"
+tomli = {version = ">=1.1.0", markers = "python_version < \"3.11\""}
+typing-extensions = ">=4.6.0"
+
+[package.extras]
+dmypy = ["psutil (>=4.0)"]
+faster-cache = ["orjson"]
+install-types = ["pip"]
+mypyc = ["setuptools (>=50)"]
+reports = ["lxml"]
+
+[[package]]
+name = "mypy-extensions"
+version = "1.0.0"
+description = "Type system extensions for programs checked with the mypy type checker."
+optional = false
+python-versions = ">=3.5"
+files = [
+    {file = "mypy_extensions-1.0.0-py3-none-any.whl", hash = "sha256:4392f6c0eb8a5668a69e23d168ffa70f0be9ccfd32b5cc2d26a34ae5b844552d"},
+    {file = "mypy_extensions-1.0.0.tar.gz", hash = "sha256:75dbf8955dc00442a438fc4d0666508a9a97b6bd41aa2f0ffe9d2f2725af0782"},
+]
+
 [[package]]
 name = "networkx"
 version = "2.8.8"
@@ -2546,6 +2610,17 @@ files = [
 docs = ["myst-parser", "pydata-sphinx-theme", "sphinx"]
 test = ["argcomplete (>=3.0.3)", "mypy (>=1.7.0)", "pre-commit", "pytest (>=7.0,<8.2)", "pytest-mock", "pytest-mypy-testing"]
 
+[[package]]
+name = "tuna"
+version = "0.5.11"
+description = "Visualize Python performance profiles"
+optional = false
+python-versions = ">=3.6"
+files = [
+    {file = "tuna-0.5.11-py3-none-any.whl", hash = "sha256:ab352a6d836014ace585ecd882148f1f7c68be9ea4bf9e9298b7127594dab2ef"},
+    {file = "tuna-0.5.11.tar.gz", hash = "sha256:d47f3e39e80af961c8df016ac97d1643c3c60b5eb451299da0ab5fe411d8866c"},
+]
+
 [[package]]
 name = "typing-extensions"
 version = "4.12.2"
@@ -2791,4 +2866,4 @@ propcache = ">=0.2.0"
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.10"
-content-hash = "988c92f025a627b4c9394bf17872e6c9e506b8fa8070d51f830e92abf48aa530"
+content-hash = "e521713c426ae38d26d975fbd47ad5159e393d6532be845af169e03b7421fd40"
diff --git a/pyproject.toml b/pyproject.toml
index 2ef6243d..8bc34e2c 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,6 +25,11 @@ classifiers = [
 "Bug Tracker" = "https://github.com/soxoj/maigret/issues"
 
 [tool.poetry.dependencies]
+# Install dependencies with: poetry install
+# Install only production dependencies:
+# poetry install --without dev
+# Install with dev dependencies:
+# poetry install --with dev
 python = "^3.10"
 aiodns = "^3.0.0"
 aiohttp = "^3.11.8"
@@ -68,6 +73,8 @@ cloudscraper = "^1.2.71"
 
 
 [tool.poetry.group.dev.dependencies]
+# How to add a new dev dependency: poetry add black --group dev
+# Install dev dependencies with: poetry install --with dev
 flake8 = "^7.1.1"
 pytest = "^7.2.0"
 pytest-asyncio = "^0.23.8"
@@ -75,6 +82,9 @@ pytest-cov = "^6.0.0"
 pytest-httpserver = "^1.0.0"
 pytest-rerunfailures = "^15.0"
 reportlab = "^4.2.0"
+mypy = "^1.13.0"
+tuna = "^0.5.11"
 
 [tool.poetry.scripts]
+# Run with: poetry run maigret <username>
 maigret = "maigret.maigret:run"
diff --git a/sites.md b/sites.md
index 0f900576..bc34463f 100644
--- a/sites.md
+++ b/sites.md
@@ -3130,16 +3130,17 @@ Rank data fetched from Alexa by domains.
 1. ![](https://www.google.com/s2/favicons?domain=https://massagerepublic.com) [massagerepublic.com (https://massagerepublic.com)](https://massagerepublic.com)*: top 100M*
 1. ![](https://www.google.com/s2/favicons?domain=https://mynickname.com) [mynickname.com (https://mynickname.com)](https://mynickname.com)*: top 100M*
 
-The list was updated at (2024-11-29 UTC)
+The list was updated at (2024-11-30)
+
 ## Statistics
 
 Enabled/total sites: 2693/3126 = 86.15%
 
 Incomplete message checks: 404/2693 = 15.0% (false positive risks)
 
-Status code checks: 720/2693 = 26.74% (false positive risks)
+Status code checks: 618/2694 = 22.94% (false positive risks)
 
-False positive risk (total): 41.74%
+False positive risk (total): 37.97%
 
 Top 20 profile URLs:
 - (796)	`{urlMain}/index/8-0-{username} (uCoz)`
@@ -3163,24 +3164,25 @@ Top 20 profile URLs:
 - (17)	`/forum/members/?username={username}`
 - (17)	`/search.php?keywords=&terms=all&author={username}`
 
+
 Top 20 tags:
-- (327)	`NO_TAGS` (non-standard)
-- (307)	`forum`
-- (50)	`gaming`
-- (26)	`coding`
-- (21)	`photo`
-- (20)	`blog`
-- (19)	`news`
-- (15)	`music`
-- (14)	`tech`
-- (12)	`freelance`
-- (12)	`finance`
-- (11)	`sharing`
-- (10)	`dating`
-- (10)	`art`
-- (10)	`shopping`
-- (10)	`movies`
-- (8)	`hobby`
-- (8)	`crypto`
-- (7)	`sport`
-- (7)	`hacking`
+- (1104)	`NO_TAGS` (non-standard)
+- (735)	`forum`
+- (80)	`gaming`
+- (48)	`photo`
+- (41)	`coding`
+- (30)	`tech`
+- (29)	`news`
+- (27)	`blog`
+- (23)	`music`
+- (18)	`finance`
+- (18)	`crypto`
+- (17)	`sharing`
+- (16)	`freelance`
+- (15)	`art`
+- (15)	`shopping`
+- (13)	`sport`
+- (13)	`business`
+- (12)	`movies`
+- (11)	`hobby`
+- (11)	`education`
diff --git a/utils/update_site_data.py b/utils/update_site_data.py
index a4f5a054..dd275fef 100755
--- a/utils/update_site_data.py
+++ b/utils/update_site_data.py
@@ -137,7 +137,7 @@ def get_readable_rank(r):
             site_file.write(f'1. {favicon} [{site}]({url_main})*: top {valid_rank}{tags}*{note}\n')
             db.update_site(site)
 
-        site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()} UTC)\n')
+        site_file.write(f'\nThe list was updated at ({datetime.now(timezone.utc).date()})\n')
         db.save_to_file(args.base_file)
 
         statistics_text = db.get_db_stats(is_markdown=True)