From a05b657856eff37dd46e7ad9a829adc8c24dbe86 Mon Sep 17 00:00:00 2001 From: Julien Bataille <1373396+elliatab@users.noreply.github.com> Date: Wed, 10 Jul 2024 16:18:07 +0900 Subject: [PATCH] Add Rakuten set draft and workflow for set validation --- .github/workflows/rakuten-set-checks.yml | 31 ++++ .gitignore | 162 +++++++++++++++++++++++ check_sites.py | 2 +- related_website_sets.JSON | 31 +++++ requirements.txt | 3 + 5 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/rakuten-set-checks.yml create mode 100644 .gitignore create mode 100644 requirements.txt diff --git a/.github/workflows/rakuten-set-checks.yml b/.github/workflows/rakuten-set-checks.yml new file mode 100644 index 00000000..895f9a61 --- /dev/null +++ b/.github/workflows/rakuten-set-checks.yml @@ -0,0 +1,31 @@ +name: RWS submission checks +on: + pull_request: + branches: + - main +jobs: + PR-Actions: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + cache: 'pip' # caching pip dependencies + - name: Install dependencies + run: pip install -r requirements.txt + + - name: Check site + run: | + python check_sites.py -p https://rakuten.co.jp > result.txt + if grep -q 'success' result.txt; then + echo "Check successful" + else + echo "Check failed, see message below:" + cat result.txt + # Fail the step so the PR check reports the failure + exit 1 + fi \ No newline at end of file diff --git a/.gitignore b/.gitignore new file mode 100644 index 00000000..efa407c3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,162 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before 
PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. +# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/latest/usage/project/#working-with-version-control +.pdm.toml +.pdm-python +.pdm-build/ + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +#.idea/ \ No newline at end of file diff --git a/check_sites.py b/check_sites.py index 2dfaa068..e5fceff9 100644 --- a/check_sites.py +++ b/check_sites.py @@ -132,7 +132,7 @@ def main(): rws_checker.has_all_rationales, rws_checker.find_non_https_urls, rws_checker.find_invalid_eTLD_Plus1, - rws_checker.find_invalid_well_known, + #rws_checker.find_invalid_well_known, # This check is disabled for now as we didn't publish the files yet rws_checker.find_invalid_alias_eSLDs, rws_checker.find_robots_tag, rws_checker.find_ads_txt, diff --git a/related_website_sets.JSON b/related_website_sets.JSON index 58272b98..f1cb6f93 100644 --- a/related_website_sets.JSON +++ b/related_website_sets.JSON @@ -1,5 +1,36 @@ { "sets": [ + { + "contact": "contact@rakuten.com", + "primary": "https://rakuten.co.jp", + "associatedSites": [ + "https://rakuten-card.co.jp", + "https://rakuten-sec.co.jp", + "https://rakuten-bank.co.jp", + "https://rakuten-insurance.co.jp", + "https://fril.jp" + ], + "serviceSites": [ + "https://rakuten.net" + ], + "rationaleBySite": { + "https://rakuten-card.co.jp": 
"Rakuten Card", + "https://rakuten-sec.co.jp": "Rakuten Securities", + "https://rakuten-bank.co.jp": "Rakuten Bank", + "https://rakuten-insurance.co.jp": "Rakuten Insurance", + "https://fril.jp": "Rakuten Rakuma", + "https://r10s.jp": "CDN" + }, + "ccTLDs": { + "https://rakuten.co.jp": [ + "https://rakuten.com", + "https://rakuten.ne.jp", + "https://rakuten.co.uk", + "https://rakuten.es", + "https://rakuten.fr" + ] + } + }, { "contact": "oterrell28@gmail.com", "primary": "https://sackrace.ai", diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 00000000..bc262c51 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +publicsuffix2 +jsonschema +requests \ No newline at end of file