From b64672da8c6de19542cd9eb2fd666d1d3f7a2713 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 10:38:29 -0500 Subject: [PATCH 1/7] added hf token Signed-off-by: Maroun Touma --- transforms/language/lang_id/test/test_lang_id.py | 3 ++- transforms/language/lang_id/test/test_lang_id_python.py | 2 +- transforms/language/lang_id/test/test_lang_id_ray.py | 2 +- transforms/language/lang_id/test/test_nlp.py | 6 +++++- 4 files changed, 9 insertions(+), 4 deletions(-) diff --git a/transforms/language/lang_id/test/test_lang_id.py b/transforms/language/lang_id/test/test_lang_id.py index 10984d4a40..7e7ccb385d 100644 --- a/transforms/language/lang_id/test/test_lang_id.py +++ b/transforms/language/lang_id/test/test_lang_id.py @@ -10,6 +10,7 @@ # limitations under the License. ################################################################################ +import os import pyarrow as pa from data_processing.test_support.transform.table_transform_test import ( AbstractTableTransformTest, @@ -26,7 +27,7 @@ class TestLangIdentificationTransform(AbstractTableTransformTest): def get_test_transform_fixtures(self) -> list[tuple]: config = { - "model_credential": "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + "model_credential": os.environ.get('HF_READ_ACCESS_TOKEN', "PUT YOUR OWN HUGGINGFACE CREDENTIAL"), "model_kind": KIND_FASTTEXT, "model_url": "facebook/fasttext-language-identification", "content_column_name": "contents", diff --git a/transforms/language/lang_id/test/test_lang_id_python.py b/transforms/language/lang_id/test/test_lang_id_python.py index 25e6702536..ca821c9f18 100644 --- a/transforms/language/lang_id/test/test_lang_id_python.py +++ b/transforms/language/lang_id/test/test_lang_id_python.py @@ -28,7 +28,7 @@ class TestPythonLangIdentificationTransform(AbstractTransformLauncherTest): def get_test_transform_fixtures(self) -> list[tuple]: cli_params = { - "lang_id_model_credential": "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + "lang_id_model_credential": 
os.environ.get('HF_READ_ACCESS_TOKEN', "PUT YOUR OWN HUGGINGFACE CREDENTIAL"), "lang_id_model_kind": KIND_FASTTEXT, "lang_id_model_url": "facebook/fasttext-language-identification", "lang_id_content_column_name": "text", diff --git a/transforms/language/lang_id/test/test_lang_id_ray.py b/transforms/language/lang_id/test/test_lang_id_ray.py index 466db2d86b..23cffe5556 100644 --- a/transforms/language/lang_id/test/test_lang_id_ray.py +++ b/transforms/language/lang_id/test/test_lang_id_ray.py @@ -37,7 +37,7 @@ def get_test_transform_fixtures(self) -> list[tuple]: basedir = "../test-data" basedir = os.path.abspath(os.path.join(os.path.dirname(__file__), basedir)) config = { - model_credential_cli_param: "PUT YOUR OWN HUGGINGFACE CREDENTIAL", + model_credential_cli_param: os.environ.get('HF_READ_ACCESS_TOKEN', "PUT YOUR OWN HUGGINGFACE CREDENTIAL"), model_kind_cli_param: KIND_FASTTEXT, model_url_cli_param: "facebook/fasttext-language-identification", content_column_name_cli_param: "text", diff --git a/transforms/language/lang_id/test/test_nlp.py b/transforms/language/lang_id/test/test_nlp.py index e61b2cc1cf..f6be51d707 100644 --- a/transforms/language/lang_id/test/test_nlp.py +++ b/transforms/language/lang_id/test/test_nlp.py @@ -10,6 +10,7 @@ # limitations under the License. 
################################################################################ +import os import pyarrow as pa from dpk_lang_id.lang_models import KIND_FASTTEXT, LangModelFactory from dpk_lang_id.nlp import get_lang_ds_pa @@ -17,8 +18,11 @@ def test_language_identification(): nlp_langid = LangModelFactory.create_model( - KIND_FASTTEXT, "facebook/fasttext-language-identification", "YOUR HUGGING FACE ACCOUNT TOKEN" + KIND_FASTTEXT, + "facebook/fasttext-language-identification", + os.environ.get('HF_READ_ACCESS_TOKEN', "YOUR HUGGING FACE ACCOUNT TOKEN") ) + documents = pa.array( [ "Der Tell Sabi Abyad („Hügel des weißen Jungen“) ist eine historische " From 3e92e89cfa63b2ce487e5188b519636a084b9545 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 10:47:28 -0500 Subject: [PATCH 2/7] testing with token Signed-off-by: Maroun Touma --- transforms/language/lang_id/test/test_lang_id.py | 1 + 1 file changed, 1 insertion(+) diff --git a/transforms/language/lang_id/test/test_lang_id.py b/transforms/language/lang_id/test/test_lang_id.py index 7e7ccb385d..07db1b1d6d 100644 --- a/transforms/language/lang_id/test/test_lang_id.py +++ b/transforms/language/lang_id/test/test_lang_id.py @@ -34,6 +34,7 @@ def get_test_transform_fixtures(self) -> list[tuple]: "output_lang_column_name": "l", "output_score_column_name": "s", } + print(config) table = pa.Table.from_arrays( [ pa.array( From 2e10337b3b94f0a326e3fbd0334eac37a308ac32 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 10:52:13 -0500 Subject: [PATCH 3/7] testing with token Signed-off-by: Maroun Touma --- transforms/language/lang_id/test/test_lang_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transforms/language/lang_id/test/test_lang_id.py b/transforms/language/lang_id/test/test_lang_id.py index 07db1b1d6d..3efb489091 100644 --- a/transforms/language/lang_id/test/test_lang_id.py +++ b/transforms/language/lang_id/test/test_lang_id.py @@ -34,7 +34,7 @@ def 
get_test_transform_fixtures(self) -> list[tuple]: "output_lang_column_name": "l", "output_score_column_name": "s", } - print(config) + print(os.environ) table = pa.Table.from_arrays( [ pa.array( From b9d22d5ab76cd755a2abb5a1b2e839a0df219b8c Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 12:30:53 -0500 Subject: [PATCH 4/7] add HF token secret Signed-off-by: Maroun Touma --- .github/workflows/test-language-lang_id.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test-language-lang_id.yml b/.github/workflows/test-language-lang_id.yml index 3108cca538..3a5a14a056 100644 --- a/.github/workflows/test-language-lang_id.yml +++ b/.github/workflows/test-language-lang_id.yml @@ -69,6 +69,9 @@ jobs: echo "publish_images=$publish_images" >> "$GITHUB_OUTPUT" test-src: runs-on: ubuntu-22.04 + env: + HF_READ_ACCESS_TOKEN: ${{secrets.HF_READ_ACCESS_TOKEN }} + steps: - name: Checkout uses: actions/checkout@v4 From dbd15147b1836b3546c9a995d01a2a1810812ad3 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 13:24:56 -0500 Subject: [PATCH 5/7] regenerate token Signed-off-by: Maroun Touma --- .github/workflows/test-language-lang_id.yml | 3 +-- transforms/language/lang_id/test/test_lang_id.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-language-lang_id.yml b/.github/workflows/test-language-lang_id.yml index 3a5a14a056..e279195dc8 100644 --- a/.github/workflows/test-language-lang_id.yml +++ b/.github/workflows/test-language-lang_id.yml @@ -70,8 +70,7 @@ jobs: test-src: runs-on: ubuntu-22.04 env: - HF_READ_ACCESS_TOKEN: ${{secrets.HF_READ_ACCESS_TOKEN }} - + HF_READ_ACCESS_TOKEN: ${{ secrets.HF_READ_ACCESS_TOKEN }} steps: - name: Checkout uses: actions/checkout@v4 diff --git a/transforms/language/lang_id/test/test_lang_id.py b/transforms/language/lang_id/test/test_lang_id.py index 3efb489091..0efd6d4393 100644 --- a/transforms/language/lang_id/test/test_lang_id.py +++ 
b/transforms/language/lang_id/test/test_lang_id.py @@ -34,7 +34,7 @@ def get_test_transform_fixtures(self) -> list[tuple]: "output_lang_column_name": "l", "output_score_column_name": "s", } - print(os.environ) + print(os.environ['HF_READ_ACCESS_TOKEN']) table = pa.Table.from_arrays( [ pa.array( From b6b12fc8a132cdfdafc9313527456689099cda88 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 13:30:52 -0500 Subject: [PATCH 6/7] expose all env --- transforms/language/lang_id/test/test_lang_id.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/transforms/language/lang_id/test/test_lang_id.py b/transforms/language/lang_id/test/test_lang_id.py index 0efd6d4393..3efb489091 100644 --- a/transforms/language/lang_id/test/test_lang_id.py +++ b/transforms/language/lang_id/test/test_lang_id.py @@ -34,7 +34,7 @@ def get_test_transform_fixtures(self) -> list[tuple]: "output_lang_column_name": "l", "output_score_column_name": "s", } - print(os.environ['HF_READ_ACCESS_TOKEN']) + print(os.environ) table = pa.Table.from_arrays( [ pa.array( From 3ff092337c2251f9356afab41572431fcbdc7e52 Mon Sep 17 00:00:00 2001 From: Maroun Touma Date: Tue, 11 Feb 2025 14:44:51 -0500 Subject: [PATCH 7/7] verifying env variables Signed-off-by: Maroun Touma --- .github/workflows/test-language-lang_id.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/test-language-lang_id.yml b/.github/workflows/test-language-lang_id.yml index e279195dc8..180c20e365 100644 --- a/.github/workflows/test-language-lang_id.yml +++ b/.github/workflows/test-language-lang_id.yml @@ -71,6 +71,8 @@ jobs: runs-on: ubuntu-22.04 env: HF_READ_ACCESS_TOKEN: ${{ secrets.HF_READ_ACCESS_TOKEN }} + DOCKER_REGISTRY_USER: ${{ secrets.DOCKER_REGISTRY_USER }} + DOCKER_REGISTRY_KEY: ${{ secrets.DOCKER_REGISTRY_KEY }} steps: - name: Checkout uses: actions/checkout@v4