From 95d672acd84af9456e741a5d975648741454dab2 Mon Sep 17 00:00:00 2001 From: Devis Lucato Date: Mon, 17 Jul 2023 15:59:25 -0700 Subject: [PATCH] Python: Azure Search installation and tests improvements (#2030) * Move Azure Search dependencies to own group * Improve Azure Search integration tests: try to delete collections when tests fail * Wait 1 sec after upsert to reduce random failures --------- Co-authored-by: Abby Harrison --- .github/workflows/python-unit-tests.yml | 32 +++--- python/poetry.lock | 103 +++++++++++------- python/pyproject.toml | 8 +- .../connectors/memory/test_azure_search.py | 83 +++++++++----- 4 files changed, 138 insertions(+), 88 deletions(-) diff --git a/.github/workflows/python-unit-tests.yml b/.github/workflows/python-unit-tests.yml index 9b9225b9a102..a4cbe06573fd 100644 --- a/.github/workflows/python-unit-tests.yml +++ b/.github/workflows/python-unit-tests.yml @@ -3,9 +3,9 @@ name: Python Unit Tests on: workflow_dispatch: pull_request: - branches: [ "main", "feature*" ] + branches: ["main", "feature*"] paths: - - 'python/**' + - "python/**" jobs: python-unit-tests: @@ -14,19 +14,19 @@ jobs: fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11"] - os: [ ubuntu-latest, windows-latest, macos-latest ] + os: [ubuntu-latest, windows-latest, macos-latest] steps: - - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v4 - with: - python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install poetry pytest - cd python - poetry install --without chromadb --without hugging_face - - name: Test with pytest - run: | - cd python && poetry run pytest ./tests/unit + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Install dependencies + run: | + python -m pip install poetry pytest + cd python + poetry install --without chromadb --without hugging_face --without azure_search --without weaviate --without pinecone --without postgres + - name: Test with pytest + run: | + cd python && poetry run pytest ./tests/unit diff --git a/python/poetry.lock b/python/poetry.lock index 758f8f39a4bd..1464b77e4df4 100644 --- a/python/poetry.lock +++ b/python/poetry.lock @@ -238,7 +238,7 @@ cryptography = ">=3.2" name = "azure-common" version = "1.1.28" description = "Microsoft Azure Client Library for Python (Common)" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -250,7 +250,7 @@ files = [ name = "azure-core" version = "1.28.0" description = "Microsoft Azure Core Library for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -270,7 +270,7 @@ aio = ["aiohttp (>=3.0)"] name = "azure-identity" version = "1.13.0" description = "Microsoft Azure Identity Library for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -289,7 +289,7 @@ six = ">=1.12.0" name = "azure-search-documents" version = "11.4.0b6" description = "Microsoft Azure Cognitive Search Client Library for Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -421,7 +421,7 @@ files = [ name = "cffi" version = "1.15.1" description = "Foreign Function Interface for Python calling C code." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -811,7 +811,7 @@ typing = ["mypy (>=0.990)"] name = "cryptography" version = "41.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -895,32 +895,35 @@ files = [ [[package]] name = "distlib" -version = "0.3.6" +version = "0.3.7" description = "Distribution utilities" category = "dev" optional = false python-versions = "*" files = [ - {file = "distlib-0.3.6-py2.py3-none-any.whl", hash = "sha256:f35c4b692542ca110de7ef0bea44d73981caeb34ca0b9b6b2e6d7790dda8f80e"}, - {file = "distlib-0.3.6.tar.gz", hash = "sha256:14bad2d9b04d3a36127ac97f30b12a19268f211063d8f8ee4f47108896e11b46"}, + {file = "distlib-0.3.7-py2.py3-none-any.whl", hash = "sha256:2e24928bc811348f0feb63014e97aaae3037f2cf48712d51ae61df7fd6075057"}, + {file = "distlib-0.3.7.tar.gz", hash = "sha256:9dafe54b34a028eafd95039d5e5d4851a13734540f1331060d31c9916e7147a8"}, ] [[package]] name = "dnspython" -version = "2.3.0" +version = "2.4.0" description = "DNS toolkit" category = "dev" optional = false -python-versions = ">=3.7,<4.0" +python-versions = ">=3.8,<4.0" files = [ - {file = "dnspython-2.3.0-py3-none-any.whl", hash = "sha256:89141536394f909066cabd112e3e1a37e4e654db00a25308b0f130bc3152eb46"}, - {file = "dnspython-2.3.0.tar.gz", hash = "sha256:224e32b03eb46be70e12ef6d64e0be123a64e621ab4c0822ff6d450d52a540b9"}, + {file = "dnspython-2.4.0-py3-none-any.whl", hash = "sha256:46b4052a55b56beea3a3bdd7b30295c292bd6827dd442348bc116f2d35b17f0a"}, + {file = "dnspython-2.4.0.tar.gz", hash = "sha256:758e691dbb454d5ccf4e1b154a19e52847f79e21a42fef17b969144af29a4e6c"}, ] +[package.dependencies] +httpcore = {version = ">=0.17.3", markers = "python_version >= \"3.8\""} +sniffio = ">=1.1,<2.0" + [package.extras] -curio = ["curio (>=1.2,<2.0)", "sniffio (>=1.1,<2.0)"] -dnssec = ["cryptography (>=2.6,<40.0)"] -doh = ["h2 (>=4.1.0)", "httpx (>=0.21.1)", "requests (>=2.23.0,<3.0.0)", "requests-toolbelt (>=0.9.1,<0.11.0)"] +dnssec = ["cryptography (>=2.6,<42.0)"] +doh = ["h2 (>=4.1.0)", "httpx (>=0.24.1)"] doq = ["aioquic (>=0.9.20)"] idna = ["idna (>=2.1,<4.0)"] trio = ["trio (>=0.14,<0.23)"] @@ -1213,6 +1216,28 @@ files = [ [package.dependencies] numpy = "*" +[[package]] +name = "httpcore" +version = "0.17.3" +description = "A minimal low-level HTTP client." +category = "dev" +optional = false +python-versions = ">=3.7" +files = [ + {file = "httpcore-0.17.3-py3-none-any.whl", hash = "sha256:c2789b767ddddfa2a5782e3199b2b7f6894540b17b16ec26b2c4d8e103510b87"}, + {file = "httpcore-0.17.3.tar.gz", hash = "sha256:a6f30213335e34c1ade7be6ec7c47f19f50c56db36abef1a9dfa3815b1cb3888"}, +] + +[package.dependencies] +anyio = ">=3.0,<5.0" +certifi = "*" +h11 = ">=0.13,<0.15" +sniffio = ">=1.0.0,<2.0.0" + +[package.extras] +http2 = ["h2 (>=3,<5)"] +socks = ["socksio (>=1.0.0,<2.0.0)"] + [[package]] name = "httptools" version = "0.6.0" @@ -1446,7 +1471,7 @@ test-extra = ["curio", "matplotlib (!=3.2.0)", "nbformat", "numpy (>=1.21)", "pa name = "isodate" version = "0.6.1" description = "An ISO 8601 date/time/duration parser and formatter" -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -1741,7 +1766,7 @@ tests = ["pytest (>=4.6)"] name = "msal" version = "1.22.0" description = "The Microsoft Authentication Library (MSAL) for Python library enables your app to access the Microsoft Cloud by supporting authentication of users with Microsoft Azure Active Directory accounts (AAD) and Microsoft Accounts (MSA) using industry standard OAuth2 and OpenID Connect." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -1761,7 +1786,7 @@ broker = ["pymsalruntime (>=0.13.2,<0.14)"] name = "msal-extensions" version = "1.0.0" description = "Microsoft Authentication Library extensions (MSAL EX) provides a persistence API that can save your data on disk, encrypted on Windows, macOS and Linux. Concurrent data access will be coordinated by a file lock mechanism." -category = "main" +category = "dev" optional = false python-versions = "*" files = [ @@ -2459,14 +2484,14 @@ grpc = ["googleapis-common-protos (>=1.53.0)", "grpc-gateway-protoc-gen-openapiv [[package]] name = "platformdirs" -version = "3.8.1" +version = "3.9.1" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a \"user data dir\"." category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "platformdirs-3.8.1-py3-none-any.whl", hash = "sha256:cec7b889196b9144d088e4c57d9ceef7374f6c39694ad1577a0aab50d27ea28c"}, - {file = "platformdirs-3.8.1.tar.gz", hash = "sha256:f87ca4fcff7d2b0f81c6a748a77973d7af0f4d526f98f308477c3c436c74d528"}, + {file = "platformdirs-3.9.1-py3-none-any.whl", hash = "sha256:ad8291ae0ae5072f66c16945166cb11c63394c7a3ad1b1bc9828ca3162da8c2f"}, + {file = "platformdirs-3.9.1.tar.gz", hash = "sha256:1b42b450ad933e981d56e59f1b97495428c9bd60698baab9f3eb3d00d5822421"}, ] [package.extras] @@ -2493,7 +2518,7 @@ testing = ["pytest", "pytest-benchmark"] name = "portalocker" version = "2.7.0" description = "Wraps the portalocker recipe for easy usage" -category = "main" +category = "dev" optional = false python-versions = ">=3.5" files = [ @@ -2800,7 +2825,7 @@ tests = ["pytest"] name = "pycparser" version = "2.21" description = "C parser in Python" -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" files = [ @@ -2880,7 +2905,7 @@ plugins = ["importlib-metadata"] name = "pyjwt" version = "2.7.0" description = "JSON Web Token implementation in Python" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -3546,7 +3571,7 @@ testing-integration = ["build[virtualenv]", "filelock (>=3.4.0)", "jaraco.envs ( name = "six" version = "1.16.0" description = "Python 2 and 3 compatibility utilities" -category = "main" +category = "dev" optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*" files = [ @@ -3964,7 +3989,7 @@ tutorials = ["matplotlib", "pandas", "tabulate"] name = "typing-extensions" version = "4.7.1" description = "Backported and Experimental Type Hints for Python 3.7+" -category = "main" +category = "dev" optional = false python-versions = ">=3.7" files = [ @@ -4004,14 +4029,14 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "uvicorn" -version = "0.22.0" +version = "0.23.0" description = "The lightning-fast ASGI server." category = "dev" optional = false -python-versions = ">=3.7" +python-versions = ">=3.8" files = [ - {file = "uvicorn-0.22.0-py3-none-any.whl", hash = "sha256:e9434d3bbf05f310e762147f769c9f21235ee118ba2d2bf1155a7196448bd996"}, - {file = "uvicorn-0.22.0.tar.gz", hash = "sha256:79277ae03db57ce7d9aa0567830bbb51d7a612f54d6e1e3e92da3ef24c2c8ed8"}, + {file = "uvicorn-0.23.0-py3-none-any.whl", hash = "sha256:479599b2c0bb1b9b394c6d43901a1eb0c1ec72c7d237b5bafea23c5b2d4cdf10"}, + {file = "uvicorn-0.23.0.tar.gz", hash = "sha256:d38ab90c0e2c6fe3a054cddeb962cfd5d0e0e6608eaaff4a01d5c36a67f3168c"}, ] [package.dependencies] @@ -4092,14 +4117,14 @@ test = ["flake8 (>=2.4.0)", "isort (>=4.2.2)", "pytest (>=2.2.3)"] [[package]] name = "virtualenv" -version = "20.23.1" +version = "20.24.0" description = "Virtual Python Environment builder" category = "dev" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.23.1-py3-none-any.whl", hash = "sha256:34da10f14fea9be20e0fd7f04aba9732f84e593dac291b757ce42e3368a39419"}, - {file = "virtualenv-20.23.1.tar.gz", hash = "sha256:8ff19a38c1021c742148edc4f81cb43d7f8c6816d2ede2ab72af5b84c749ade1"}, + {file = "virtualenv-20.24.0-py3-none-any.whl", hash = "sha256:18d1b37fc75cc2670625702d76849a91ebd383768b4e91382a8d51be3246049e"}, + {file = "virtualenv-20.24.0.tar.gz", hash = "sha256:e2a7cef9da880d693b933db7654367754f14e20650dc60e8ee7385571f8593a3"}, ] [package.dependencies] @@ -4379,19 +4404,19 @@ multidict = ">=4.0" [[package]] name = "zipp" -version = "3.16.1" +version = "3.16.2" description = "Backport of pathlib-compatible object wrapper for zip files" category = "dev" optional = false python-versions = ">=3.8" files = [ - {file = "zipp-3.16.1-py3-none-any.whl", hash = "sha256:0b37c326d826d5ca35f2b9685cd750292740774ef16190008b00a0227c256fe0"}, - {file = "zipp-3.16.1.tar.gz", hash = "sha256:857b158da2cbf427b376da1c24fd11faecbac5a4ac7523c3607f8a01f94c2ec0"}, + {file = "zipp-3.16.2-py3-none-any.whl", hash = "sha256:679e51dd4403591b2d6838a48de3d283f3d188412a9782faadf845f298736ba0"}, + {file = "zipp-3.16.2.tar.gz", hash = "sha256:ebc15946aa78bd63458992fc81ec3b6f7b1e92d51c35e6de1c3804e73b799147"}, ] [package.extras] docs = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"] -testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-mypy (>=0.9.1)", "pytest-ruff"] +testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "pytest (>=6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=2.2)", "pytest-ignore-flaky", "pytest-mypy (>=0.9.1)", "pytest-ruff"] [[package]] name = "zstandard" @@ -4455,4 +4480,4 @@ cffi = ["cffi (>=1.11)"] [metadata] lock-version = "2.0" python-versions = "^3.8" -content-hash = "9bea7ded976c2b16e3e83504f4f9a943499080005222c695067da796fcef7ebd" +content-hash = "4ba6ba6436918fe498163a71df58e3f97f1c842999c92d1bdafed70ad2cc6ee3" diff --git a/python/pyproject.toml b/python/pyproject.toml index 6226b8a4313d..67c2b00c6494 100644 --- a/python/pyproject.toml +++ b/python/pyproject.toml @@ -13,9 +13,6 @@ openai = "^0.27.0" aiofiles = "^23.1.0" python-dotenv = "1.0.0" regex = "^2023.6.3" -azure-search-documents = {version = "11.4.0b6", allow-prereleases = true} -azure-core = "^1.28.0" -azure-identity = "^1.13.0" [tool.poetry.group.dev.dependencies] pre-commit = "3.3.3" @@ -44,6 +41,11 @@ psycopg-pool = "^3.1.7" psycopg = "^3.1.9" psycopg-binary = "^3.1.9" +[tool.poetry.group.azure_search.dependencies] +azure-search-documents = {version = "11.4.0b6", allow-prereleases = true} +azure-core = "^1.28.0" +azure-identity = "^1.13.0" + [tool.isort] profile = "black" diff --git a/python/tests/integration/connectors/memory/test_azure_search.py b/python/tests/integration/connectors/memory/test_azure_search.py index 3694d858186c..bbd9bed7cda4 100644 --- a/python/tests/integration/connectors/memory/test_azure_search.py +++ b/python/tests/integration/connectors/memory/test_azure_search.py @@ -1,5 +1,6 @@ # Copyright (c) Microsoft. All rights reserved. +import time from random import randint import numpy as np @@ -36,8 +37,16 @@ async def test_collections(memory_store): n = randint(1000, 9999) collection = f"int-tests-{n}" await memory_store.create_collection_async(collection) - assert await memory_store.does_collection_exist_async(collection) + time.sleep(1) + try: + assert await memory_store.does_collection_exist_async(collection) + except: + await memory_store.delete_collection_async(collection) + raise + await memory_store.delete_collection_async(collection) + time.sleep(1) + assert not await memory_store.does_collection_exist_async(collection) @pytest.mark.asyncio @@ -45,20 +54,27 @@ async def test_upsert(memory_store): n = randint(1000, 9999) collection = f"int-tests-{n}" await memory_store.create_collection_async(collection) - assert await memory_store.does_collection_exist_async(collection) - rec = MemoryRecord( - is_reference=False, - external_source_name=None, - id=None, - description="some description", - text="some text", - additional_metadata=None, - embedding=np.array([0.2, 0.1, 0.2, 0.7]), - ) - await memory_store.upsert_async(collection, rec) - result = await memory_store.get_async(collection, rec._id) - assert result._id == rec._id - assert result._text == rec._text + time.sleep(1) + try: + assert await memory_store.does_collection_exist_async(collection) + rec = MemoryRecord( + is_reference=False, + external_source_name=None, + id=None, + description="some description", + text="some text", + additional_metadata=None, + embedding=np.array([0.2, 0.1, 0.2, 0.7]), + ) + await memory_store.upsert_async(collection, rec) + time.sleep(1) + result = await memory_store.get_async(collection, rec._id) + assert result._id == rec._id + assert result._text == rec._text + except: + await memory_store.delete_collection_async(collection) + raise + await memory_store.delete_collection_async(collection) @@ -67,19 +83,26 @@ async def test_search(memory_store): n = randint(1000, 9999) collection = f"int-tests-{n}" await memory_store.create_collection_async(collection) - assert await memory_store.does_collection_exist_async(collection) - rec = MemoryRecord( - is_reference=False, - external_source_name=None, - id=None, - description="some description", - text="some text", - additional_metadata=None, - embedding=np.array([0.1, 0.2, 0.3, 0.4]), - ) - await memory_store.upsert_async(collection, rec) - result = await memory_store.get_nearest_match_async( - collection, np.array([0.1, 0.2, 0.3, 0.38]) - ) - assert result[0]._id == rec._id + time.sleep(1) + try: + assert await memory_store.does_collection_exist_async(collection) + rec = MemoryRecord( + is_reference=False, + external_source_name=None, + id=None, + description="some description", + text="some text", + additional_metadata=None, + embedding=np.array([0.1, 0.2, 0.3, 0.4]), + ) + await memory_store.upsert_async(collection, rec) + time.sleep(1) + result = await memory_store.get_nearest_match_async( + collection, np.array([0.1, 0.2, 0.3, 0.38]) + ) + assert result[0]._id == rec._id + except: + await memory_store.delete_collection_async(collection) + raise + await memory_store.delete_collection_async(collection)