
fix: fix DocList schema when using Pydantic V2 #1876

Merged · 31 commits · Mar 21, 2025

Commits

a864589  fix: try to fix doclist schema (Mar 8, 2024)
951679c  chore: push tmp changes (Mar 8, 2024)
febea8d  fix: make DocList properly a Generic (Mar 11, 2024)
11eeb6a  fix: make DocList properly a Generic (Mar 11, 2024)
3f4707f  Merge branch 'try-fix-doclist-schema' of https://github.com/docarray/… (Mar 11, 2024)
a07919d  fix: undo some changes (Mar 11, 2024)
5ec78bd  test: test fixes (Mar 12, 2024)
50e191a  test: set test (Mar 13, 2024)
949f185  fix: full test for fastapi (Mar 14, 2024)
d626453  fix: try to make generic (Mar 14, 2024)
f33c55f  Merge branch 'main' into try-fix-doclist-schema (Feb 25, 2025)
4180a4e  test: fix some tests (Feb 25, 2025)
d9782a4  Merge branch 'try-fix-doclist-schema' of https://github.com/docarray/… (Feb 25, 2025)
fb13c65  fix: small tests (Feb 25, 2025)
bfee17a  test: fix test (Feb 25, 2025)
2f80c56  fix: small test fix (Feb 25, 2025)
5ff2f68  test: change tests (Mar 12, 2025)
00245aa  fix: try to fix all pydantic-v1 tests (Mar 13, 2025)
40f9420  fix: fix small dynamic creation (Mar 13, 2025)
cb9cc94  test: further fix tests (Mar 13, 2025)
4f42c0b  test: new iteration (Mar 13, 2025)
cfa7ea5  test: more tests (Mar 14, 2025)
659b992  fix: tests (Mar 14, 2025)
9c94394  test: fix schemas from new model (Mar 17, 2025)
f422e88  fix: improve cleaning refs (Mar 18, 2025)
c7e9bf6  fix: get from definitions (Mar 18, 2025)
5d3e73f  fix: remove unneeded argument (Mar 18, 2025)
e12c03a  fix: fix update (Mar 18, 2025)
a6fef44  fix: handle ID optional (Mar 18, 2025)
559c0ee  fix: remove problematic action (Mar 18, 2025)
8265d95  fix: fix resp as json (Mar 19, 2025)

18 changes: 7 additions & 11 deletions .github/workflows/cd.yml
@@ -21,7 +21,7 @@ jobs:
- name: Pre-release (.devN)
run: |
git fetch --depth=1 origin +refs/tags/*:refs/tags/*
pip install poetry
pip install poetry==1.7.1
./scripts/release.sh
env:
PYPI_USERNAME: ${{ secrets.TWINE_USERNAME }}
@@ -35,20 +35,16 @@ jobs:
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0

- name: Get changed files
id: changed-files-specific
uses: tj-actions/changed-files@v41
with:
files: |
README.md
fetch-depth: 2

- name: Check if README is modified
id: step_output
if: steps.changed-files-specific.outputs.any_changed == 'true'
run: |
echo "readme_changed=true" >> $GITHUB_OUTPUT
if git diff --name-only HEAD^ HEAD | grep -q "README.md"; then
echo "readme_changed=true" >> $GITHUB_OUTPUT
else
echo "readme_changed=false" >> $GITHUB_OUTPUT
fi

publish-docarray-org:
needs: check-readme-modification

20 changes: 10 additions & 10 deletions .github/workflows/ci.yml
@@ -25,7 +25,7 @@ jobs:
- name: Lint with ruff
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install

# stop the build if there are Python syntax errors or undefined names
@@ -44,7 +44,7 @@ jobs:
- name: check black
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --only dev
poetry run black --check .

@@ -62,7 +62,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --without dev
poetry run pip install tensorflow==2.12.0
poetry run pip install jax
@@ -106,7 +106,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
poetry run pip install elasticsearch==8.6.2
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
@@ -156,7 +156,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip install protobuf==3.20.0 # we check that we support 3.19
@@ -204,7 +204,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip install protobuf==3.20.0
@@ -253,7 +253,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip install protobuf==3.20.0
@@ -302,7 +302,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip install protobuf==3.20.0
@@ -351,7 +351,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
./scripts/install_pydantic_v2.sh ${{ matrix.pydantic-version }}
poetry run pip uninstall -y torch
@@ -398,7 +398,7 @@ jobs:
- name: Prepare environment
run: |
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
poetry install --all-extras
poetry run pip uninstall -y torch
poetry run pip install torch

2 changes: 1 addition & 1 deletion .github/workflows/ci_only_pr.yml
@@ -43,7 +43,7 @@ jobs:
run: |
npm i -g netlify-cli
python -m pip install --upgrade pip
python -m pip install poetry
python -m pip install poetry==1.7.1
python -m poetry config virtualenvs.create false && python -m poetry install --no-interaction --no-ansi --all-extras

cd docs

54 changes: 54 additions & 0 deletions docarray/__init__.py
@@ -20,6 +20,60 @@
from docarray.array import DocList, DocVec
from docarray.base_doc.doc import BaseDoc
from docarray.utils._internal.misc import _get_path_from_docarray_root_level
from docarray.utils._internal.pydantic import is_pydantic_v2


def unpickle_doclist(doc_type, b):
return DocList[doc_type].from_bytes(b, protocol="protobuf")


def unpickle_docvec(doc_type, tensor_type, b):
return DocVec[doc_type].from_bytes(b, protocol="protobuf", tensor_type=tensor_type)


if is_pydantic_v2:
# Register the pickle functions
def register_serializers():
import copyreg
from functools import partial

unpickle_doc_fn = partial(BaseDoc.from_bytes, protocol="protobuf")

def pickle_doc(doc):
b = doc.to_bytes(protocol='protobuf')
return unpickle_doc_fn, (doc.__class__, b)

# Register BaseDoc serialization
copyreg.pickle(BaseDoc, pickle_doc)

# For DocList, we need to hook into __reduce__ since it's a generic

def pickle_doclist(doc_list):
b = doc_list.to_bytes(protocol='protobuf')
doc_type = doc_list.doc_type
return unpickle_doclist, (doc_type, b)

# Replace DocList.__reduce__ with a method that returns the correct format
def doclist_reduce(self):
return pickle_doclist(self)

DocList.__reduce__ = doclist_reduce

# For DocVec, we need to hook into __reduce__ since it's a generic

def pickle_docvec(doc_vec):
b = doc_vec.to_bytes(protocol='protobuf')
doc_type = doc_vec.doc_type
tensor_type = doc_vec.tensor_type
return unpickle_docvec, (doc_type, tensor_type, b)

# Replace DocList.__reduce__ with a method that returns the correct format
def docvec_reduce(self):
return pickle_docvec(self)

DocVec.__reduce__ = docvec_reduce

register_serializers()

__all__ = ['BaseDoc', 'DocList', 'DocVec']
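
For context: the block above registers pickle support for BaseDoc, DocList and DocVec under pydantic v2 by serializing to protobuf bytes and rebuilding the parametrized container on load. A minimal sketch of the intended effect (MyDoc is a made-up example class, not part of this PR):

import pickle

from docarray import BaseDoc, DocList


class MyDoc(BaseDoc):
    text: str = ''


docs = DocList[MyDoc]([MyDoc(text='hello'), MyDoc(text='world')])

# the registered __reduce__ serializes to protobuf bytes and rebuilds
# DocList[MyDoc] on unpickling, e.g. when crossing a multiprocessing boundary
restored = pickle.loads(pickle.dumps(docs))
assert restored.doc_type is MyDoc
assert [d.text for d in restored] == ['hello', 'world']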


40 changes: 31 additions & 9 deletions docarray/array/any_array.py
@@ -25,6 +25,7 @@
from docarray.exceptions.exceptions import UnusableObjectError
from docarray.typing.abstract_type import AbstractType
from docarray.utils._internal._typing import change_cls_name, safe_issubclass
from docarray.utils._internal.pydantic import is_pydantic_v2

if TYPE_CHECKING:
from docarray.proto import DocListProto, NodeProto
@@ -73,8 +74,19 @@ def __class_getitem__(cls, item: Union[Type[BaseDocWithoutId], TypeVar, str]):
# Promote to global scope so multiprocessing can pickle it
global _DocArrayTyped

class _DocArrayTyped(cls): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(Type[BaseDocWithoutId], item)
if not is_pydantic_v2:

class _DocArrayTyped(cls): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(
Type[BaseDocWithoutId], item
)

else:

class _DocArrayTyped(cls, Generic[T_doc]): # type: ignore
doc_type: Type[BaseDocWithoutId] = cast(
Type[BaseDocWithoutId], item
)

for field in _DocArrayTyped.doc_type._docarray_fields().keys():

@@ -99,14 +111,24 @@ def _setter(self, value):
setattr(_DocArrayTyped, field, _property_generator(field))
# this generates property on the fly based on the schema of the item

# The global scope and qualname need to refer to this class a unique name.
# Otherwise, creating another _DocArrayTyped will overwrite this one.
change_cls_name(
_DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals()
)

cls.__typed_da__[cls][item] = _DocArrayTyped
# # The global scope and qualname need to refer to this class a unique name.
# # Otherwise, creating another _DocArrayTyped will overwrite this one.
if not is_pydantic_v2:
change_cls_name(
_DocArrayTyped, f'{cls.__name__}[{item.__name__}]', globals()
)

cls.__typed_da__[cls][item] = _DocArrayTyped
else:
change_cls_name(_DocArrayTyped, f'{cls.__name__}', globals())
if sys.version_info < (3, 12):
cls.__typed_da__[cls][item] = Generic.__class_getitem__.__func__(
_DocArrayTyped, item
) # type: ignore
# this do nothing that checking that item is valid type var or str
# Keep the approach in #1147 to be compatible with lower versions of Python.
else:
cls.__typed_da__[cls][item] = GenericAlias(_DocArrayTyped, item) # type: ignore
return cls.__typed_da__[cls][item]

@overload
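
For readers unfamiliar with the pattern above: under pydantic v2 the parametrized class is routed through the stock generic machinery, Generic.__class_getitem__ on Python < 3.12 and types.GenericAlias on 3.12+, so that DocList[SomeDoc] still behaves like a real parametrized Generic while one dynamically created subclass per item type is kept in a cache. A rough standalone sketch of that pattern, with purely illustrative names that are not docarray API:

import sys
from types import GenericAlias
from typing import Generic, TypeVar

T = TypeVar('T')


class Container(Generic[T]):
    item_type: type = object
    _typed_cache: dict = {}

    def __class_getitem__(cls, item):
        if not isinstance(item, type):
            # TypeVars, forward refs etc. go through the stock Generic machinery
            return super().__class_getitem__(item)
        if item not in cls._typed_cache:
            # one concrete subclass per item type, akin to _DocArrayTyped above
            class _Typed(cls, Generic[T]):  # type: ignore[valid-type, misc]
                pass

            _Typed.item_type = item
            _Typed.__name__ = _Typed.__qualname__ = f'{cls.__name__}[{item.__name__}]'
            if sys.version_info < (3, 12):
                # parametrize through the underlying Generic machinery
                cls._typed_cache[item] = Generic.__class_getitem__.__func__(_Typed, item)
            else:
                cls._typed_cache[item] = GenericAlias(_Typed, item)
        return cls._typed_cache[item]


IntContainer = Container[int]
assert Container[int] is IntContainer  # cached: repeated access returns the same object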

25 changes: 17 additions & 8 deletions docarray/array/doc_list/doc_list.py
@@ -12,6 +12,7 @@
Union,
cast,
overload,
Callable,
)

from pydantic import parse_obj_as
@@ -28,7 +29,6 @@
from docarray.utils._internal.pydantic import is_pydantic_v2

if is_pydantic_v2:
from pydantic import GetCoreSchemaHandler
from pydantic_core import core_schema

from docarray.utils._internal._typing import safe_issubclass
@@ -45,10 +45,7 @@


class DocList(
ListAdvancedIndexing[T_doc],
PushPullMixin,
IOMixinDocList,
AnyDocArray[T_doc],
ListAdvancedIndexing[T_doc], PushPullMixin, IOMixinDocList, AnyDocArray[T_doc]
):
"""
DocList is a container of Documents.
@@ -357,8 +354,20 @@ def __repr__(self):

@classmethod
def __get_pydantic_core_schema__(
cls, _source_type: Any, _handler: GetCoreSchemaHandler
cls, source: Any, handler: Callable[[Any], core_schema.CoreSchema]
) -> core_schema.CoreSchema:
return core_schema.general_plain_validator_function(
cls.validate,
instance_schema = core_schema.is_instance_schema(cls)
args = getattr(source, '__args__', None)
if args:
sequence_t_schema = handler(Sequence[args[0]])
else:
sequence_t_schema = handler(Sequence)

def validate_fn(v, info):
# input has already been validated
return cls(v, validate_input_docs=False)

non_instance_schema = core_schema.with_info_after_validator_function(
validate_fn, sequence_t_schema
)
return core_schema.union_schema([instance_schema, non_instance_schema])
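
A hedged sketch of what this schema is meant to allow under pydantic v2 (MyDoc and Library are made-up names, not part of the PR): DocList[MyDoc] can be used directly as a model field, accepting either an existing DocList (the is_instance branch) or a plain sequence that is validated item by item (the Sequence branch), and JSON schema generation for such models works again:

from pydantic import BaseModel

from docarray import BaseDoc, DocList


class MyDoc(BaseDoc):
    text: str = ''


class Library(BaseModel):
    docs: DocList[MyDoc]


# sequence branch of the union: items are validated into MyDoc first, then
# wrapped without re-validating (validate_input_docs=False above)
lib = Library(docs=[{'text': 'a'}, {'text': 'b'}])
assert isinstance(lib.docs, DocList)

# instance branch: an existing DocList[MyDoc] passes straight through
lib2 = Library(docs=lib.docs)

# generating the JSON schema for a DocList field is the case this PR targets
schema = Library.model_json_schema()
assert 'docs' in schema['properties']
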
1 change: 0 additions & 1 deletion docarray/array/doc_list/io.py
@@ -256,7 +256,6 @@ def to_bytes(
:param show_progress: show progress bar, only works when protocol is `pickle` or `protobuf`
:return: the binary serialization in bytes or None if file_ctx is passed where to store
"""

with file_ctx or io.BytesIO() as bf:
self._write_bytes(
bf=bf,

6 changes: 4 additions & 2 deletions docarray/array/doc_vec/doc_vec.py
@@ -198,7 +198,7 @@ def _check_doc_field_not_none(field_name, doc):
if safe_issubclass(tensor.__class__, tensor_type):
field_type = tensor_type

if isinstance(field_type, type):
if isinstance(field_type, type) or safe_issubclass(field_type, AnyDocArray):
if tf_available and safe_issubclass(field_type, TensorFlowTensor):
# tf.Tensor does not allow item assignment, therefore the
# optimized way
@@ -335,7 +335,9 @@ def _docarray_validate(
return cast(T, value.to_doc_vec())
else:
raise ValueError(f'DocVec[value.doc_type] is not compatible with {cls}')
elif isinstance(value, DocList.__class_getitem__(cls.doc_type)):
elif not is_pydantic_v2 and isinstance(
value, DocList.__class_getitem__(cls.doc_type)
):
return cast(T, value.to_doc_vec())
elif isinstance(value, Sequence):
return cls(value)
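
To illustrate the case the added safe_issubclass(field_type, AnyDocArray) check above covers (Page and Book are hypothetical classes): under pydantic v2 a field annotated as DocList[...] is a generic alias rather than a plain class, so isinstance(field_type, type) alone would skip it while DocVec builds its columns. A rough, hedged sketch:

import numpy as np

from docarray import BaseDoc, DocList, DocVec
from docarray.typing import NdArray


class Page(BaseDoc):
    embedding: NdArray


class Book(BaseDoc):
    pages: DocList[Page]


# a DocVec over docs with a nested DocList field exercises that branch
books = DocVec[Book](
    [Book(pages=DocList[Page]([Page(embedding=np.zeros(3))])) for _ in range(2)]
)
assert len(books) == 2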

10 changes: 7 additions & 3 deletions docarray/base_doc/doc.py
@@ -326,8 +326,13 @@ def _exclude_doclist(
from docarray.array.any_array import AnyDocArray

type_ = self._get_field_annotation(field)
if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray):
doclist_exclude_fields.append(field)
if is_pydantic_v2:
# Conservative when touching pydantic v1 logic
if safe_issubclass(type_, AnyDocArray):
doclist_exclude_fields.append(field)
else:
if isinstance(type_, type) and safe_issubclass(type_, AnyDocArray):
doclist_exclude_fields.append(field)

original_exclude = exclude
if exclude is None:
@@ -480,7 +485,6 @@ def model_dump( # type: ignore
warnings: bool = True,
) -> Dict[str, Any]:
def _model_dump(doc):

(
exclude_,
original_exclude,

4 changes: 1 addition & 3 deletions docarray/base_doc/mixins/update.py
@@ -110,9 +110,7 @@ def _group_fields(doc: 'UpdateMixin') -> _FieldGroups:
if field_name not in FORBIDDEN_FIELDS_TO_UPDATE:
field_type = doc._get_field_annotation(field_name)

if isinstance(field_type, type) and safe_issubclass(
field_type, DocList
):
if safe_issubclass(field_type, DocList):
nested_docarray_fields.append(field_name)
else:
origin = get_origin(field_type)

8 changes: 4 additions & 4 deletions docarray/index/backends/elastic.py
@@ -352,12 +352,12 @@ def python_type_to_db_type(self, python_type: Type) -> Any:
dict: 'object',
}

for type in elastic_py_types.keys():
if safe_issubclass(python_type, type):
for t in elastic_py_types.keys():
if safe_issubclass(python_type, t):
self._logger.info(
f'Mapped Python type {python_type} to database type "{elastic_py_types[type]}"'
f'Mapped Python type {python_type} to database type "{elastic_py_types[t]}"'
)
return elastic_py_types[type]
return elastic_py_types[t]

err_msg = f'Unsupported column type for {type(self)}: {python_type}'
self._logger.error(err_msg)