diff --git a/.codespellrc b/.codespellrc new file mode 100644 index 0000000000..a38787e126 --- /dev/null +++ b/.codespellrc @@ -0,0 +1,18 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +[codespell] +ignore-words-list = BoundIn,fo,MoR,NotIn,notIn,oT diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c0b9a31792..bdd1f362b5 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -69,6 +69,10 @@ repos: # --line-length is set to a high value to deal with very long lines - --line-length - '99999' + - repo: https://github.com/codespell-project/codespell + rev: v2.3.0 + hooks: + - id: codespell ci: autofix_commit_msg: | [pre-commit.ci] auto fixes from pre-commit.com hooks diff --git a/pyiceberg/avro/reader.py b/pyiceberg/avro/reader.py index 988bd42ba4..a5578680d6 100644 --- a/pyiceberg/avro/reader.py +++ b/pyiceberg/avro/reader.py @@ -51,7 +51,7 @@ def _skip_map_array(decoder: BinaryDecoder, skip_entry: Callable[[], None]) -> None: """Skips over an array or map. - Both the array and map are encoded similar, and we can re-use + Both the array and map are encoded similarly, and we can reuse the logic of skipping in an efficient way. 
From the Avro spec: diff --git a/pyiceberg/io/pyarrow.py b/pyiceberg/io/pyarrow.py index ef6937f1bb..e8c9f64d63 100644 --- a/pyiceberg/io/pyarrow.py +++ b/pyiceberg/io/pyarrow.py @@ -1536,7 +1536,7 @@ def _to_requested_schema( include_field_ids: bool = False, use_large_types: bool = True, ) -> pa.RecordBatch: - # We could re-use some of these visitors + # We could reuse some of these visitors struct_array = visit_with_partner( requested_schema, batch, diff --git a/pyiceberg/utils/singleton.py b/pyiceberg/utils/singleton.py index 8a4bbf91ce..06ee62febe 100644 --- a/pyiceberg/utils/singleton.py +++ b/pyiceberg/utils/singleton.py @@ -15,7 +15,7 @@ # specific language governing permissions and limitations # under the License. """ -This is a singleton metaclass that can be used to cache and re-use existing objects. +This is a singleton metaclass that can be used to cache and reuse existing objects. In the Iceberg codebase we have a lot of objects that are stateless (for example Types such as StringType, BooleanType etc). FixedTypes have arguments (eg. Fixed[22]) that we also make part of the key when caching diff --git a/tests/test_transforms.py b/tests/test_transforms.py index 7ebab87e3a..6d04a1e4ce 100644 --- a/tests/test_transforms.py +++ b/tests/test_transforms.py @@ -899,7 +899,7 @@ def test_projection_truncate_string_set_same_result(bound_reference_str: BoundRe def test_projection_truncate_string_set_in(bound_reference_str: BoundReference[str]) -> None: assert TruncateTransform(3).project( "name", BoundIn(term=bound_reference_str, literals={literal("hello"), literal("world")}) - ) == In(term="name", literals={literal("hel"), literal("wor")}) + ) == In(term="name", literals={literal("hel"), literal("wor")}) # codespell:ignore hel def test_projection_truncate_string_set_not_in(bound_reference_str: BoundReference[str]) -> None: