From dff149de72239fe286a39bb85cc096e57d5f4db0 Mon Sep 17 00:00:00 2001 From: Parashar Date: Thu, 1 Aug 2024 11:22:44 +0200 Subject: [PATCH] formatting and import optimizations --- scarf/__init__.py | 1 + scarf/ann.py | 8 ++++---- scarf/assay.py | 7 ++++--- scarf/bio_data.py | 1 + scarf/datastore/base_datastore.py | 4 +++- scarf/datastore/datastore.py | 6 ++++-- scarf/datastore/graph_datastore.py | 8 +++++--- scarf/datastore/mapping_datastore.py | 6 ++++-- scarf/dendrogram.py | 4 +++- scarf/downloader.py | 6 ++++-- scarf/feat_utils.py | 6 ++++-- scarf/harmony.py | 4 +++- scarf/knn_utils.py | 8 +++++--- scarf/mapping_utils.py | 7 +++++-- scarf/markers.py | 11 +++++++---- scarf/meld_assay.py | 11 ++++++----- scarf/metadata.py | 6 ++++-- scarf/plots.py | 10 ++++++---- scarf/readers.py | 8 +++++--- scarf/tests/__init__.py | 1 + scarf/tests/conftest.py | 4 +--- scarf/tests/fixtures_datastore.py | 5 +++-- scarf/tests/fixtures_downloader.py | 1 + scarf/tests/fixtures_readers.py | 1 + scarf/tests/test_datastore.py | 7 +++---- scarf/tests/test_metadata.py | 3 ++- scarf/tests/test_writers.py | 3 ++- scarf/umap.py | 1 + scarf/utils.py | 10 +++++----- scarf/writers.py | 10 ++++++---- setup.py | 12 ++++++------ 31 files changed, 110 insertions(+), 70 deletions(-) diff --git a/scarf/__init__.py b/scarf/__init__.py index 90eb25c..8ca465f 100644 --- a/scarf/__init__.py +++ b/scarf/__init__.py @@ -33,6 +33,7 @@ """ import warnings + from dask.array import PerformanceWarning from importlib_metadata import version diff --git a/scarf/ann.py b/scarf/ann.py index 81114ce..845435e 100644 --- a/scarf/ann.py +++ b/scarf/ann.py @@ -1,12 +1,12 @@ from typing import Optional + +import dask.array as da import numpy as np import pandas as pd -import dask.array as da - from threadpoolctl import threadpool_limits -from .utils import controlled_compute, logger, tqdmbar -from .harmony import run_harmony +from .harmony import run_harmony +from .utils import controlled_compute, logger, tqdmbar __all__ = ["AnnStream", "instantiate_knn_index", "fix_knn_query"] diff --git a/scarf/assay.py b/scarf/assay.py index 7e63b76..477d8c0 100644 --- a/scarf/assay.py +++ b/scarf/assay.py @@ -9,14 +9,15 @@ method for feature selection. """ - from typing import Tuple, List, Generator, Optional, Union + import numpy as np +import pandas as pd from dask.array.core import Array as daskArrayType from dask.array.core import from_zarr -from zarr import hierarchy as z_hierarchy from scipy.sparse import csr_matrix, vstack -import pandas as pd +from zarr import hierarchy as z_hierarchy + from .metadata import MetaData from .utils import show_dask_progress, controlled_compute, logger diff --git a/scarf/bio_data.py b/scarf/bio_data.py index f05c6e1..b6b7595 100644 --- a/scarf/bio_data.py +++ b/scarf/bio_data.py @@ -2,6 +2,7 @@ g. cell cycle genes). """ + from typing import List __all__ = ["s_phase_genes", "g2m_phase_genes"] diff --git a/scarf/datastore/base_datastore.py b/scarf/datastore/base_datastore.py index d3a127f..4a011e4 100644 --- a/scarf/datastore/base_datastore.py +++ b/scarf/datastore/base_datastore.py @@ -1,10 +1,12 @@ from typing import List, Union, Optional + import numpy as np import zarr from loguru import logger -from ..utils import show_dask_progress, controlled_compute, load_zarr, ZARRLOC + from ..assay import RNAassay, ATACassay, ADTassay, Assay from ..metadata import MetaData +from ..utils import show_dask_progress, controlled_compute, load_zarr, ZARRLOC def sanitize_hierarchy( diff --git a/scarf/datastore/datastore.py b/scarf/datastore/datastore.py index 0086cdd..440ba79 100644 --- a/scarf/datastore/datastore.py +++ b/scarf/datastore/datastore.py @@ -1,13 +1,15 @@ from typing import Iterable, Optional, Union, List, Literal, Tuple + import numpy as np import pandas as pd from dask import array as daskarr from loguru import logger + from .mapping_datastore import MappingDatastore -from ..writers import create_zarr_obj_array, create_zarr_dataset -from ..utils import tqdmbar, controlled_compute, ZARRLOC from ..assay import RNAassay, ATACassay from ..feat_utils import hto_demux +from ..utils import tqdmbar, controlled_compute, ZARRLOC +from ..writers import create_zarr_obj_array, create_zarr_dataset __all__ = ["DataStore"] diff --git a/scarf/datastore/graph_datastore.py b/scarf/datastore/graph_datastore.py index 60b2798..5235f94 100644 --- a/scarf/datastore/graph_datastore.py +++ b/scarf/datastore/graph_datastore.py @@ -1,13 +1,15 @@ -from typing import Tuple, Optional, Union, List, Callable import os +from typing import Tuple, Optional, Union, List, Callable + import numpy as np import pandas as pd +from dask.array import from_zarr # type: ignore from loguru import logger from scipy.sparse import csr_matrix, coo_matrix -from dask.array import from_zarr # type: ignore + from .base_datastore import BaseDataStore -from ..utils import clean_array, show_dask_progress, system_call, tqdmbar from ..assay import Assay +from ..utils import clean_array, show_dask_progress, system_call, tqdmbar from ..writers import create_zarr_dataset diff --git a/scarf/datastore/mapping_datastore.py b/scarf/datastore/mapping_datastore.py index 0712b28..2a072e3 100644 --- a/scarf/datastore/mapping_datastore.py +++ b/scarf/datastore/mapping_datastore.py @@ -1,13 +1,14 @@ -from typing import Generator, Tuple, List, Dict, Union, Callable, Optional import os +from typing import Generator, Tuple, List, Dict, Union, Callable, Optional + import numpy as np import pandas as pd from dask import array as daskarr from loguru import logger from scipy.sparse import csr_matrix + from .graph_datastore import GraphDataStore from ..assay import Assay, RNAassay -from ..writers import create_zarr_dataset from ..utils import ( show_dask_progress, clean_array, @@ -15,6 +16,7 @@ controlled_compute, system_call, ) +from ..writers import create_zarr_dataset class MappingDatastore(GraphDataStore): diff --git a/scarf/dendrogram.py b/scarf/dendrogram.py index b1e0aff..1c70ca6 100644 --- a/scarf/dendrogram.py +++ b/scarf/dendrogram.py @@ -1,6 +1,8 @@ -import numpy as np from typing import List, Dict + import networkx as nx +import numpy as np + from .utils import logger, tqdmbar __all__ = ["BalancedCut", "CoalesceTree", "make_digraph"] diff --git a/scarf/downloader.py b/scarf/downloader.py index 286e4fe..14147f6 100644 --- a/scarf/downloader.py +++ b/scarf/downloader.py @@ -10,12 +10,14 @@ - fetch_dataset: Downloads datasets from online repositories and saves them in as-is format """ +import io import os import tarfile -import pandas as pd -import io import time from json import JSONDecodeError + +import pandas as pd + from .utils import logger, tqdmbar __all__ = ["show_available_datasets", "fetch_dataset"] diff --git a/scarf/feat_utils.py b/scarf/feat_utils.py index bd16c73..0e53287 100644 --- a/scarf/feat_utils.py +++ b/scarf/feat_utils.py @@ -1,8 +1,10 @@ """Utility functions for features.""" -import pandas as pd -import numpy as np + from typing import List, Tuple +import numpy as np +import pandas as pd + __all__ = ["fit_lowess", "binned_sampling", "hto_demux"] diff --git a/scarf/harmony.py b/scarf/harmony.py index 1d7f117..291bdf1 100644 --- a/scarf/harmony.py +++ b/scarf/harmony.py @@ -1,7 +1,9 @@ from functools import partial -import pandas as pd + import numpy as np +import pandas as pd from sklearn.cluster import KMeans + from .utils import tqdmbar, logger diff --git a/scarf/knn_utils.py b/scarf/knn_utils.py index 196316d..9260654 100644 --- a/scarf/knn_utils.py +++ b/scarf/knn_utils.py @@ -1,13 +1,15 @@ """Utility functions for running the KNN algorithm.""" + from typing import List, Tuple + import numpy as np import pandas as pd -from scipy.sparse import csr_matrix, coo_matrix from numba import jit -from .writers import create_zarr_dataset +from scipy.sparse import csr_matrix, coo_matrix + from .ann import AnnStream from .utils import tqdmbar, controlled_compute - +from .writers import create_zarr_dataset __all__ = [ "self_query_knn", diff --git a/scarf/mapping_utils.py b/scarf/mapping_utils.py index c20ab80..76ab5aa 100644 --- a/scarf/mapping_utils.py +++ b/scarf/mapping_utils.py @@ -1,10 +1,13 @@ """Utility functions for the mapping.""" + +from typing import Tuple + import dask.array as daskarr import numpy as np -from typing import Tuple +import pandas as pd + from .assay import Assay from .utils import controlled_compute, show_dask_progress, logger, tqdmbar -import pandas as pd __all__ = ["align_features", "coral"] diff --git a/scarf/markers.py b/scarf/markers.py index a6f4053..00c3132 100644 --- a/scarf/markers.py +++ b/scarf/markers.py @@ -1,13 +1,16 @@ """Module to find biomarkers.""" -from scarf.assay import Assay -from scarf.utils import logger, tqdmbar -from numba import jit + +from typing import Optional + import numpy as np import pandas as pd +from numba import jit from scipy.stats import linregress -from typing import Optional from scipy.stats import rankdata +from scarf.assay import Assay +from scarf.utils import logger, tqdmbar + def read_prenormed_batches(store, cell_idx: np.ndarray, batch_size: int, desc: str): batch = {} diff --git a/scarf/meld_assay.py b/scarf/meld_assay.py index 73f8e8e..16a45d3 100644 --- a/scarf/meld_assay.py +++ b/scarf/meld_assay.py @@ -10,17 +10,18 @@ - coordinate_melding: """ +import gzip import logging from typing import Tuple, List, Union -import pandas as pd + import numpy as np -import gzip +import pandas as pd from numba import jit -from zarr import hierarchy from scipy.sparse import coo_matrix -from .writers import create_zarr_count_assay -from .utils import controlled_compute, logger, tqdmbar +from zarr import hierarchy +from .utils import controlled_compute, logger, tqdmbar +from .writers import create_zarr_count_assay __all__ = ["GffReader", "coordinate_melding"] diff --git a/scarf/metadata.py b/scarf/metadata.py index 63ea4ad..4c501ca 100644 --- a/scarf/metadata.py +++ b/scarf/metadata.py @@ -3,12 +3,14 @@ import re from typing import List, Iterable, Any, Dict, Tuple, Optional, Union + import numpy as np -from zarr import hierarchy as z_hierarchy import pandas as pd +from zarr import hierarchy as z_hierarchy + from .feat_utils import fit_lowess -from .writers import create_zarr_obj_array from .utils import logger +from .writers import create_zarr_obj_array zarrGroup = z_hierarchy.Group diff --git a/scarf/plots.py b/scarf/plots.py index 09ac69d..352d779 100644 --- a/scarf/plots.py +++ b/scarf/plots.py @@ -1,11 +1,14 @@ """Contains the code for plotting in Scarf.""" -import matplotlib.pyplot as plt + +from typing import Tuple, Optional, Union, List + import matplotlib as mpl -import seaborn as sns +import matplotlib.pyplot as plt import numpy as np import pandas as pd -from typing import Tuple, Optional, Union, List +import seaborn as sns from cmocean import cm + from .utils import logger plt.rcParams["svg.fonttype"] = "none" @@ -1258,7 +1261,6 @@ def plot_annotated_heatmap( save_dpi: int = 300, show_fig: bool = True, ): - import matplotlib.gridspec as gridspec import matplotlib.ticker as mticker if display_row_labels is None: diff --git a/scarf/readers.py b/scarf/readers.py index 940df7b..9ab16c2 100644 --- a/scarf/readers.py +++ b/scarf/readers.py @@ -9,14 +9,16 @@ - LoomReader: A class to read in data in the form of a Loom file. """ +import os from abc import ABC, abstractmethod from typing import Generator, Dict, List, Optional, Tuple +from typing import IO + +import h5py import numpy as np import pandas as pd -import os from scipy.sparse import coo_matrix -from typing import IO -import h5py + from .utils import logger, tqdmbar __all__ = [ diff --git a/scarf/tests/__init__.py b/scarf/tests/__init__.py index ed1a9bc..8794fad 100644 --- a/scarf/tests/__init__.py +++ b/scarf/tests/__init__.py @@ -1,6 +1,7 @@ import os import shutil import sys + from ..utils import logger logger.remove() diff --git a/scarf/tests/conftest.py b/scarf/tests/conftest.py index 3f4a0d2..8b13789 100644 --- a/scarf/tests/conftest.py +++ b/scarf/tests/conftest.py @@ -1,3 +1 @@ -from .fixtures_downloader import * -from .fixtures_readers import * -from .fixtures_datastore import * + diff --git a/scarf/tests/fixtures_datastore.py b/scarf/tests/fixtures_datastore.py index 2b89bdc..c8432d7 100644 --- a/scarf/tests/fixtures_datastore.py +++ b/scarf/tests/fixtures_datastore.py @@ -1,7 +1,8 @@ -import pytest -from . import full_path, remove import numpy as np import pandas as pd +import pytest + +from . import full_path, remove @pytest.fixture(scope="session") diff --git a/scarf/tests/fixtures_downloader.py b/scarf/tests/fixtures_downloader.py index c128020..63428b4 100644 --- a/scarf/tests/fixtures_downloader.py +++ b/scarf/tests/fixtures_downloader.py @@ -1,4 +1,5 @@ import pytest + from . import full_path, remove diff --git a/scarf/tests/fixtures_readers.py b/scarf/tests/fixtures_readers.py index 5904261..a1b8b74 100644 --- a/scarf/tests/fixtures_readers.py +++ b/scarf/tests/fixtures_readers.py @@ -1,4 +1,5 @@ import pytest + from . import full_path, remove diff --git a/scarf/tests/test_datastore.py b/scarf/tests/test_datastore.py index e152ff2..f2cfd2d 100644 --- a/scarf/tests/test_datastore.py +++ b/scarf/tests/test_datastore.py @@ -1,5 +1,6 @@ -import pandas as pd import numpy as np +import pandas as pd + from . import full_path, remove @@ -174,9 +175,7 @@ def test_run_pseudotime_marker_search(self, pseudotime_markers): full_path("pseudotime_markers_r_values.csv"), index_col=0 ) assert np.all(precalc_markers.index == pseudotime_markers.index) - assert np.all( - precalc_markers.names.values == pseudotime_markers.names.values - ) + assert np.all(precalc_markers.names.values == pseudotime_markers.names.values) assert np.allclose( precalc_markers.I__RNA_pseudotime__r.values, pseudotime_markers.I__RNA_pseudotime__r.values, diff --git a/scarf/tests/test_metadata.py b/scarf/tests/test_metadata.py index 4f19625..c5ffc0c 100644 --- a/scarf/tests/test_metadata.py +++ b/scarf/tests/test_metadata.py @@ -1,5 +1,6 @@ -import pytest import numpy as np +import pytest + from . import full_path, remove diff --git a/scarf/tests/test_writers.py b/scarf/tests/test_writers.py index 5738ffa..552ab49 100644 --- a/scarf/tests/test_writers.py +++ b/scarf/tests/test_writers.py @@ -1,6 +1,7 @@ -from . import full_path, remove import numpy as np +from . import full_path, remove + def test_crtozarr(crh5_reader): from ..writers import CrToZarr diff --git a/scarf/umap.py b/scarf/umap.py index 195599f..22f4bc7 100644 --- a/scarf/umap.py +++ b/scarf/umap.py @@ -4,6 +4,7 @@ # License: BSD 3 clause import locale + from .utils import logger locale.setlocale(locale.LC_NUMERIC, "C") diff --git a/scarf/utils.py b/scarf/utils.py index acac961..66cd6ca 100644 --- a/scarf/utils.py +++ b/scarf/utils.py @@ -9,15 +9,15 @@ - rolling_window: applies rolling window mean over a vector """ -from typing import Union, Optional, TypeAlias -from loguru import logger import sys +from typing import Union, Optional, TypeAlias + import numpy as np -from tqdm.dask import TqdmCallback +import zarr from dask.array.core import Array +from loguru import logger from numba import jit -import zarr - +from tqdm.dask import TqdmCallback __all__ = [ "logger", diff --git a/scarf/writers.py b/scarf/writers.py index d1ce01b..b14d964 100644 --- a/scarf/writers.py +++ b/scarf/writers.py @@ -18,12 +18,15 @@ - LoomToZarr: A class for converting data in a Loom file to a Zarr hierarchy. """ -import zarr +import os from typing import Any, Tuple, List, Union, Dict, Optional + import numpy as np -from .readers import CrReader, H5adReader, NaboH5Reader, LoomReader, CSVReader -import os import pandas as pd +import zarr +from scipy.sparse import csr_matrix, coo_matrix + +from .readers import CrReader, H5adReader, NaboH5Reader, LoomReader, CSVReader from .utils import ( controlled_compute, logger, @@ -32,7 +35,6 @@ load_zarr, ZARRLOC, ) -from scipy.sparse import csr_matrix, coo_matrix __all__ = [ "create_zarr_dataset", diff --git a/setup.py b/setup.py index 2824aaa..4b86e97 100644 --- a/setup.py +++ b/setup.py @@ -37,13 +37,13 @@ def run(self): if __name__ == "__main__": version = read("VERSION").rstrip("\n") - core_requirements = read_lines('requirements.txt') - extra_requirements = read_lines('requirements_extra.txt') + core_requirements = read_lines("requirements.txt") + extra_requirements = read_lines("requirements_extra.txt") setup( name="scarf", version=version, - python_requires='>=3.11', + python_requires=">=3.11", description="Scarf: A scalable tool for single-cell omics data analysis", long_description=read("pypi_README.rst"), long_description_content_type="text/x-rst", @@ -58,12 +58,12 @@ def run(self): "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.12", ], - keywords=['single-cell'], + keywords=["single-cell"], install_requires=core_requirements, extras_require={ - 'extra': extra_requirements, + "extra": extra_requirements, }, - packages=find_packages(exclude=['tests*']), + packages=find_packages(exclude=["tests*"]), include_package_data=False, cmdclass={"install": PostInstallCommand}, )