biocommons · jsstevenson · Aug 29, 2025
@@ -1,15 +1,14 @@
 version = 1
 
 test_patterns = [
-  
+
 ]
 
 exclude_patterns = [
-  
+
 ]
 
 [[analyzers]]
 name = 'python'
 enabled = true
 runtime_version = '3.x.x'
-
@@ -26,4 +26,4 @@ jobs:
             https://raw.githubusercontent.com/biocommons/.github/main/etc/labels.yml
             .github/labels.yml
 
-          delete-other-labels: false
+          delete-other-labels: false
@@ -44,6 +44,20 @@ jobs:
       run: |
         ruff format --check .
 
+    - name: Ensure EOF line
+      uses: pre-commit/action@v3.0.1
+      with:
+        extra_args: end-of-file-fixer --all-files
+
+    - name: Fix trailing whitespace
+      uses: pre-commit/action@v3.0.1
+      with:
+        extra_args: trailing-whitespace --all-files
+
+    - name: Fix mixed line endings
+      uses: pre-commit/action@v3.0.1
+      with:
+        extra_args: mixed-line-ending --all-files
   test:
     runs-on: ubuntu-latest
 
@@ -136,4 +150,4 @@ jobs:
           name: python-package-distributions
           path: dist/
       - name: Publish distribution to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
+        uses: pypa/gh-action-pypi-publish@release/v1
@@ -1,6 +1,20 @@
 repos:
+- repo: https://github.com/pre-commit/pre-commit-hooks
+  rev: v5.0.0
+  hooks:
+    - id: check-added-large-files
+    - id: detect-private-key
+    - id: trailing-whitespace
+    - id: end-of-file-fixer
+    - id: check-merge-conflict
+    - id: detect-aws-credentials
+      args: [ --allow-missing-credentials ]
+    - id: mixed-line-ending
+      args: [ --fix=lf ]
 - repo: https://github.com/astral-sh/ruff-pre-commit
-  rev: v0.4.4
+  rev: v0.12.8
   hooks:
     - id: ruff-format
-      args: [ --check ]
+    - id: ruff
+      args: [ --fix, --exit-non-zero-on-fix ]
+minimum_pre_commit_version: 4.2.0
@@ -63,14 +63,7 @@ build: %:
 
 ############################################################################
 #= TESTING
-# see test configuration in setup.cfg
-
-#=> cqa: execute code quality tests
-cqa:
-	flake8 src --count --select=E9,F63,F7,F82 --show-source --statistics
-	isort --profile black --check src
-	ruff format --check src
-	bandit -ll -r src
+# see test configuration in pyproject.toml
 
 #=> test: execute tests
 #=> test-code: test code (including embedded doctests)
@@ -83,6 +76,17 @@ test-code:
 test-docs:
 	pytest docs
 
+#=> cqa: execute code quality tests
+cqa:
+	ruff format --check
+	ruff check
+
+#=> reformat: reformat code
+.PHONY: reformat
+reformat:
+	ruff check --fix
+	ruff format
+
 #=> tox -- run all tox tests
 tox:
 	tox

@@ -6,12 +6,12 @@ bioutils -- bioinformatics utilities and lookup tables
 
 bioutils provides some common utilities and lookup tables for bioinformatics.
 
-* bioutils.accessions -- parse accessions, infer namespaces 
+* bioutils.accessions -- parse accessions, infer namespaces
 * bioutils.assemblies -- Human assembly information (from NCBI/GRCh)
 * bioutils.cytobands -- map cytobands to coordinates (from UCSC cytoband tables)
 * bioutils.digests -- implementations of various digests
 * bioutils.normalize -- allele normalization (left shuffle, right shuffle, expanded, vcf)
-  
+
 
 To use an E-Utilities API key, add it to an environment variable
 called `ncbi_api_key` and it will be used in the E-Utilities request.
@@ -38,11 +38,10 @@ called `ncbi_api_key` and it will be used in the E-Utilities request.
 .. |pypi_badge| image:: https://img.shields.io/pypi/v/bioutils.svg
   :target: https://pypi.org/project/bioutils/
 
-	   
+
 .. |cc_badge| image:: https://api.codeclimate.com/v1/badges/3a99e06ad0a842174b0a/maintainability
    :target: https://codeclimate.com/github/biocommons/bioutils/maintainability
    :alt: Maintainability
 
 .. |cov_badge| image:: https://coveralls.io/repos/github/biocommons/bioutils/badge.svg?branch=master
    :target: https://coveralls.io/github/biocommons/bioutils?branch=master
-
@@ -2,12 +2,12 @@
 
 """compute and display ga4gh sequence identifiers for sequences in a fasta file
 
-snafu$ ./bin/fasta-ga4gh-identifier ~/Downloads/GCA_000001405.28_GRCh38.p13_genomic.fna.gz 
+snafu$ ./bin/fasta-ga4gh-identifier ~/Downloads/GCA_000001405.28_GRCh38.p13_genomic.fna.gz
 ga4gh:SQ.Ya6Rs7DHhDeg7YaOSg1EoNi3U_nQ9SvO	CM000663.2	CM000663.2 Homo sapiens chromosome 1, GRCh38 reference primary assembly
 ga4gh:SQ.pnAqCRBrTsUoBghSD1yp_jXWSmlbdh4g	CM000664.2	CM000664.2 Homo sapiens chromosome 2, GRCh38 reference primary assembly
 ga4gh:SQ.Zu7h9AggXxhTaGVsy7h_EZSChSZGcmgX	CM000665.2	CM000665.2 Homo sapiens chromosome 3, GRCh38 reference primary assembly
 
-snafu$ ./bin/fasta-ga4gh-identifier ~/Downloads/Homo_sapiens.GRCh38.dna.toplevel.fa.gz 
+snafu$ ./bin/fasta-ga4gh-identifier ~/Downloads/Homo_sapiens.GRCh38.dna.toplevel.fa.gz
 ga4gh:SQ.2YnepKM7OkBoOrKmvHbGqguVfF9amCST	1	1 dna:chromosome chromosome:GRCh38:1:1:248956422:1 REF
 ga4gh:SQ.lwDyBi432Py-7xnAISyQlnlhWDEaBPv2	2	2 dna:chromosome chromosome:GRCh38:2:1:242193529:1 REF
 ga4gh:SQ.Eqk6_SvMMDCc6C-uEfickOUWTatLMDQZ	3	3 dna:chromosome chromosome:GRCh38:3:1:198295559:1 REF

@@ -5,6 +5,6 @@ Change Log
 
 .. toctree::
    :maxdepth: 2
-   
+
    0.4/index
    0.5/index
@@ -3,12 +3,12 @@ bioutils -- bioinformatics utilities and lookup tables
 
 bioutils provides common utilities and lookup tables for bioinformatics.
 
-* bioutils.accessions -- parse accessions, infer namespaces 
+* bioutils.accessions -- parse accessions, infer namespaces
 * bioutils.assemblies -- Human assembly information (from NCBI/GRCh)
 * bioutils.cytobands -- map cytobands to coordinates (from UCSC cytoband tables)
 * bioutils.digests -- implementations of various digests
 * bioutils.normalize -- allele normalization (left shuffle, right shuffle, expanded, vcf)
-  
+
 To use an E-Utilities API key, add it to an environment variable
 called `ncbi_api_key` and it will be used in the E-Utilities request.
 

@@ -32,13 +32,10 @@ dependencies = [
 
 [project.optional-dependencies]
 dev = [
-    "bandit~=1.7",
     "build~=0.8",
-    "flake8~=4.0",
     "ipython~=8.4",
-    "isort~=5.10",
-    "pylint~=2.14",
-    "ruff == 0.4.4",
+    "pre-commit >= 4.2.0",
+    "ruff == 0.12.8",
 ]
 test = [
     "pytest~=7.1",
@@ -102,22 +99,95 @@ exclude_lines = [
     "if __name__ == .__main__.:",
 ]
 
-[tool.isort]
-profile = "black"
-
-# [tool.flake8]
-# flake8 does not support configuration in pyproject.toml
-# https://github.com/PyCQA/flake8/issues/234#issuecomment-812800832
-# The config in setup.cfg
-
-[tool.pylint.'MESSAGES CONTROL']
-disable = "R0913"
+[tool.ruff]
+src = ["src", "tests"]
+line-length = 100
+exclude = [
+    "bin",
+    "sbin",
+    "docs",
+]
 
+[tool.ruff.lint]
+select = ["ALL"]
+ignore = [
+    # not used
+    "AIR",
+    "ERA",
+    "FAST",
+    "YTT",
+    "FBT",
+    "CPY",
+    "DJ",
+    "EM",
+    "EXE",
+    "FIX",
+    "FA",
+    "INT",
+    "PYI",
+    "TID",
+    "TD",
+    "TC",
+    "C90",
+    "NPY",
+    "PD",
+    # ignore for compatibility with formatter
+    "D206",
+    "D300",
+    "W191",
+    "E111",
+    "E114",
+    "E117",
+    "E501",
+    "W191",
+    "S321",
+    "COM812",
+    # don't require types on *args, **kwargs
+    "ANN002",
+    "ANN003",
+    # subjective pylint thresholds
+    "PLR0904",
+    "PLR091",
+    "PLR1702",
+    # excessive docstring requirements
+    "D105",
+    "D205",
+    "D203",
+    "D213",  # conflicts with D212
+    "D400",
+    "D401",
+    "D403",
+    "D415",
+    # excessive type ignore requirements
+    "PGH003",
+    # kinda hairy to sort out
+    "PLC0206",
+    # maybe good to work on someday, but low priority
+    "TRY003",
+]
 
-[tool.pylint.format]
-max-line-length = "120"
+[tool.ruff.lint.per-file-ignores]
+# ANN001 - missing-type-function-argument
+# ANN2 - missing-return-type
+# D100 - undocumented-public-module
+# D102 - undocumented-public-method
+# D103 - undocumented-public-function
+# S101 - assert
+# B011 - assert-false
+# INP001 - implicit-namespace-package
+# PLR2004 - magic-value-comparison
+"tests/*" = [
+    "ANN001",
+    "ANN2",
+    "D10",
+    "S101",
+    "B011",
+    "INP001",
+    "PLR2004",
+]
 
+[tool.ruff.lint.flake8-annotations]
+mypy-init-return = true
 
-[tool.ruff]
-src = ["src", "tests"]
-line-length = 120
+[tool.ruff.format]
+docstring-code-format = true
@@ -145,4 +145,3 @@ if __name__ == "__main__":
         except Exception as e:
             logger.error("oopsie on " + assy_id_or_name)
             logger.exception(e)
-
@@ -36,7 +36,7 @@ create unique index assy_name_unique on assembly_sequence(assy, name);
 
 
 \copy assembly from assy.csv with CSV HEADER DELIMITER '   '
-\copy assembly_sequence from seqs.csv with CSV HEADER DELIMITER '        ' 
+\copy assembly_sequence from seqs.csv with CSV HEADER DELIMITER '        '
 
 """)
 
@@ -58,7 +58,7 @@ create unique index assy_name_unique on assembly_sequence(assy, name);
 
     for an in assys.keys():
         assy = assys[an]
-        
+
         assy_fh.writerow({
             "assy": an,
             "genbank_ac": assy["genbank_ac"],

@@ -21,7 +21,7 @@ rdr = csv.reader(open_any(sys.argv[1]), delimiter="\t")
 for row in rdr:
     chr, start, end, band, stain = row
     if chr.startswith("chr"):
-        chr = chr[3:] 
+        chr = chr[3:]
     chr_band_map[chr][band] = (int(start),int(end),stain)
 
 

@@ -1,3 +1,5 @@
+"""bioutils provides some common utilities and lookup tables for bioinformatics."""
+
 from importlib.metadata import PackageNotFoundError, version
 
 try:

@@ -16,5 +16,5 @@
 
 _logger = logging.getLogger(__package__)
 
-if sys.version_info < (3, 6):
+if sys.version_info < (3, 6):  # noqa: UP036
     _logger.warning(version_warning)
Original file line number	Diff line number	Diff line change
Expand Up		@@ -145,4 +145,3 @@ if __name__ == "__main__":
		except Exception as e:
		logger.error("oopsie on " + assy_id_or_name)
		logger.exception(e)