Skip to content

Commit

Permalink
use ruff to format and lint (#117)
Browse files Browse the repository at this point in the history
  • Loading branch information
nikvaessen authored Feb 15, 2025
1 parent 46f6364 commit c1b0d5e
Show file tree
Hide file tree
Showing 17 changed files with 160 additions and 57 deletions.
13 changes: 5 additions & 8 deletions .github/workflows/pythonpackage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,17 +34,14 @@ jobs:
run: |
uv sync --all-extras --all-groups
- name: Lint with flake8
- name: Lint with ruff
run: |
# stop the build if there are Python syntax errors or undefined names
uv run flake8 src/jiwer --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
uv run flake8 src/jiwer --count --exit-zero --max-complexity=10 --max-line-length=88 --statistics
uv run ruff check .
- name: Check formatting with black
- name: Check formatting with ruff
run: |
uv run black . --check
uv run ruff format . --diff
build:
runs-on: ubuntu-latest
strategy:
Expand Down
1 change: 1 addition & 0 deletions docs/gen_ref_pages.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Generate the code reference pages and navigation."""

from pathlib import Path

import mkdocs_gen_files

nav = mkdocs_gen_files.Nav()
Expand Down
8 changes: 8 additions & 0 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,14 @@ test-quick:
test:
uv run --group dev pytest

lint:
uv run --group dev ruff check .
uv run --group dev ruff format . --diff

format:
uv run --group dev ruff check --select I,RUF022 --fix .
uv run --group dev ruff format .

serve-docs:
uv run --group docs mkdocs serve

Expand Down
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ jiwer = "jiwer.cli:cli"

[dependency-groups]
dev = [
"black>=24.8.0",
"flake8>=5.0.4",
"ruff>=0.9.6",
"pytest>=8.3.4",
"pytest-benchmark>=4.0.0",
]
Expand All @@ -34,3 +33,9 @@ docs = [
[build-system]
requires = ["hatchling"]
build-backend = "hatchling.build"

[tool.ruff.lint.isort]
lines-between-types = 1

[tool.ruff.lint]
select = ["E4", "E7", "E9", "F", "B", "Q", "N", "I"]
81 changes: 76 additions & 5 deletions src/jiwer/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,78 @@
from .measures import *
from .transforms import *
from .transformations import *
from .alignment import *
from .process import *
from .alignment import collect_error_counts, visualize_alignment, visualize_error_counts
from .measures import cer, mer, wer, wil, wip
from .process import (
AlignmentChunk,
CharacterOutput,
WordOutput,
process_characters,
process_words,
)
from .transformations import (
cer_contiguous,
cer_default,
wer_contiguous,
wer_default,
wer_standardize,
wer_standardize_contiguous,
)
from .transforms import (
AbstractTransform,
Compose,
ExpandCommonEnglishContractions,
ReduceToListOfListOfChars,
ReduceToListOfListOfWords,
ReduceToSingleSentence,
RemoveEmptyStrings,
RemoveKaldiNonWords,
RemoveMultipleSpaces,
RemovePunctuation,
RemoveSpecificWords,
RemoveWhiteSpace,
Strip,
SubstituteRegexes,
SubstituteWords,
ToLowerCase,
ToUpperCase,
)

name = "jiwer"

__version__ = "4.0.0"
# Public API of the package.
#
# ``__all__`` must be a sequence of *strings* naming the public attributes of
# the module. Listing the imported objects themselves makes
# ``from jiwer import *`` fail with a ``TypeError`` and breaks tools that
# introspect ``__all__``, so every entry below is the quoted name of an
# object imported at the top of this module.
__all__ = [
    "visualize_alignment",
    "visualize_error_counts",
    "collect_error_counts",
    "cer",
    "mer",
    "wer",
    "wil",
    "wip",
    "AlignmentChunk",
    "CharacterOutput",
    "WordOutput",
    "process_characters",
    "process_words",
    "AbstractTransform",
    "Compose",
    "ExpandCommonEnglishContractions",
    "ReduceToListOfListOfChars",
    "ReduceToListOfListOfWords",
    "ReduceToSingleSentence",
    "RemoveEmptyStrings",
    "RemoveKaldiNonWords",
    "RemoveMultipleSpaces",
    "RemovePunctuation",
    "RemoveSpecificWords",
    "RemoveWhiteSpace",
    "Strip",
    "SubstituteRegexes",
    "SubstituteWords",
    "ToLowerCase",
    "ToUpperCase",
    "cer_contiguous",
    "cer_default",
    "wer_contiguous",
    "wer_default",
    "wer_standardize",
    "wer_standardize_contiguous",
]
19 changes: 10 additions & 9 deletions src/jiwer/alignment.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,13 @@
Utility method to visualize the alignment and errors between one or more reference
and hypothesis pairs.
"""

from collections import defaultdict
from typing import List, Union, Optional
from typing import List, Optional, Union

from jiwer.process import CharacterOutput, WordOutput, AlignmentChunk
from jiwer.process import AlignmentChunk, CharacterOutput, WordOutput

__all__ = ["visualize_alignment", "collect_error_counts", "visualize_error_counts"]
__all__ = ["collect_error_counts", "visualize_alignment", "visualize_error_counts"]


def visualize_alignment(
Expand Down Expand Up @@ -131,7 +132,7 @@ def visualize_alignment(
):
continue

final_str += f"=== SENTENCE {idx+1} ===\n\n"
final_str += f"=== SENTENCE {idx + 1} ===\n\n"
final_str += _construct_comparison_string(
gt, hp, chunks, include_space_seperator=not is_cer, line_width=line_width
)
Expand All @@ -146,12 +147,12 @@ def visualize_alignment(
final_str += f"hits={output.hits}\n"

if is_cer:
final_str += f"\ncer={output.cer*100:.2f}%\n"
final_str += f"\ncer={output.cer * 100:.2f}%\n"
else:
final_str += f"\nmer={output.mer*100:.2f}%"
final_str += f"\nwil={output.wil*100:.2f}%"
final_str += f"\nwip={output.wip*100:.2f}%"
final_str += f"\nwer={output.wer*100:.2f}%\n"
final_str += f"\nmer={output.mer * 100:.2f}%"
final_str += f"\nwil={output.wil * 100:.2f}%"
final_str += f"\nwip={output.wip * 100:.2f}%"
final_str += f"\nwer={output.wer * 100:.2f}%\n"
else:
# remove last newline
final_str = final_str[:-1]
Expand Down
3 changes: 2 additions & 1 deletion src/jiwer/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,10 @@
Provide a simple CLI wrapper for JiWER. The CLI does not support custom transforms.
"""

import click
import pathlib

import click

import jiwer


Expand Down
9 changes: 5 additions & 4 deletions src/jiwer/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,18 +41,19 @@
[jiwer.CharacterOutput][process.CharacterOutput]
classes.
"""

from typing import List, Union

from jiwer import transforms as tr
from jiwer.transformations import wer_default, cer_default
from jiwer.process import process_words, process_characters
from jiwer.process import process_characters, process_words
from jiwer.transformations import cer_default, wer_default

__all__ = [
"wer",
"cer",
"mer",
"wer",
"wil",
"wip",
"cer",
]

########################################################################################
Expand Down
17 changes: 8 additions & 9 deletions src/jiwer/process.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,22 +21,21 @@
so that measures can be computed and an alignment can be visualized.
"""

from dataclasses import dataclass
from collections import defaultdict
from dataclasses import dataclass
from typing import Any, List, Union

import rapidfuzz

from jiwer import transforms as tr
from jiwer.transformations import wer_default, cer_default

from jiwer.transformations import cer_default, wer_default

__all__ = [
"AlignmentChunk",
"WordOutput",
"CharacterOutput",
"process_words",
"WordOutput",
"process_characters",
"process_words",
]


Expand Down Expand Up @@ -228,7 +227,7 @@ def process_words(
alignments.append(sentence_op_chunks)

# Compute all measures
S, D, I, H = num_substitutions, num_deletions, num_insertions, num_hits
subs, dels, ins, hits = num_substitutions, num_deletions, num_insertions, num_hits

# special edge-case for empty references
if num_rf_words == 0:
Expand All @@ -244,12 +243,12 @@ def process_words(
wip = 0

else:
wer = float(S + D + I) / float(H + S + D)
mer = float(S + D + I) / float(H + S + D + I)
wer = float(subs + dels + ins) / float(hits + subs + dels)
mer = float(subs + dels + ins) / float(hits + subs + dels + ins)

# there is an edge-case when hypothesis is empty
if num_hp_words >= 1:
wip = (float(H) / num_rf_words) * (float(H) / num_hp_words)
wip = (float(hits) / num_rf_words) * (float(hits) / num_hp_words)
else:
wip = 0

Expand Down
6 changes: 3 additions & 3 deletions src/jiwer/transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@
import jiwer.transforms as tr

__all__ = [
"wer_default",
"cer_contiguous",
"cer_default",
"wer_contiguous",
"wer_default",
"wer_standardize",
"wer_standardize_contiguous",
"cer_default",
"cer_contiguous",
]

########################################################################################
Expand Down
9 changes: 4 additions & 5 deletions src/jiwer/transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,23 +33,22 @@
[transforms.ReduceToListOfListOfChars][].
"""

import sys
import functools
import re
import string
import sys
import unicodedata

from typing import Iterable, Union, List, Mapping

from typing import Iterable, List, Mapping, Union

__all__ = [
"AbstractTransform",
"Compose",
"ExpandCommonEnglishContractions",
"RemoveEmptyStrings",
"ReduceToListOfListOfWords",
"ReduceToListOfListOfChars",
"ReduceToListOfListOfWords",
"ReduceToSingleSentence",
"RemoveEmptyStrings",
"RemoveKaldiNonWords",
"RemoveMultipleSpaces",
"RemovePunctuation",
Expand Down
2 changes: 2 additions & 0 deletions tests/test_alignment.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import unittest

import jiwer

from jiwer import visualize_alignment


Expand Down
3 changes: 0 additions & 3 deletions tests/test_cer.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
import unittest
import pytest

import jiwer

from .test_measures import assert_dict_almost_equal


class TestCERInputMethods(unittest.TestCase):
def test_input_ref_string_hyp_string(self):
Expand Down
1 change: 1 addition & 0 deletions tests/test_empty_ref.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

import jiwer


Expand Down
1 change: 1 addition & 0 deletions tests/test_large_vocab.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import pytest

from jiwer import process_words, wer


Expand Down
16 changes: 10 additions & 6 deletions tests/test_measures.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import functools
import unittest
import pytest

import jiwer


Expand Down Expand Up @@ -217,11 +218,14 @@ def test_fail_on_different_sentence_length(self):
jiwer.wip,
jiwer.mer,
]:

def callback():
method(["hello", "this", "sentence", "is fractured"], ["this sentence"])

self.assertRaises(ValueError, callback)
self.assertRaises(
ValueError,
functools.partial(
method,
["hello", "this", "sentence", "is fractured"],
["this sentence"],
),
)

def test_known_values(self):
# Taken from the "From WER and RIL to MER and WIL" paper, for link see README.md
Expand Down
Loading

0 comments on commit c1b0d5e

Please sign in to comment.