Skip to content

Commit

Permalink
#131 removed duplicated code parts
Browse files Browse the repository at this point in the history
  • Loading branch information
nicolay-r committed Jan 13, 2024
1 parent a706d79 commit 3e1b38f
Showing 1 changed file with 3 additions and 18 deletions.
21 changes: 3 additions & 18 deletions test/test_pipeline_sample.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,18 @@
from arekit.common.pipeline.items.base import BasePipelineItem
from arekit.common.utils import split_by_whitespaces

import utils
import unittest
import ru_sent_tokenize

from ru_sent_tokenize import ru_sent_tokenize
from os.path import join

from arekit.common.docs.base import Document
from arekit.common.utils import split_by_whitespaces
from arekit.common.docs.entities_grouping import EntitiesGroupingPipelineItem
from arekit.common.docs.sentence import BaseDocumentSentence
from arekit.common.experiment.data_type import DataType
from arekit.common.labels.base import NoLabel
from arekit.common.labels.scaler.single import SingleLabelScaler
from arekit.common.pipeline.base import BasePipelineLauncher
from arekit.common.synonyms.grouping import SynonymsCollectionValuesGroupingProviders
from arekit.common.data import const
from arekit.common.pipeline.context import PipelineContext
from arekit.common.pipeline.items.base import BasePipelineItem
from arekit.contrib.utils.data.writers.sqlite_native import SQliteWriter
from arekit.contrib.utils.processing.lemmatization.mystem import MystemWrapper
from arekit.contrib.utils.synonyms.stemmer_based import StemmerBasedSynonymCollection
Expand Down Expand Up @@ -49,16 +44,6 @@ class BertTestSerialization(unittest.TestCase):
model.
"""

@staticmethod
def input_to_docs(texts):
    """Convert raw texts into a list of ``Document`` objects.

    Each text is split into sentences with ``ru_sent_tokenize``; every
    sentence is wrapped into a ``BaseDocumentSentence``. The position of
    the text within ``texts`` (starting from 0) is used as ``doc_id``.

    :param texts: iterable of raw text strings, one per document.
    :return: list of ``Document`` instances, in the order of ``texts``.
    """
    return [
        Document(
            doc_id=index,
            sentences=[BaseDocumentSentence(sentence)
                       for sentence in ru_sent_tokenize(raw_text)],
        )
        for index, raw_text in enumerate(texts)
    ]

@staticmethod
def iter_groups(filepath):
with open(filepath, 'r', encoding='utf-8') as file:
Expand Down Expand Up @@ -95,7 +80,7 @@ def test(self):
]

# Composing labels formatter and experiment preparation.
doc_provider = utils.InMemoryDocProvider(docs=BertTestSerialization.input_to_docs(texts))
doc_provider = utils.InMemoryDocProvider(docs=utils.input_to_docs(texts))
pipeline = [
AREkitSerializerPipelineItem(
rows_provider=create_bert_sample_provider(
Expand Down

0 comments on commit 3e1b38f

Please sign in to comment.