From 46be4614c0e2c89f77e5ea8cb3dbf3b98ecabe37 Mon Sep 17 00:00:00 2001
From: Nicolay Rusnachenko
Date: Sat, 13 Jan 2024 11:03:38 +0000
Subject: [PATCH] #131 removed duplicated code parts

---
 test/test_pipeline_infer.py | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/test/test_pipeline_infer.py b/test/test_pipeline_infer.py
index 49c600d..d7199a1 100644
--- a/test/test_pipeline_infer.py
+++ b/test/test_pipeline_infer.py
@@ -14,9 +14,7 @@
 from arekit.common.data import const
 from arekit.common.pipeline.context import PipelineContext
 from arekit.contrib.utils.data.storages.row_cache import RowCacheStorage
-from arekit.common.docs.base import Document
 from arekit.common.docs.entities_grouping import EntitiesGroupingPipelineItem
-from arekit.common.docs.sentence import BaseDocumentSentence
 from arekit.common.experiment.data_type import DataType
 from arekit.common.pipeline.base import BasePipelineLauncher
 from arekit.common.synonyms.grouping import SynonymsCollectionValuesGroupingProviders
@@ -33,8 +31,6 @@
 from arelight.synonyms import iter_synonym_groups
 from arelight.utils import IdAssigner, get_default_download_dir
 
-from ru_sent_tokenize import ru_sent_tokenize
-
 
 
 class TestInfer(unittest.TestCase):
@@ -57,17 +53,6 @@ def iter_groups(filepath):
             for data in iter_synonym_groups(file):
                 yield data
 
-    @staticmethod
-    def input_to_docs(texts):
-        assert(isinstance(texts, list))
-        docs = []
-        for doc_id, contents in enumerate(texts):
-            sentences = ru_sent_tokenize(contents)
-            sentences = list(map(lambda text: BaseDocumentSentence(text), sentences))
-            doc = Document(doc_id=doc_id, sentences=sentences)
-            docs.append(doc)
-        return docs
-
     def create_sampling_params(self):
         target_func = lambda data_type: join(utils.TEST_OUT_DIR,
                                              "-".join(["samples", data_type.name.lower()]))
@@ -111,7 +96,7 @@ def launch(self, pipeline):
                 synonyms=synonyms,
                 dist_in_terms_bound=100,
                 dist_in_sentences=0,
-                doc_provider=utils.InMemoryDocProvider(docs=self.input_to_docs(actual_content)),
+                doc_provider=utils.InMemoryDocProvider(docs=utils.input_to_docs(actual_content)),
                 terms_per_context=50,
                 text_pipeline=text_parser)