From de7d94229c44b84e43731613712ca0b1e79180dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Yves=20Leli=C3=A8vre?= Date: Thu, 25 Jul 2024 17:18:03 +0200 Subject: [PATCH] Fix article_extractor to only instantiate the Extractors in the specified file --- newsplease/pipeline/extractor/article_extractor.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/newsplease/pipeline/extractor/article_extractor.py b/newsplease/pipeline/extractor/article_extractor.py index 0a9f2404..ce246b48 100644 --- a/newsplease/pipeline/extractor/article_extractor.py +++ b/newsplease/pipeline/extractor/article_extractor.py @@ -18,6 +18,7 @@ def __init__(self, extractor_list): :param extractor_list: List of strings containing all extractors to be initialized. """ + def proc_instance(instance): if instance is not None: self.log.info('Extractor initialized: %s', extractor) @@ -35,15 +36,19 @@ def proc_instance(instance): else: extractor_module = extractor - module = importlib.import_module(__package__ + '.extractors.' + extractor_module) + module_name = __package__ + '.extractors.' + extractor_module + module = importlib.import_module(module_name) if isinstance(extractor, tuple): proc_instance(getattr(module, extractor[1], None)()) else: - # check module for subclasses of AbstractExtractor + # check in the current module for subclasses of AbstractExtractor for member in inspect.getmembers(module, inspect.isclass): - if issubclass(member[1], AbstractExtractor) and member[0] != 'AbstractExtractor': - + if ( + member[1].__module__ == module_name + and issubclass(member[1], AbstractExtractor) + and member[0] != 'AbstractExtractor' + ): # instantiate extractor proc_instance(getattr(module, member[0], None)())