From e45d038ddfc6bbf4f57e140697db91f8d69e45d0 Mon Sep 17 00:00:00 2001
From: Dou Mok
Date: Tue, 2 Jul 2024 14:32:55 -0700
Subject: [PATCH 1/3] Fix incorrect module_name in README.md example for instantiating a HashStore

---
 README.md | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index ec3dde38..20a39410 100644
--- a/README.md
+++ b/README.md
@@ -55,12 +55,12 @@ properties = {
     "store_path": "/path/to/your/store",
     "store_depth": 3,
     "store_width": 2,
-    "store_algorithm": "sha256",
-    "store_metadata_namespace": "http://ns.dataone.org/service/types/v2.0",
+    "store_algorithm": "SHA-256",
+    "store_metadata_namespace": "https://ns.dataone.org/service/types/v2.0#SystemMetadata",
 }
 
 # Get HashStore from factory
-module_name = "hashstore.filehashstore.filehashstore"
+module_name = "hashstore.filehashstore"
 class_name = "FileHashStore"
 my_store = hashstore_factory.get_hashstore(module_name, class_name, properties)
 

From 2ad42aac29aefc2feb92f07d18ba5d84b1e6fa87 Mon Sep 17 00:00:00 2001
From: Dou Mok
Date: Tue, 2 Jul 2024 14:40:25 -0700
Subject: [PATCH 2/3] Move 'threading' and 'multiprocessing' variable declaration to '__init__' process

---
 src/hashstore/filehashstore.py | 48 +++++++++++++++++++----------------
 1 file changed, 27 insertions(+), 21 deletions(-)

diff --git a/src/hashstore/filehashstore.py b/src/hashstore/filehashstore.py
index 3e201e77..e96ef690 100644
--- a/src/hashstore/filehashstore.py
+++ b/src/hashstore/filehashstore.py
@@ -73,31 +73,37 @@ class FileHashStore(HashStore):
         "blake2b",
         "blake2s",
     ]
-    # Variables to orchestrate parallelization
-    # Thread Synchronization
-    object_lock = threading.Lock()
-    object_condition = threading.Condition(object_lock)
-    object_locked_pids = []
-    metadata_lock = threading.Lock()
-    metadata_condition = threading.Condition(metadata_lock)
-    metadata_locked_docs = []
-    reference_lock = threading.Lock()
-    reference_condition = threading.Condition(reference_lock)
-    reference_locked_cids = []
-    # Multiprocessing Synchronization
-    object_lock_mp = multiprocessing.Lock()
-    object_condition_mp = multiprocessing.Condition(object_lock_mp)
-    object_locked_pids_mp = multiprocessing.Manager().list()
-    metadata_lock_mp = multiprocessing.Lock()
-    metadata_condition_mp = multiprocessing.Condition(metadata_lock_mp)
-    metadata_locked_docs_mp = multiprocessing.Manager().list()
-    reference_lock_mp = multiprocessing.Lock()
-    reference_condition_mp = multiprocessing.Condition(reference_lock_mp)
-    reference_locked_cids_mp = multiprocessing.Manager().list()
 
     def __init__(self, properties=None):
+        # Variables to orchestrate parallelization
         # Check to see whether a multiprocessing or threading sync lock should be used
         self.use_multiprocessing = os.getenv("USE_MULTIPROCESSING", "False") == "True"
+        if self.use_multiprocessing:  # already a bool, parsed from the env var above
+            # Create multiprocessing synchronization variables
+            self.object_lock_mp = multiprocessing.Lock()
+            self.object_condition_mp = multiprocessing.Condition(self.object_lock_mp)
+            self.object_locked_pids_mp = multiprocessing.Manager().list()
+            self.metadata_lock_mp = multiprocessing.Lock()
+            self.metadata_condition_mp = multiprocessing.Condition(
+                self.metadata_lock_mp
+            )
+            self.metadata_locked_docs_mp = multiprocessing.Manager().list()
+            self.reference_lock_mp = multiprocessing.Lock()
+            self.reference_condition_mp = multiprocessing.Condition(
+                self.reference_lock_mp
+            )
+            self.reference_locked_cids_mp = multiprocessing.Manager().list()
+        else:
+            # Create threading synchronization variables
+            self.object_lock = threading.Lock()
+            self.object_condition = threading.Condition(self.object_lock)
+            self.object_locked_pids = []
+            self.metadata_lock = threading.Lock()
+            self.metadata_condition = threading.Condition(self.metadata_lock)
+            self.metadata_locked_docs = []
+            self.reference_lock = threading.Lock()
+            self.reference_condition = threading.Condition(self.reference_lock)
+            self.reference_locked_cids = []
         # Now check properties
         if properties:
             # Validate properties against existing configuration if present

From 83d2421ded1415fa4d6fcbf2503372d834f0e9f0 Mon Sep 17 00:00:00 2001
From: Dou Mok
Date: Tue, 2 Jul 2024 14:40:59 -0700
Subject: [PATCH 3/3] Update 'hashstoreclient' to set multiprocessing flag based on the test flag

---
 src/hashstore/hashstoreclient.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/hashstore/hashstoreclient.py b/src/hashstore/hashstoreclient.py
index b02d764c..94292957 100644
--- a/src/hashstore/hashstoreclient.py
+++ b/src/hashstore/hashstoreclient.py
@@ -257,13 +257,13 @@ def __init__(self, properties, testflag=None):
 
         # Get HashStore from factory
         if testflag:
+            # Set multiprocessing to true if testing in knbvm
             module_name = "filehashstore"
+            os.environ["USE_MULTIPROCESSING"] = "True"
         else:
             module_name = "hashstore.filehashstore"
         class_name = "FileHashStore"
 
-        # Set multiprocessing to true
-        os.environ["USE_MULTIPROCESSING"] = "True"
         use_multiprocessing = os.getenv("USE_MULTIPROCESSING", "False") == "True"
         logging.info(
             "HashStoreClient - use_multiprocessing (bool): %s", use_multiprocessing