From ff5876ab1ff56175b7541af9ec4f3e47472f70f5 Mon Sep 17 00:00:00 2001 From: Sara Mirzaee Date: Wed, 2 Sep 2020 12:04:59 -0500 Subject: [PATCH 1/2] job_defaults.cfg: add memory for run_15 --- minsar/defaults/job_defaults.cfg | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/minsar/defaults/job_defaults.cfg b/minsar/defaults/job_defaults.cfg index 7e2e6956..f70bcdfe 100644 --- a/minsar/defaults/job_defaults.cfg +++ b/minsar/defaults/job_defaults.cfg @@ -24,7 +24,7 @@ extract_stack_valid_region 0 00:01:00 4000 0 merge_reference_secondary_slc 00:02:45 0 4000 0 2 generate_burst_igram 0 00:00:30 4000 0 2 merge_burst_igram 0 00:00:10 4000 0 8 # for num_threads=4 got error with memory -filter_coherence 0 00:00:40 6000 0 8 # for num_threads=4 got error with memory +filter_coherence 0 00:00:40 6000 1000 8 # for num_threads=4 got error with memory unwrap 0 00:01:00 5000 0 2 merge 00:02:45 0 4000 0 2 From 52402cd59c8039214d0eaf01ee96e1ed1d46f8b2 Mon Sep 17 00:00:00 2001 From: Sara Mirzaee Date: Wed, 2 Sep 2020 12:05:36 -0500 Subject: [PATCH 2/2] job_submission.py: modify memory limit --- minsar/job_submission.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/minsar/job_submission.py b/minsar/job_submission.py index f55c06ad..80f8dabe 100755 --- a/minsar/job_submission.py +++ b/minsar/job_submission.py @@ -440,10 +440,15 @@ def split_jobs(self, batch_file, tasks, number_of_nodes): number_of_parallel_tasks = int(np.ceil(len(tasks) / number_of_jobs)) number_of_limited_memory_tasks = int(self.max_memory_per_node*number_of_nodes_per_job/self.default_memory) + while number_of_limited_memory_tasks < number_of_parallel_tasks: + if number_of_jobs < int(self.max_jobs_per_queue): + number_of_jobs += 1 + number_of_parallel_tasks = int(np.ceil(len(tasks) / number_of_jobs)) + else: + break + while number_of_limited_memory_tasks < number_of_parallel_tasks: number_of_nodes_per_job = number_of_nodes_per_job + 1 - number_of_jobs = np.ceil(number_of_nodes / number_of_nodes_per_job) - number_of_parallel_tasks = int(np.ceil(len(tasks) / number_of_jobs)) number_of_limited_memory_tasks = int(self.max_memory_per_node * number_of_nodes_per_job / self.default_memory) if number_of_nodes_per_job > 1: