Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

225 support gpu benchmarking #232

Open
wants to merge 15 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 54 additions & 0 deletions config/feelbvh/bvh.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
{
"executable": "/nvme0/lemoinep/feelppGPUGamma/feelpp/build/feelpp-clang-cpp20-spack-rocm-kokkos-none-release/testsuite/feelbvhgpu/feelpp_test_bvhgpu",
"output_directory": "{{machine.output_app_dir}}/javier_test/bvh/",
"use_case_name": "BVH",
"timeout":"0-00:10:00",
"resources":{
"tasks":1,
"gpus_per_node":1,
"exclusive_access":false
},
"options": [
"--log_level=test_suite",
"--",
"--directory={{output_directory}}/{{instance}}",
"--hsize={{parameters.hsize.value}}",
"--number_rays_desired={{parameters.nb_rays.value}}",
"--repository.append.np 0"
],
"outputs": [],
"scalability": {
"directory": "{{output_directory}}/{{instance}}",
"stages": [
{
"name": "",
"filepath": "results2.csv",
"format": "csv",
"units": {
"*":"ms",
"hsize":"u",
"maxNumElement":"",
"maxNumFace":"",
"maxNumPoints":"",
"maxNumVerices":"",
"nbRaysDesired":"",
"nbRays":""
}
}
]
},
"sanity": {
"success": [],
"error": []
},
"parameters": [
{
"name":"hsize",
"sequence":[0.1]
},
{
"name":"nb_rays",
"sequence":[10000]
}
]
}
70 changes: 70 additions & 0 deletions config/feelbvh/bvh_plots.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
{
"plots": [
{
"title":"Times CPU vs GPU",
"plot_types": ["grouped_bar","stacked_bar"],
"transformation": "performance",
"variables": ["totalTimeBVHRTcpu","totalTimeBVHRTgpu"],
"names": ["CPU", "GPU"],
"xaxis": {
"parameter":"hsize",
"label":"h"
},
"yaxis": {
"label":"Time (ms)"
},
"color_axis": {
"parameter": "performance_variable",
"label": "Device"
},
"secondary_axis": {
"parameter": "nb_rays",
"label":"# rays"
}
},
{
"title": "Times GPU",
"plot_types": ["grouped_bar","stacked_bar"],
"transformation": "performance",
"variables": ["timeBVHgpu","timeRTgpu","timeFastMarching"],
"names": ["BVH", "RT","FastMarching"],
"xaxis": {
"parameter":"hsize",
"label":"h"
},
"yaxis": {
"label":"Time (ms)"
},
"color_axis": {
"parameter": "performance_variable",
"label": "Step"
},
"secondary_axis": {
"parameter": "nb_rays",
"label":"# rays"
}
},
{
"title": "Times CPU",
"plot_types": ["grouped_bar","stacked_bar"],
"transformation": "performance",
"variables": ["timeBVHcpu","timeRTcpu"],
"names": ["BVH", "RT"],
"xaxis": {
"parameter":"hsize",
"label":"h"
},
"yaxis": {
"label":"Time (ms)"
},
"color_axis": {
"parameter": "performance_variable",
"label": "Step"
},
"secondary_axis": {
"parameter": "nb_rays",
"label":"# rays"
}
}
]
}
1 change: 1 addition & 0 deletions src/feelpp/benchmarking/reframe/config/configSchemas.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ class AdditionalFiles(BaseModel):
class Resources(BaseModel):
tasks: Optional[Union[str,int]] = None
tasks_per_node: Optional[Union[str,int]] = None
gpus_per_node: Optional[Union[str,int]] = None
nodes: Optional[Union[str,int]] = None
memory: Optional[Union[str,int]] = 0
exclusive_access: Optional[Union[str,bool]] = True
Expand Down
41 changes: 40 additions & 1 deletion src/feelpp/benchmarking/reframe/config/machineConfigs/gaya.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,45 @@
'memory_per_node':500
}
},
{
'name':'gpu',
'scheduler':'squeue',
'launcher':'mpiexec',
'max_jobs':4,
'access': ['--partition=gpu'],
'environs': ['default'],
'resources': [
{
'name': '_rfm_gpu',
'options': ['--gres=gpu:{num_gpus_per_node}'],
},
{
'name':'launcher_options',
'options':['-bind-to','none']
}
],
'prepare_cmds': [
'source /etc/profile.d/modules.sh',
"export PATH=/opt/apptainer/v1.3.5/apptainer/bin/:$PATH"
],
'processor': {
'num_cpus': 32
},
'devices': [
{
'type': 'gpu',
'num_devices': 3
}
],
'container_platforms':[
{
'type': 'Apptainer'
}
],
'extras':{
'memory_per_node':256
}
}
]
}
],
Expand All @@ -50,7 +89,7 @@
'modules': [],
'cc': 'clang',
'cxx': 'clang++',
'target_systems': ['gaya:production']
'target_systems': ['gaya:production','gaya:gpu']
},
{
'name': 'hpcx',
Expand Down
18 changes: 18 additions & 0 deletions src/feelpp/benchmarking/reframe/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ def configure(self, resources, rfm_test):
rfm_test.num_nodes = int(np.ceil(rfm_test.num_tasks / rfm_test.current_partition.processor.num_cpus))
rfm_test.num_tasks_per_node = min(rfm_test.num_tasks, rfm_test.current_partition.processor.num_cpus)

class GpusPerNodeStrategy(ResourceStrategy):
    """Strategy that sets the number of GPUs requested per node.

    The value is read from ``resources.gpus_per_node`` and stored on the
    test object as ``num_gpus_per_node``.
    """

    def configure(self, resources, rfm_test):
        """Copy the requested GPU count onto the test.

        Args:
            resources: Object exposing ``gpus_per_node``; the value is passed
                through ``int()``, so an int or a numeric string is accepted.
            rfm_test: Test object that receives ``num_gpus_per_node``.

        Raises:
            ValueError: If ``gpus_per_node`` is a non-numeric string.
            TypeError: If ``gpus_per_node`` cannot be converted by ``int()``.
        """
        rfm_test.num_gpus_per_node = int(resources.gpus_per_node)

    def validate(self, rfm_test):
        """Run the base-class validation, then require a positive GPU count.

        Args:
            rfm_test: Test object previously populated by ``configure``.

        Raises:
            AssertionError: If ``num_gpus_per_node`` is not strictly positive.
        """
        super().validate(rfm_test)
        # Raised explicitly rather than via `assert` so the check is not
        # stripped when Python runs with -O; the exception type is unchanged.
        if not rfm_test.num_gpus_per_node > 0:
            raise AssertionError(
                f"gpus_per_node must be a positive integer, got {rfm_test.num_gpus_per_node}"
            )



class MemoryEnforcer:
""" Plugin to recompute resources based on the memory requirements
The number of nodes is computed as the ceil of the euclidean quotient of the memory divided by the memory per node
Expand Down Expand Up @@ -136,11 +147,18 @@ def setResources(resources, rfm_test):
strategy = TasksAndNodesStrategy()
elif resources.tasks:
strategy = TasksStrategy()
elif resources.gpus_per_node: # or resources.gpus:
pass
else:
raise ValueError("The Tasks parameter should contain either (tasks_per_node,nodes), (tasks,nodes), (tasks) or (tasks, tasks_per_node)")

strategy.configure(resources, rfm_test)

if resources.gpus_per_node: #or resources.gpus
gpu_strategy = GpusPerNodeStrategy()
gpu_strategy.configure(resources, rfm_test)
gpu_strategy.validate(rfm_test)

if resources.memory:
MemoryEnforcer(resources.memory).enforceMemory(rfm_test)

Expand Down
30 changes: 15 additions & 15 deletions src/feelpp/benchmarking/report/config/overviewConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
"yaxis": { "label": "Execution time (s)" },
"color_axis": {"parameter":"use_case","label":"Use Case"},
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"},
{"column":"date","agg":"mean"}
Expand All @@ -30,7 +30,7 @@
"color_axis":{"parameter":"use_case", "label":"Use case"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"}
]
Expand All @@ -44,7 +44,7 @@
"color_axis":{"parameter":"performance_variable", "label":"Performance Step"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"date","agg":"mean"}
]
Expand All @@ -59,7 +59,7 @@
"names": ["performance"],
"xaxis": { "parameter": "date", "label": "Date" },
"secondary_axis": { "parameter": "hsize|mesh", "label": "h size" },
"color_axis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"color_axis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"performance_variable","agg":"sum"}
Expand All @@ -70,7 +70,7 @@
"plot_types": ["stacked_bar"],
"transformation": "performance",
"names": ["performance"],
"xaxis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"xaxis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"secondary_axis": { "parameter": "environment", "label": "Environment" },
"color_axis": { "parameter": "performance_variable", "label": "Step" },
"yaxis": { "label": "Execution time (s)" },
Expand All @@ -96,7 +96,7 @@
"yaxis": { "label": "Execution time (s)" },
"color_axis": {"parameter":"machine","label":"Machine"},
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"},
{"column":"date","agg":"mean"}
Expand All @@ -114,7 +114,7 @@
"color_axis":{"parameter":"machine", "label":"Machine"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"}
]
Expand All @@ -128,7 +128,7 @@
"color_axis":{"parameter":"performance_variable", "label":"Performance Step"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"date","agg":"mean"}
]
Expand All @@ -143,7 +143,7 @@
"names": ["performance"],
"xaxis": { "parameter": "date", "label": "Date" },
"secondary_axis": { "parameter": "hsize|mesh", "label": "h size" },
"color_axis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"color_axis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"performance_variable","agg":"sum"}
Expand All @@ -154,7 +154,7 @@
"plot_types": ["stacked_bar"],
"transformation": "performance",
"names": ["performance"],
"xaxis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"xaxis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"secondary_axis": { "parameter": "environment", "label": "Environment" },
"color_axis": { "parameter": "performance_variable", "label": "Step" },
"yaxis": { "label": "Execution time (s)" },
Expand All @@ -180,7 +180,7 @@
"yaxis": { "label": "Execution time (s)" },
"color_axis": {"parameter":"machine","label":"Machine"},
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"},
{"column":"date","agg":"mean"}
Expand All @@ -198,7 +198,7 @@
"color_axis":{"parameter":"machine", "label":"Machine"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"performance_variable","agg":"sum"}
]
Expand All @@ -212,7 +212,7 @@
"color_axis":{"parameter":"performance_variable", "label":"Performance Step"},
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"resources.tasks|nb_tasks.tasks","agg":"max"},
{"column":"resources.tasks|nb_rays|nb_tasks.tasks","agg":"max"},
{"column":"hsize|mesh","agg":"max"},
{"column":"date","agg":"mean"}
]
Expand All @@ -227,7 +227,7 @@
"names": ["performance"],
"xaxis": { "parameter": "date", "label": "Date" },
"secondary_axis": { "parameter": "hsize|mesh", "label": "h size" },
"color_axis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"color_axis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"yaxis": { "label": "Execution time (s)" },
"aggregations":[
{"column":"performance_variable","agg":"sum"}
Expand All @@ -238,7 +238,7 @@
"plot_types": ["stacked_bar"],
"transformation": "performance",
"names": ["performance"],
"xaxis": { "parameter": "resources.tasks|nb_tasks.tasks", "label": "Tasks" },
"xaxis": { "parameter": "resources.tasks|nb_rays|nb_tasks.tasks", "label": "Tasks" },
"secondary_axis": { "parameter": "environment", "label": "Environment" },
"color_axis": { "parameter": "performance_variable", "label": "Step" },
"yaxis": { "label": "Execution time (s)" },
Expand Down
Loading
Loading