diff --git a/.travis.yml b/.travis.yml index 029f2f2b4..d9c736c74 100644 --- a/.travis.yml +++ b/.travis.yml @@ -19,27 +19,6 @@ env: matrix: include: - # OSX, xcode 10 - - os: osx - osx_image: xcode10 - env: CMAKE_OPTIONS="-DUSER_PATH_QT=~/usr/local/Cellar/qt/5.11.0" PYPI=true - # Linux, GCC 6 - - os: linux - env: COMPILER_C=gcc-6 COMPILER_CXX=g++-6 CMAKE_OPTIONS="-DUSER_PATH_QT=~/opt/qt511" PYPI=true - compiler: g++ - addons: - apt: - packages: - - g++-6 - - qt511base - - qt511charts-no-lgpl - - ca-certificates - - python2.7 - - mesa-common-dev - sources: - - sourceline: 'ppa:ubuntu-toolchain-r/test' - - sourceline: 'ppa:beineri/opt-qt-5.11.1-bionic' - # - sourceline: 'ppa:jonathonf/python-2.7' # Linux, GCC 5 - os: linux env: COMPILER_C=gcc-5 COMPILER_CXX=g++-5 CMAKE_OPTIONS="-DUSER_PATH_QT=~/opt/qt511" PYPI=false @@ -125,33 +104,13 @@ matrix: - sourceline: 'ppa:beineri/opt-qt-5.11.1-bionic' # - sourceline: 'ppa:jonathonf/python-2.7' - # - compiler: clang - # addons: - # apt: - # sources: - # - ubuntu-toolchain-r-test - # - llvm-toolchain-precise-3.6 - # packages: - # - clang-3.6 - # env: COMPILER=clang++-3.6 - # - compiler: clang - # addons: - # apt: - # sources: - # - ubuntu-toolchain-r-test - # - llvm-toolchain-precise-3.7 - # packages: - # - clang-3.7 - # env: COMPILER=clang++-3.7 - before_install: + - sudo apt-get install - python --version - if [ "$TRAVIS_OS_NAME" != "osx" ]; then sudo pip uninstall -y six chardet pyOpenSSL; fi - sudo pip install --upgrade pip six pyOpenSSL - sudo pip install wheel numpy urllib3==1.23 twine coveralls chardet - # Run homebrew on osx - - if [ "$TRAVIS_OS_NAME" == "osx" ]; then brew update; fi # Setup default versions and override compiler if needed - if [[ "${LLVM_VERSION}" == "default" ]]; then LLVM_VERSION=3.9.0; fi # Install a recent CMake (unless already installed on OS X) @@ -164,38 +123,17 @@ before_install: export PATH=${DEPS_DIR}/cmake/bin:${PATH} export CC=${COMPILER_C} CXX=${COMPILER_CXX} source 
/opt/qt511/bin/qt511-env.sh - else - if ! brew ls --version cmake &>/dev/null; then brew install cmake; fi - brew install cppcheck qt - brew link --force qt - export PATH=/usr/local/opt/qt/bin:${PATH} - fi - # Install CUDA - - | - if [[ "$USE_CUDA" == "true" ]]; then - export CUDA=10.1.105-1 - # get cuda-repo-ubuntu1804 - travis_retry wget http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/cuda-repo-ubuntu1804_${CUDA}_amd64.deb - travis_retry sudo apt-key adv --fetch-keys http://developer.download.nvidia.com/compute/cuda/repos/ubuntu1804/x86_64/7fa2af80.pub - travis_retry sudo dpkg -i cuda-repo-ubuntu1804_${CUDA}_amd64.deb - # install - travis_retry sudo apt-get update -qq - export CUDA_APT=${CUDA:0:4} - export CUDA_APT=${CUDA_APT/./-} - travis_retry sudo apt-get install -y cuda-drivers cuda-core-${CUDA_APT} cuda-cudart-dev-${CUDA_APT} cuda-curand-dev-${CUDA_APT} cuda-cufft-dev-${CUDA_APT} --allow-unauthenticated - travis_retry sudo apt-get clean - # export - export CUDA_HOME=/usr/local/cuda-${CUDA:0:4} - export LD_LIBRARY_PATH=${CUDA_HOME}/lib64:${LD_LIBRARY_PATH} - export PATH=${CUDA_HOME}/bin:${PATH} - export CMAKE_OPTIONS="${CMAKE_OPTIONS} -DCMAKE_C_COMPILER=${COMPILER_C} -DCMAKE_CXX_COMPILER=${COMPILER_CXX} -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_HOME}" - ls -a ${CUDA_HOME} ${CUDA_HOME}/bin ${CUDA_HOME}/include - fi - + install: # Build everything - cd ${TRAVIS_BUILD_DIR} + - sudo apt-get -qq update + - sudo apt-get install -y libassimp-dev libglm-dev graphviz libxcb-dri3-0 libxcb-present0 libpciaccess0 cmake libpng-dev libxcb-dri3-dev libx11-dev libx11-xcb-dev libmirclient-dev libwayland-dev libxrandr-dev + - export VK_VERSION=1.2.131.1 + - wget -O vulkansdk-linux-x86_64-$VK_VERSION.tar.gz https://sdk.lunarg.com/sdk/download/$VK_VERSION/linux/vulkansdk-linux-x86_64-$VK_VERSION.tar.gz + - tar zxf vulkansdk-linux-x86_64-$VK_VERSION.tar.gz + - export VULKAN_SDK=$TRAVIS_BUILD_DIR/$VK_VERSION/x86_64 - | mkdir -p build cd build @@ -256,11 +194,3 @@ 
script: - if [ "$COVERAGE" == "true" ]; then lcov -a baseline.info -a after_test.info -o total_test.info; fi - if [ "$COVERAGE" == "true" ]; then lcov -r total_test.info \*thirdparty\* \*/test/\* \*Collection\* \*DLL_\* -o coverage.info; fi - if [ "$COVERAGE" == "true" ]; then bash <(curl -s https://codecov.io/bash) -f coverage.info || echo "Codecov did not collect coverage reports"; fi - - # Run Python coverage report generation and upload to coveralls - - cd ../core/python - - if [ "$COVERAGE" == "true" ]; then coverage run --source spirit --omit=spirit/spiritlib.py,spirit/collection.py setup.py test > cov.txt; fi - - if [ "$COVERAGE" == "true" ]; then head cov.txt; fi - - if [ "$COVERAGE" == "true" ]; then coverage report -m; fi - - if [ "$COVERAGE" == "true" ]; then coverage xml; fi - - if [ "$COVERAGE" == "true" ]; then coveralls; fi \ No newline at end of file diff --git a/core/include/Spirit/Hamiltonian.h b/core/include/Spirit/Hamiltonian.h index 03ead59b1..c02d380b4 100644 --- a/core/include/Spirit/Hamiltonian.h +++ b/core/include/Spirit/Hamiltonian.h @@ -111,6 +111,9 @@ PREFIX void Hamiltonian_Set_Exchange_Tensor(State *state, float exchange_tensor, // Set the Dzyaloshinskii-Moriya interaction tensor for micromagnetics [meV] PREFIX void Hamiltonian_Set_DMI_Tensor(State *state, float dmi_tensor[2], int region_id, int idx_image=-1, int idx_chain=-1) SUFFIX; + +//Set regions damping +PREFIX void Hamiltonian_Set_damping(State* state, float alpha, int region_id, int idx_image = -1, int idx_chain = -1) SUFFIX; // Set the frozen spins PREFIX void Hamiltonian_Set_frozen_spins(State* state, float frozen_spins, int region_id, int idx_image = -1, int idx_chain = -1) SUFFIX; @@ -180,6 +183,8 @@ PREFIX int Hamiltonian_Get_DMI_N_Pairs(State *state, int idx_image=-1, int idx_ //Micromagnetics DMI tensor PREFIX void Hamiltonian_Get_DMI_Tensor(State *state, float * dmi_tensor, int region_id, int idx_image=-1, int idx_chain=-1) SUFFIX; +//Get LLG damping +PREFIX void 
Hamiltonian_Get_damping(State* state, float* alpha, int region_id, int idx_image = -1, int idx_chain = -1) SUFFIX; //Micromagnetics DDI coefficient PREFIX void Hamiltonian_Get_DDI_coefficient(State* state, float* ddi, int region_id, int idx_image = -1, int idx_chain = -1) SUFFIX; //Micromagnetics frozen spins diff --git a/core/include/Spirit/Parameters_LLG.h b/core/include/Spirit/Parameters_LLG.h index 71842e84c..106e6f720 100644 --- a/core/include/Spirit/Parameters_LLG.h +++ b/core/include/Spirit/Parameters_LLG.h @@ -167,5 +167,21 @@ Returns the spin current configuration. */ PREFIX void Parameters_LLG_Get_STT(State *state, bool * use_gradient, float * magnitude, float normal[3], int idx_image=-1, int idx_chain=-1) SUFFIX; + + +//micromagnetics +PREFIX void Parameters_LLG_Set_dt(State* state, float dt, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Set_max_torque(State* state, float maxTorque, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Set_max_move(State* state, float max_move, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Set_n_LBFGS(State* state, int n_LBFGS, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Set_grouped_iterations(State* state, int grouped_iterations, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Set_save_period(State* state, int save_period, int idx_image = -1, int idx_chain = -1) SUFFIX; + +PREFIX void Parameters_LLG_Get_dt(State* state, float *dt, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Get_max_torque(State* state, float * maxTorque, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Get_max_move(State* state, float *max_move, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Get_n_LBFGS(State* state, int*n_LBFGS, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void 
Parameters_LLG_Get_grouped_iterations(State* state, int *grouped_iterations, int idx_image = -1, int idx_chain = -1) SUFFIX; +PREFIX void Parameters_LLG_Get_save_period(State* state, int*save_period, int idx_image = -1, int idx_chain = -1) SUFFIX; #include "DLL_Undefine_Export.h" #endif \ No newline at end of file diff --git a/core/include/Spirit/Simulation.h b/core/include/Spirit/Simulation.h index 0a8fa4a55..9373ad678 100644 --- a/core/include/Spirit/Simulation.h +++ b/core/include/Spirit/Simulation.h @@ -52,6 +52,9 @@ Note that the VP and LBFGS Solvers are only meant for direct minimization and no // `Solver_VP_OSO`: Verlet-like velocity projection, exponential transform #define Solver_VP_OSO 7 +// `Solver_CG_OSO`: CG, exponential transform +#define Solver_CG_OSO 8 + /* Start or stop a simulation -------------------------------------------------------------------- diff --git a/core/include/data/Vulkan_Compute.hpp b/core/include/data/Vulkan_Compute.hpp index 99f08a12d..57d5be40f 100644 --- a/core/include/data/Vulkan_Compute.hpp +++ b/core/include/data/Vulkan_Compute.hpp @@ -25,7 +25,7 @@ const std::vector validationLayers = { "VK_LAYER_KHRONOS_validation" }; -#ifdef NDEBUG +#ifdef NDEBUG const bool enableValidationLayers = false; #else const bool enableValidationLayers = true; @@ -51,6 +51,7 @@ namespace VulkanCompute int n_lbfgs_memory=3; bool performZeropadding[3] = { false, false, false }; scalar gamma= 0.00176085964411; + scalar maxTorque = 1e-2; scalar max_move=200; scalar kernel_accuracy=6.0; int GPU_ID=0; @@ -100,6 +101,10 @@ namespace VulkanCompute scalar alpha; scalar rhopdg; } apply2Consts; + struct ApplyCGConsts { + uint32_t pad; + scalar dt; + } applyCGConsts; struct ApplyVP1Consts { uint32_t n; uint32_t pad; @@ -204,6 +209,7 @@ namespace VulkanCompute VkCommandBuffer commandBufferTransferSolver; VkCommandBuffer commandBufferTransferSolver2; VkCommandBuffer commandBufferFullVP; + VkCommandBuffer commandBufferFullCG; VkCommandBuffer 
commandBufferFullLBFGS; VkCommandBuffer commandBufferFullDepondt; VkCommandBuffer commandBufferFullRK4; @@ -737,6 +743,10 @@ namespace VulkanCompute initRK4(); createRK4(&vulkanLBFGS); break; + case 4: + initCG(); + createCG(&vulkanLBFGS); + break; } } void updateRegionsBook(regionbook regions_book, int region_num) { @@ -764,18 +774,26 @@ namespace VulkanCompute launchConfiguration.solver_type = -1; break; case 1: + deleteVP(); deleteCollectionVP(&vulkanLBFGS); launchConfiguration.solver_type = -1; break; case 2: + deleteDepondt(); deleteCollectionDepondt(&vulkanLBFGS); launchConfiguration.solver_type = -1; break; case 3: + deleteRK4(); deleteCollectionRK4(&vulkanLBFGS); launchConfiguration.solver_type = -1; break; + case 4: + deleteCG(); + deleteCollectionCG(&vulkanLBFGS); + launchConfiguration.solver_type = -1; + break; } } void Prepare_DDI_kernel() @@ -1650,14 +1668,17 @@ namespace VulkanCompute vulkanReduce->sizes[i] = n; allocateBuffer(&vulkanReduce->buffer[i], &vulkanReduce->deviceMemory[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 9*vulkanReduce->sizes[i] * sizeof(float)); } - + /*n = (n + localSize - 1) / localSize; + vulkanReduce->sizes[vulkanReduce->bufferNum - 1] = n; + allocateBuffer(&vulkanReduce->buffer[vulkanReduce->bufferNum - 1], &vulkanReduce->deviceMemory[vulkanReduce->bufferNum - 1], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 18 * vulkanReduce->sizes[vulkanReduce->bufferNum - 1] * sizeof(float)); + */ n = SIZES[0] * SIZES[1] * SIZES[2]; for (int i = 0; i < vulkanReduce->bufferNumMax; i++) { n = (n + localSize - 1) / localSize; vulkanReduce->sizesMax[i] = n; } - allocateBuffer(&vulkanReduce->lastMax, &vulkanReduce->deviceMemoryLastMax, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, 
VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 17*sizeof(float)); + allocateBuffer(&vulkanReduce->lastMax, &vulkanReduce->deviceMemoryLastMax, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 18*sizeof(float)); } void deleteReduceBuffers(VulkanReduce* vulkanReduce) { @@ -4091,8 +4112,8 @@ namespace VulkanCompute if (i == 8) { descriptorBufferInfo.buffer = vulkanReduce.lastMax; - descriptorBufferInfo.offset = 16 * sizeof(float); - descriptorBufferInfo.range = 2 * sizeof(float); + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = 18 * sizeof(float); } VkWriteDescriptorSet writeDescriptorSet = { }; writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; @@ -6203,8 +6224,455 @@ namespace VulkanCompute } + //CG + void initCG() { + int n = SIZES[0] * SIZES[1] * SIZES[2]; + + /*vectorfield velocity = vectorfield(n, { 0,0,0 }); + vectorfield searchdir = vectorfield(n, { 0,0,0 }); + vectorfield grad = vectorfield(n, { 0,0,0 }); + vectorfield grad_pr = vectorfield(n, { 0,0,0 });*/ + + //vulkanReduce.bufferNum++; + vulkanLBFGS.bufferSizes = (VkDeviceSize*)malloc(sizeof(VkDeviceSize)); + + for (int i = 0; i < 1; i++) + { + vulkanLBFGS.bufferSizes[i] = 3 * n * sizeof(float); + } + + vulkanLBFGS.buffer = (VkBuffer*)malloc(sizeof(VkBuffer)); + vulkanLBFGS.deviceMemory = (VkDeviceMemory*)malloc(sizeof(VkDeviceMemory)); + + for (int i = 0; i < 1; i++) + { + allocateBuffer(&vulkanLBFGS.buffer[i], &vulkanLBFGS.deviceMemory[i], VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT, VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, vulkanLBFGS.bufferSizes[i]); + } + /*uint32_t* reduce = (uint32_t*)malloc(vulkanReduce.sizes[vulkanReduce.bufferNum-1]); + for (uint32_t i = 0; i < 
vulkanReduce.sizes[vulkanReduce.bufferNum - 1] / sizeof(scalar); ++i) { + reduce[i] = 0; + } + transferDataFromCPU(reduce, vulkanReduce.sizes[vulkanReduce.bufferNum - 1], &vulkanReduce.buffer[vulkanReduce.bufferNum - 1]); + */ + } + void deleteCG() { + // Frees the device memory and destroys the handle of the single buffer created by initCG. NOTE(review): the malloc'ed handle arrays are not released here; confirm they are freed elsewhere. + for (int i = 0; i < 1; i++) + { + vkFreeMemory(device, vulkanLBFGS.deviceMemory[i], NULL); + vkDestroyBuffer(device, vulkanLBFGS.buffer[i], NULL); + } + } + void createCG(VulkanLBFGS* vulkanLBFGS) { + uint32_t nos = SIZES[0] * SIZES[1] * SIZES[2]; + createReduceVPFinish(&vulkanLBFGS->collectionReduceDotFinish, &vulkanReduce, 1); + //createReduceDotFinish(&vulkanLBFGS->collectionReduceDotFinish, &vulkanReduce, 1); + //createOsoCalcGradients(&vulkanLBFGS->collectionOsoCalcGradients, 1); + // Push constants of the CG apply shaders: pad carries the product of SIZES, dt is derived from launchConfiguration.gamma. + + //vulkanLBFGS->applyCG2Consts.grad_mult = 0; + vulkanLBFGS->applyCGConsts.pad = SIZES[0] * SIZES[1] * SIZES[2]; + vulkanLBFGS->applyCGConsts.dt = launchConfiguration.gamma / 0.176085964411; + createApplyCG2(&vulkanLBFGS->collectionApply2); + + createApplyCG1(&vulkanLBFGS->collectionApply1); + // Zero slots 16 and 17 of lastMax. The buffer is allocated and bound in units of sizeof(float), so the mapped offset and size use float as well; the map result is checked before the pointer is written through. + float gamma_transfer[2]; + gamma_transfer[0] = 0; + gamma_transfer[1] = 0; + void* map; + VK_CHECK_RESULT(vkMapMemory(device, vulkanReduce.deviceMemoryLastMax, 16 * sizeof(float), 2 * sizeof(float), 0, &map)); + memcpy(map, gamma_transfer, 2 * sizeof(float)); + vkUnmapMemory(device, vulkanReduce.deviceMemoryLastMax); + + vulkanLBFGS->ReduceDotConsts[0] = nos; + vulkanLBFGS->ReduceDotConsts[1] = 0; + vulkanLBFGS->ReduceDotConsts[2] = 0; + vulkanLBFGS->ReduceDotConsts[3] = SIZES[0] * SIZES[1]; + createReduceFullDataFinish(&vulkanLBFGS->collectionReduceEnergyFinish, &vulkanReduce, 9); + //createReduceMaxFinish(&vulkanLBFGS->collectionReduceMaxFinish, &vulkanReduce, 1); + createCommandBufferFullCG(); + vulkanLBFGS->submitInfo.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO; + vulkanLBFGS->submitInfo.commandBufferCount = 1; + vulkanLBFGS->submitInfo.pCommandBuffers = &commandBufferFullCG; + + + }; + void deleteCollectionCG(VulkanLBFGS* vulkanLBFGS) { + 
deleteCollection(&vulkanLBFGS->collectionReduceDotFinish); + //deleteCollection(&vulkanLBFGS->collectionReduceMaxFinish); + deleteCollection(&vulkanLBFGS->collectionApply2); + deleteCollection(&vulkanLBFGS->collectionApply1); + deleteCollection(&vulkanLBFGS->collectionReduceEnergyFinish); + vkFreeCommandBuffers(device, commandPool, 1, &commandBufferFullCG); + // The by-value pointer parameter is not reset here: assigning NULL to it had no effect on the caller. + }; + void CG_iterate() { + // Submit the command buffer referenced by vulkanLBFGS.submitInfo, wait on the fence (timeout is given in nanoseconds), then reset the fence for the next call. + VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &vulkanLBFGS.submitInfo, fence)); + vkWaitForFences(device, 1, &fence, VK_TRUE, 100000000000); + vkResetFences(device, 1, &fence); + + } + void createCommandBufferFullCG() { + // Allocate one primary command buffer from commandPool and record the full CG sequence into commandBufferFullCG. + VkCommandBufferAllocateInfo commandBufferAllocateInfo = {}; + commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + commandBufferAllocateInfo.commandPool = commandPool; + commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + commandBufferAllocateInfo.commandBufferCount = 1; + VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &commandBufferFullCG)); + VkCommandBufferBeginInfo commandBufferBeginInfo = {}; + commandBufferBeginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + commandBufferBeginInfo.flags = 0; + VK_CHECK_RESULT(vkBeginCommandBuffer(commandBufferFullCG, &commandBufferBeginInfo)); + recordFullBufferCG(&commandBufferFullCG); + VK_CHECK_RESULT(vkEndCommandBuffer(commandBufferFullCG)); + } + void createApplyCG2(VulkanCollection* collection) { + { + VkDescriptorPoolSize descriptorPoolSize[1] = { }; + descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptorPoolSize[0].descriptorCount = 4; + //collection->descriptorNum = descriptorPoolSize[0].descriptorCount; + + VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {}; + descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptorPoolCreateInfo.poolSizeCount = COUNT_OF(descriptorPoolSize); + descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize; + descriptorPoolCreateInfo.maxSets = 1; + 
vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &collection[0].descriptorPool); + } + + { + const VkDescriptorType descriptorType[] = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_DESCRIPTOR_TYPE_STORAGE_BUFFER }; + collection[0].descriptorSetLayouts = (VkDescriptorSetLayout*)malloc(sizeof(VkDescriptorSetLayout)); + collection[0].descriptorSets = (VkDescriptorSet*)malloc(sizeof(VkDescriptorSet)); + collection->descriptorNum = 1; + VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[COUNT_OF(descriptorType)]; + for (uint32_t i = 0; i < COUNT_OF(descriptorSetLayoutBindings); ++i) { + descriptorSetLayoutBindings[i].binding = i; + descriptorSetLayoutBindings[i].descriptorType = descriptorType[i]; + descriptorSetLayoutBindings[i].descriptorCount = 1; + descriptorSetLayoutBindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {}; + descriptorSetLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptorSetLayoutCreateInfo.bindingCount = COUNT_OF(descriptorSetLayoutBindings); + descriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindings; + vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, collection[0].descriptorSetLayouts); + VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { }; + descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptorSetAllocateInfo.descriptorPool = collection[0].descriptorPool; + descriptorSetAllocateInfo.descriptorSetCount = 1; + descriptorSetAllocateInfo.pSetLayouts = collection[0].descriptorSetLayouts; + vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, collection[0].descriptorSets); + for (uint32_t i = 0; i < COUNT_OF(descriptorType); ++i) { + VkDescriptorBufferInfo descriptorBufferInfo = { }; + if (i == 0) { + descriptorBufferInfo.buffer = bufferGradient; + 
descriptorBufferInfo.offset = bufferSizeGradient; + descriptorBufferInfo.range = bufferSizeGradient; + } + if (i == 1) { + descriptorBufferInfo.buffer = vulkanLBFGS.buffer[0]; + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = vulkanLBFGS.bufferSizes[0]; + } + if (i == 2) { + descriptorBufferInfo.buffer = bufferSpins; + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = bufferSizeSpins; + } + if (i == 3) { + descriptorBufferInfo.buffer = vulkanReduce.lastMax; + descriptorBufferInfo.offset = 16 * sizeof(float); + descriptorBufferInfo.range = 2 * sizeof(float); + } + + + VkWriteDescriptorSet writeDescriptorSet = { }; + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.dstSet = collection[0].descriptorSets[0]; + writeDescriptorSet.dstBinding = i; + writeDescriptorSet.dstArrayElement = 0; + writeDescriptorSet.descriptorType = descriptorType[i]; + writeDescriptorSet.descriptorCount = 1; + writeDescriptorSet.pBufferInfo = &descriptorBufferInfo; + vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, NULL); + } + } + + { + + collection[0].pipelines = (VkPipeline*)malloc(sizeof(VkPipeline)); + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { }; + pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineLayoutCreateInfo.setLayoutCount = 1; + pipelineLayoutCreateInfo.pSetLayouts = collection[0].descriptorSetLayouts; + VkPushConstantRange pushConstantRange = {}; + pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pushConstantRange.offset = 0; + pushConstantRange.size = 2*sizeof(scalar); + // Push constant ranges are part of the pipeline layout + pipelineLayoutCreateInfo.pushConstantRangeCount = 1; + pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; + vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, NULL, &collection[0].pipelineLayout); + VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo = { }; + 
VkComputePipelineCreateInfo computePipelineCreateInfo = { }; + uint32_t specialization = 32; + + std::array specializationMapEntries; + specializationMapEntries[0].constantID = 1; + specializationMapEntries[0].size = sizeof(uint32_t); + specializationMapEntries[0].offset = 0; + + VkSpecializationInfo specializationInfo{}; + specializationInfo.dataSize = sizeof(uint32_t); + specializationInfo.mapEntryCount = static_cast(specializationMapEntries.size()); + specializationInfo.pMapEntries = specializationMapEntries.data(); + specializationInfo.pData = &specialization; + + + + pipelineShaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + pipelineShaderStageCreateInfo.pSpecializationInfo = &specializationInfo; + uint32_t filelength; + uint32_t* code; + if (launchConfiguration.double_precision_rotate == true) + code = readShader(filelength, "shaders/ApplyCG2_double.spv"); + else + code = readShader(filelength, "shaders/ApplyCG2_float.spv"); + VkShaderModuleCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + createInfo.pCode = code; + createInfo.codeSize = filelength; + vkCreateShaderModule(device, &createInfo, NULL, &pipelineShaderStageCreateInfo.module); + delete[] code; + pipelineShaderStageCreateInfo.pName = "main"; + computePipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computePipelineCreateInfo.stage = pipelineShaderStageCreateInfo; + computePipelineCreateInfo.layout = collection[0].pipelineLayout; + vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &computePipelineCreateInfo, NULL, collection[0].pipelines); + vkDestroyShaderModule(device, pipelineShaderStageCreateInfo.module, NULL); + } + { + VkCommandBufferAllocateInfo commandBufferAllocateInfo = {}; + commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + commandBufferAllocateInfo.commandPool = commandPool; + 
commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + commandBufferAllocateInfo.commandBufferCount = 1; + VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &collection[0].commandBuffer)); // allocate command buffer. + vulkanLBFGS.applyCGConsts.pad = SIZES[0] * SIZES[1] * SIZES[2]; + recordApplyCG2(collection); + } + } + void recordApplyCG2(VulkanCollection* collection) { + + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + VK_CHECK_RESULT(vkBeginCommandBuffer(collection[0].commandBuffer, &beginInfo)); // start recording commands. + vkCmdPushConstants(collection[0].commandBuffer, collection[0].pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0,2* sizeof(scalar), &vulkanLBFGS.applyCGConsts); + vkCmdBindPipeline(collection[0].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelines[0]); + vkCmdBindDescriptorSets(collection[0].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelineLayout, 0, 1, &collection[0].descriptorSets[0], 0, NULL); + vkCmdDispatch(collection[0].commandBuffer, (uint32_t)ceil(SIZES[0] * SIZES[1] * SIZES[2] / 1024.0f), 1, 1); + VK_CHECK_RESULT(vkEndCommandBuffer(collection[0].commandBuffer)); // end recording commands. 
+ } + void recordApplyCG2Append(VulkanCollection* collection, VkCommandBuffer* commandBuffer) { + + vkCmdPushConstants(commandBuffer[0], collection[0].pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 2*sizeof(scalar), &vulkanLBFGS.applyCGConsts); + vkCmdBindPipeline(commandBuffer[0], VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelines[0]); + vkCmdBindDescriptorSets(commandBuffer[0], VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelineLayout, 0, 1, &collection[0].descriptorSets[0], 0, NULL); + vkCmdDispatch(commandBuffer[0], (uint32_t)ceil(SIZES[0] * SIZES[1] * SIZES[2] / 1024.0f), 1, 1); + + } + void recordFullBufferCG(VkCommandBuffer* commandBuffer) { + int nos = SIZES[0] * SIZES[1] * SIZES[2]; + VkMemoryBarrier memory_barrier = { + VK_STRUCTURE_TYPE_MEMORY_BARRIER, + nullptr, + VK_ACCESS_SHADER_WRITE_BIT, + VK_ACCESS_SHADER_READ_BIT,//VK_ACCESS_MEMORY_READ_BIT | VK_ACCESS_MEMORY_WRITE_BIT, + }; + //int groupedIterations = 1;// std::max(1, 8 * 1024 * 1024 / nos); + for (int i = 0; i < launchConfiguration.groupedIterations; i++) { + if (launchConfiguration.DDI == true) { + app_convolution.VkFFTAppend(commandBuffer[0]); + } + recordComputeGradients_noDDIAppend(&collectionGradients_noDDI_save, commandBuffer); + vkCmdPipelineBarrier(commandBuffer[0], VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &memory_barrier, 0, NULL, 0, NULL); + if (nos > 1024) { + recordReduceMaxFinishAppend(&vulkanLBFGS.collectionReduceEnergyFinish, commandBuffer, &vulkanReduce, &memory_barrier); + //recordReduceMaxFinishAppend(&vulkanLBFGS.collectionReduceMaxFinish, commandBuffer, &vulkanReduce, &memory_barrier); + + } + + + recordApplyCG1Append(&vulkanLBFGS.collectionApply1, commandBuffer); + vkCmdPipelineBarrier(commandBuffer[0], VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &memory_barrier, 0, NULL, 0, NULL); + + if (nos > 1024) { + recordReduceDotFinishAppend(&vulkanLBFGS.collectionReduceDotFinish, commandBuffer, 
&vulkanReduce, &memory_barrier); + } + recordApplyCG2Append(&vulkanLBFGS.collectionApply2, commandBuffer); + vkCmdPipelineBarrier(commandBuffer[0], VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 1, &memory_barrier, 0, NULL, 0, NULL); + + /*if (nos > 1024) { + recordReduceMaxFinishAppend(&vulkanLBFGS.collectionReduceMaxFinish, commandBuffer, &vulkanReduce, &memory_barrier); + }*/ + } + } + void createApplyCG1(VulkanCollection* collection) { + { + VkDescriptorPoolSize descriptorPoolSize[1] = { }; + descriptorPoolSize[0].type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER; + descriptorPoolSize[0].descriptorCount = 2; + //collection->descriptorNum = descriptorPoolSize[0].descriptorCount; + + VkDescriptorPoolCreateInfo descriptorPoolCreateInfo = {}; + descriptorPoolCreateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO; + descriptorPoolCreateInfo.poolSizeCount = COUNT_OF(descriptorPoolSize); + descriptorPoolCreateInfo.pPoolSizes = descriptorPoolSize; + descriptorPoolCreateInfo.maxSets = 1; + vkCreateDescriptorPool(device, &descriptorPoolCreateInfo, NULL, &collection[0].descriptorPool); + } + + { + const VkDescriptorType descriptorType[] = { VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,VK_DESCRIPTOR_TYPE_STORAGE_BUFFER}; + collection[0].descriptorSetLayouts = (VkDescriptorSetLayout*)malloc(sizeof(VkDescriptorSetLayout)); + collection[0].descriptorSets = (VkDescriptorSet*)malloc(sizeof(VkDescriptorSet)); + collection->descriptorNum = 1; + VkDescriptorSetLayoutBinding descriptorSetLayoutBindings[COUNT_OF(descriptorType)]; + for (uint32_t i = 0; i < COUNT_OF(descriptorSetLayoutBindings); ++i) { + descriptorSetLayoutBindings[i].binding = i; + descriptorSetLayoutBindings[i].descriptorType = descriptorType[i]; + descriptorSetLayoutBindings[i].descriptorCount = 1; + descriptorSetLayoutBindings[i].stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + } + VkDescriptorSetLayoutCreateInfo descriptorSetLayoutCreateInfo = {}; + descriptorSetLayoutCreateInfo.sType = 
VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO; + descriptorSetLayoutCreateInfo.bindingCount = COUNT_OF(descriptorSetLayoutBindings); + descriptorSetLayoutCreateInfo.pBindings = descriptorSetLayoutBindings; + vkCreateDescriptorSetLayout(device, &descriptorSetLayoutCreateInfo, NULL, collection[0].descriptorSetLayouts); + VkDescriptorSetAllocateInfo descriptorSetAllocateInfo = { }; + descriptorSetAllocateInfo.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO; + descriptorSetAllocateInfo.descriptorPool = collection[0].descriptorPool; + descriptorSetAllocateInfo.descriptorSetCount = 1; + descriptorSetAllocateInfo.pSetLayouts = collection[0].descriptorSetLayouts; + vkAllocateDescriptorSets(device, &descriptorSetAllocateInfo, collection[0].descriptorSets); + for (uint32_t i = 0; i < COUNT_OF(descriptorType); ++i) { + VkDescriptorBufferInfo descriptorBufferInfo = { }; + if (i == 0) { + descriptorBufferInfo.buffer = bufferGradient; + descriptorBufferInfo.offset = bufferSizeGradient; + descriptorBufferInfo.range = bufferSizeGradient; + } + if (i == 1) { + descriptorBufferInfo.buffer = vulkanReduce.buffer[0]; + descriptorBufferInfo.offset = 0; + descriptorBufferInfo.range = vulkanReduce.sizes[0] * sizeof(float); + } + + VkWriteDescriptorSet writeDescriptorSet = { }; + writeDescriptorSet.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; + writeDescriptorSet.dstSet = collection[0].descriptorSets[0]; + writeDescriptorSet.dstBinding = i; + writeDescriptorSet.dstArrayElement = 0; + writeDescriptorSet.descriptorType = descriptorType[i]; + writeDescriptorSet.descriptorCount = 1; + writeDescriptorSet.pBufferInfo = &descriptorBufferInfo; + vkUpdateDescriptorSets(device, 1, &writeDescriptorSet, 0, NULL); + } + } + + { + + collection[0].pipelines = (VkPipeline*)malloc(sizeof(VkPipeline)); + VkPipelineLayoutCreateInfo pipelineLayoutCreateInfo = { }; + pipelineLayoutCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO; + pipelineLayoutCreateInfo.setLayoutCount = 1; + 
pipelineLayoutCreateInfo.pSetLayouts = collection[0].descriptorSetLayouts; + VkPushConstantRange pushConstantRange = {}; + pushConstantRange.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT; + pushConstantRange.offset = 0; + pushConstantRange.size = 2*sizeof(scalar); + // Push constant ranges are part of the pipeline layout + pipelineLayoutCreateInfo.pushConstantRangeCount = 1; + pipelineLayoutCreateInfo.pPushConstantRanges = &pushConstantRange; + vkCreatePipelineLayout(device, &pipelineLayoutCreateInfo, NULL, &collection[0].pipelineLayout); + VkPipelineShaderStageCreateInfo pipelineShaderStageCreateInfo = { }; + VkComputePipelineCreateInfo computePipelineCreateInfo = { }; + + struct SpecializationData { + uint32_t local_size_x_id; + uint32_t sumSubGroupSize; + } specializationData; + specializationData.local_size_x_id = 1024; + specializationData.sumSubGroupSize = 32; + std::array specializationMapEntries; + specializationMapEntries[0].constantID = 1; + specializationMapEntries[0].size = sizeof(uint32_t); + specializationMapEntries[0].offset = 0; + specializationMapEntries[1].constantID = 2; + specializationMapEntries[1].size = sizeof(uint32_t); + specializationMapEntries[1].offset = sizeof(uint32_t); + + VkSpecializationInfo specializationInfo{}; + specializationInfo.dataSize = 2 * sizeof(uint32_t); + specializationInfo.mapEntryCount = static_cast(specializationMapEntries.size()); + specializationInfo.pMapEntries = specializationMapEntries.data(); + specializationInfo.pData = &specializationData; + + pipelineShaderStageCreateInfo.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO; + pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; + uint32_t filelength; + uint32_t* code = readShader(filelength, "shaders/ApplyCG1.spv"); + VkShaderModuleCreateInfo createInfo = {}; + createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; + createInfo.pCode = code; + createInfo.codeSize = filelength; + vkCreateShaderModule(device, &createInfo, NULL, 
&pipelineShaderStageCreateInfo.module); + delete[] code; + pipelineShaderStageCreateInfo.pName = "main"; + pipelineShaderStageCreateInfo.pSpecializationInfo = &specializationInfo; + computePipelineCreateInfo.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO; + computePipelineCreateInfo.stage = pipelineShaderStageCreateInfo; + computePipelineCreateInfo.layout = collection[0].pipelineLayout; + + + + vkCreateComputePipelines(device, VK_NULL_HANDLE, 1, &computePipelineCreateInfo, NULL, collection[0].pipelines); + vkDestroyShaderModule(device, pipelineShaderStageCreateInfo.module, NULL); + } + { + VkCommandBufferAllocateInfo commandBufferAllocateInfo = {}; + commandBufferAllocateInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO; + commandBufferAllocateInfo.commandPool = commandPool; + commandBufferAllocateInfo.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY; + commandBufferAllocateInfo.commandBufferCount = 1; + VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &commandBufferAllocateInfo, &collection[0].commandBuffer)); // allocate command buffer. + VkCommandBufferBeginInfo beginInfo = {}; + beginInfo.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO; + VK_CHECK_RESULT(vkBeginCommandBuffer(collection[0].commandBuffer, &beginInfo)); // start recording commands. + vkCmdPushConstants(collection[0].commandBuffer, collection[0].pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 2*sizeof(uint32_t), &vulkanLBFGS.applyCGConsts); + vkCmdBindPipeline(collection[0].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelines[0]); + vkCmdBindDescriptorSets(collection[0].commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelineLayout, 0, 1, &collection[0].descriptorSets[0], 0, NULL); + vkCmdDispatch(collection[0].commandBuffer, vulkanReduce.sizes[0], 1, 1); + VK_CHECK_RESULT(vkEndCommandBuffer(collection[0].commandBuffer)); // end recording commands. 
+ + } + } + void recordApplyCG1Append(VulkanCollection* collection, VkCommandBuffer* commandBuffer) { + vkCmdPushConstants(commandBuffer[0], collection[0].pipelineLayout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 2* sizeof(uint32_t), &vulkanLBFGS.applyCGConsts); + vkCmdBindPipeline(commandBuffer[0], VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelines[0]); + vkCmdBindDescriptorSets(commandBuffer[0], VK_PIPELINE_BIND_POINT_COMPUTE, collection[0].pipelineLayout, 0, 1, &collection[0].descriptorSets[0], 0, NULL); + vkCmdDispatch(commandBuffer[0], vulkanReduce.sizes[0], 1, 1); + + } + //Depondt void initDepondt() { int n = SIZES[0] * SIZES[1] * SIZES[2]; @@ -6849,10 +7317,10 @@ namespace VulkanCompute pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; uint32_t filelength; uint32_t* code; - if (launchConfiguration.double_precision_rotate == true) + /*if (launchConfiguration.double_precision_rotate == true) code = readShader(filelength, "shaders/ApplyRK4_1_double.spv"); - else - code = readShader(filelength, "shaders/ApplyRK4_1_float.spv"); + else*/ + code = readShader(filelength, "shaders/ApplyRK4_1_float.spv"); VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; @@ -6992,10 +7460,10 @@ namespace VulkanCompute pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; uint32_t filelength; uint32_t* code; - if (launchConfiguration.double_precision_rotate == true) + /*if (launchConfiguration.double_precision_rotate == true) code = readShader(filelength, "shaders/ApplyRK4_2_double.spv"); - else - code = readShader(filelength, "shaders/ApplyRK4_2_float.spv"); + else*/ + code = readShader(filelength, "shaders/ApplyRK4_2_float.spv"); VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; createInfo.pCode = code; @@ -7132,10 +7600,10 @@ namespace VulkanCompute pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; uint32_t filelength; uint32_t* 
code; - if (launchConfiguration.double_precision_rotate == true) + /*if (launchConfiguration.double_precision_rotate == true) code = readShader(filelength, "shaders/ApplyRK4_3_double.spv"); - else - code = readShader(filelength, "shaders/ApplyRK4_3_float.spv"); + else*/ + code = readShader(filelength, "shaders/ApplyRK4_3_float.spv"); VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; createInfo.pCode = code; @@ -7284,10 +7752,10 @@ namespace VulkanCompute pipelineShaderStageCreateInfo.stage = VK_SHADER_STAGE_COMPUTE_BIT; uint32_t filelength; uint32_t* code; - if (launchConfiguration.double_precision_rotate == true) + /*if (launchConfiguration.double_precision_rotate == true) code = readShader(filelength, "shaders/ApplyRK4_4_double.spv"); - else - code = readShader(filelength, "shaders/ApplyRK4_4_float.spv"); + else*/ + code = readShader(filelength, "shaders/ApplyRK4_4_float.spv"); VkShaderModuleCreateInfo createInfo = {}; createInfo.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO; createInfo.pCode = code; @@ -7441,11 +7909,6 @@ namespace VulkanCompute DestroyDebugUtilsMessengerEXT(instance, debugMessenger, nullptr); } - if (launchConfiguration.solver_type == 0) deleteLBFGS(); - if (launchConfiguration.solver_type == 1) deleteVP(); - if (launchConfiguration.solver_type == 2) deleteDepondt(); - if (launchConfiguration.solver_type == 3) deleteRK4(); - if (launchConfiguration.DDI ) { free_DDI(); } diff --git a/core/include/engine/CMakeLists.txt b/core/include/engine/CMakeLists.txt index b73e76a6a..9c08d4738 100644 --- a/core/include/engine/CMakeLists.txt +++ b/core/include/engine/CMakeLists.txt @@ -13,6 +13,7 @@ set(HEADER_SPIRIT_ENGINE ${CMAKE_CURRENT_SOURCE_DIR}/Solver_RK4.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Solver_VP.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Solver_VP_OSO.hpp + ${CMAKE_CURRENT_SOURCE_DIR}/Solver_CG_OSO.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Solver_LBFGS_OSO.hpp ${CMAKE_CURRENT_SOURCE_DIR}/Solver_LBFGS_Atlas.hpp 
${CMAKE_CURRENT_SOURCE_DIR}/Method.hpp diff --git a/core/include/engine/Method_Solver.hpp b/core/include/engine/Method_Solver.hpp index 6de00b28e..c2523f8a9 100644 --- a/core/include/engine/Method_Solver.hpp +++ b/core/include/engine/Method_Solver.hpp @@ -39,7 +39,8 @@ namespace Engine LBFGS_OSO = Solver_LBFGS_OSO, LBFGS_Atlas = Solver_LBFGS_Atlas, VP = Solver_VP, - VP_OSO = Solver_VP_OSO + VP_OSO = Solver_VP_OSO, + CG_OSO = Solver_CG_OSO }; /* @@ -358,6 +359,7 @@ namespace Engine #include #include #include + #include #include #include } diff --git a/core/include/engine/Solver_CG_OSO.hpp b/core/include/engine/Solver_CG_OSO.hpp new file mode 100644 index 000000000..53882e75f --- /dev/null +++ b/core/include/engine/Solver_CG_OSO.hpp @@ -0,0 +1,66 @@ +template <> inline +void Method_Solver::Initialize () +{ + this->iterations = 0; + this->systems[0]->app.freeLastSolver(); + this->systems[0]->app.init_solver(4); + +}; + + + +template <> inline +void Method_Solver::Iteration () +{ + + this->systems[0]->app.CG_iterate(); + + if (iterations % this->systems[0]->app.launchConfiguration.savePeriod == 0) { + + int hopf_radii[256]; + int k = 0; + if (allow_copy == true) { + allow_copy = false; + this->systems[0]->app.writeSpins((scalar*)((*this->configurations[0]).data()), &allow_copy); + + } + if (allow_copy2 == true) { + allow_copy2 = false; + this->systems[0]->app.writeGradient(this->systems[0]->hamiltonian->gradient_contributions_per_spin, &allow_copy2); + } + scalar energy[5]; + scalar energy_full = 0; + Vector3 meanMag; + scalar max_Force; + scalar time; + this->systems[0]->app.getEnergy(energy, &meanMag, &max_Force, &time); + energy[0] *= 2; + for (int i = 0; i < 5; i++) { + energy_full += energy[i]; + this->systems[0]->hamiltonian->energy_array[i].second = energy[i]; + } + + this->systems[0]->M = meanMag / (this->systems[0]->geometry->nos); + //scalar max_Force =this->systems[0]->app.getMaxForce(); + if (this->force_max_abs_component == sqrt(max_Force)) 
this->systems[0]->iteration_allowed = false; + this->force_max_abs_component = sqrt(max_Force); + if (this->force_max_abs_component < this->systems[0]->app.launchConfiguration.maxTorque) this->systems[0]->iteration_allowed = false; + std::cout << "iteration: " << iterations << " maxTorque: " << this->force_max_abs_component << " Mx: " << this->systems[0]->M[0] << " My: " << this->systems[0]->M[1] << " Mz: " << this->systems[0]->M[2] << " Efull: " << energy_full / this->systems[0]->geometry->nos << " Ezeeman: " << energy[0] / this->systems[0]->geometry->nos << " Eanis: " << energy[1] / this->systems[0]->geometry->nos << " Eexch: " << energy[2] / this->systems[0]->geometry->nos << " Edmi: " << energy[3] / this->systems[0]->geometry->nos << " Eddi: " << energy[4] / this->systems[0]->geometry->nos << "\n"; + + } + + iterations++; + +} + +template <> inline +std::string Method_Solver::SolverName() +{ + return "CG_OSO"; +}; + +template <> inline +std::string Method_Solver::SolverFullName() +{ + return "CG using exponential transforms"; +}; \ No newline at end of file diff --git a/core/include/engine/Solver_LBFGS_OSO.hpp b/core/include/engine/Solver_LBFGS_OSO.hpp index dad1a8d5b..056eae966 100644 --- a/core/include/engine/Solver_LBFGS_OSO.hpp +++ b/core/include/engine/Solver_LBFGS_OSO.hpp @@ -155,7 +155,10 @@ void Method_Solver::Iteration() //this->systems[0]->hamiltonian->energy_contributions_per_spin[0].second[0] = energy_full; this->systems[0]->M = meanMag/ (this->systems[0]->geometry->nos); //scalar max_Force =this->systems[0]->app.getMaxForce(); + //if (this->force_max_abs_component == sqrt(max_Force)) this->systems[0]->iteration_allowed = false; + this->force_max_abs_component = sqrt(max_Force); + if (this->force_max_abs_component < this->systems[0]->app.launchConfiguration.maxTorque) this->systems[0]->iteration_allowed = false; //std::cout << "maxTorque: " << this->force_max_abs_component<<" Mx: " << this->systems[0]->M[0] << " My: " << this->systems[0]->M[1] << " 
Mz: " << this->systems[0]->M[2] << " m_sum: " << this->systems[0]->M[0]+ this->systems[0]->M[1]+this->systems[0]->M[2] <<" Efull: " << energy_full / this->systems[0]->geometry->nos << " Ezeeman: " << energy[0] / this->systems[0]->geometry->nos << " Eanis: " << energy[1] / this->systems[0]->geometry->nos << " Eexch: " << energy[2] / this->systems[0]->geometry->nos << " Edmi: " << energy[3] / this->systems[0]->geometry->nos << " Eddi: " << energy[4] / this->systems[0]->geometry->nos << "\n"; /*if (iterations == 0) { std::ofstream outfile; diff --git a/core/include/engine/Solver_VP_OSO.hpp b/core/include/engine/Solver_VP_OSO.hpp index 05fc980b9..69e84ecfb 100644 --- a/core/include/engine/Solver_VP_OSO.hpp +++ b/core/include/engine/Solver_VP_OSO.hpp @@ -130,8 +130,9 @@ void Method_Solver::Iteration () //this->systems[0]->M = meanMag / this->systems[0]->geometry->nos; this->systems[0]->M = meanMag / (this->systems[0]->geometry->nos); //scalar max_Force =this->systems[0]->app.getMaxForce(); + //if (this->force_max_abs_component == sqrt(max_Force)) this->systems[0]->iteration_allowed = false; this->force_max_abs_component = sqrt(max_Force); - + if (this->force_max_abs_component < this->systems[0]->app.launchConfiguration.maxTorque) this->systems[0]->iteration_allowed = false; std::cout << "iteration: " << iterations << " maxTorque: " << this->force_max_abs_component << " Mx: " << this->systems[0]->M[0] << " My: " << this->systems[0]->M[1] << " Mz: " << this->systems[0]->M[2] << " Efull: " << energy_full / this->systems[0]->geometry->nos << " Ezeeman: " << energy[0] / this->systems[0]->geometry->nos << " Eanis: " << energy[1] / this->systems[0]->geometry->nos << " Eexch: " << energy[2] / this->systems[0]->geometry->nos << " Edmi: " << energy[3] / this->systems[0]->geometry->nos << " Eddi: " << energy[4] / this->systems[0]->geometry->nos << "\n"; //this->systems[0]->app.writeGradient((*this->configurations[0]).data()); diff --git a/core/src/Spirit/Hamiltonian.cpp 
b/core/src/Spirit/Hamiltonian.cpp index 4128427ec..2baeab69f 100644 --- a/core/src/Spirit/Hamiltonian.cpp +++ b/core/src/Spirit/Hamiltonian.cpp @@ -736,6 +736,53 @@ void Hamiltonian_Set_DMI_Tensor(State *state, float dmi_tensor[2], int region_id spirit_handle_exception_api(idx_image, idx_chain); } } +void Hamiltonian_Set_damping(State* state, float alpha, int region_id, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->Lock(); + + try + { + if (image->hamiltonian->Name() == "Micromagnetic") + { + // Update the Hamiltonian + auto ham = (Engine::Hamiltonian_Micromagnetic*)image->hamiltonian.get(); + std::string message; + + ham->regions_book[region_id].alpha = alpha; // NOTE(review): region_id is used as an index without a range check here; confirm callers keep it below ham->region_num + message = fmt::format("Set region {} damping = {}", region_id, alpha); + + image->app.updateRegionsBook(ham->regions_book, ham->region_num); + ham->Update_Interactions(); + + + //message += fmt::format("{} {} {}\n", dmi_tensor[3],dmi_tensor[4],dmi_tensor[5]); + //message += fmt::format("{} {} {}", dmi_tensor[6],dmi_tensor[7],dmi_tensor[8]); + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, message, idx_image, idx_chain); + } + else + Log(Utility::Log_Level::Warning, Utility::Log_Sender::API, + "Damping cannot be set on " + image->hamiltonian->Name(), idx_image, idx_chain); + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } + + image->Unlock(); + } + catch (...) 
+ { + spirit_handle_exception_api(idx_image, idx_chain); + } +} void Hamiltonian_Set_DDI_coefficient(State* state, float ddi, int region_id, int idx_image, int idx_chain) noexcept { try @@ -1308,7 +1355,28 @@ void Hamiltonian_Get_DMI_Tensor(State *state, float * dmi_tensor, int region_id, spirit_handle_exception_api(idx_image, idx_chain); } } +void Hamiltonian_Get_damping(State* state, float* alpha, int region_id, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + if (image->hamiltonian->Name() == "Micromagnetic") + { + auto ham = (Engine::Hamiltonian_Micromagnetic*)image->hamiltonian.get(); + alpha[0] = ham->regions_book[region_id].alpha; + } + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} void Hamiltonian_Get_DDI_coefficient(State* state, float* ddi, int region_id, int idx_image, int idx_chain) noexcept { try diff --git a/core/src/Spirit/Parameters_LLG.cpp b/core/src/Spirit/Parameters_LLG.cpp index 8b5e5431f..ffde47a5a 100644 --- a/core/src/Spirit/Parameters_LLG.cpp +++ b/core/src/Spirit/Parameters_LLG.cpp @@ -630,4 +630,244 @@ void Parameters_LLG_Get_STT( State *state, bool * use_gradient, float * magnitud { spirit_handle_exception_api(idx_image, idx_chain); } +} + +//micromagnetics functions + +void Parameters_LLG_Set_dt(State* state, float dt, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.gamma= dt * 0.176085964411; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: LLG time step = {}", dt), idx_image, idx_chain); + } + catch (...) 
+ { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Set_max_torque(State* state, float maxTorque, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.maxTorque = maxTorque; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: LLG max torque = {}", maxTorque), idx_image, idx_chain); + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Set_max_move(State* state, float max_move, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.max_move = max_move; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: LLG max move for LBFGS = {}", max_move), idx_image, idx_chain); + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} + +void Parameters_LLG_Set_n_LBFGS(State* state, int n_LBFGS, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.n_lbfgs_memory = n_LBFGS; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: LLG number of stored steps for LBFGS = {}", n_LBFGS), idx_image, idx_chain); + } + catch (...) 
+ { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Set_grouped_iterations(State* state, int grouped_iterations, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.groupedIterations = grouped_iterations; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: {} grouped iterations to perform them in one batch", grouped_iterations), idx_image, idx_chain); + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} + +void Parameters_LLG_Set_save_period(State* state, int save_period, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + image->app.launchConfiguration.savePeriod = save_period; + image->app.init_solver(image->app.launchConfiguration.solver_type); + + Log(Utility::Log_Level::Info, Utility::Log_Sender::API, + fmt::format("Set: save state after every {} batches", save_period), idx_image, idx_chain); + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} + +void Parameters_LLG_Get_dt(State* state, float* dt, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *dt = (float)image->app.launchConfiguration.gamma/ 0.176085964411; + + } + catch (...) 
+ { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Get_max_torque(State* state, float* maxTorque, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *maxTorque = (float)image->app.launchConfiguration.maxTorque; + + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Get_max_move(State* state, float* max_move, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *max_move = (float)image->app.launchConfiguration.max_move; + + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Get_n_LBFGS(State* state, int* n_LBFGS, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *n_LBFGS = (int)image->app.launchConfiguration.n_lbfgs_memory; + + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Get_grouped_iterations(State* state, int* grouped_iterations,int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *grouped_iterations = (int)image->app.launchConfiguration.groupedIterations; + + } + catch (...) 
+ { + spirit_handle_exception_api(idx_image, idx_chain); + } +} +void Parameters_LLG_Get_save_period(State* state, int* save_period, int idx_image, int idx_chain) noexcept +{ + try + { + std::shared_ptr image; + std::shared_ptr chain; + + // Fetch correct indices and pointers + from_indices(state, idx_image, idx_chain, image, chain); + + *save_period = (int)image->app.launchConfiguration.savePeriod; + + } + catch (...) + { + spirit_handle_exception_api(idx_image, idx_chain); + } } \ No newline at end of file diff --git a/core/src/Spirit/Simulation.cpp b/core/src/Spirit/Simulation.cpp index 0c1c142aa..d8669f2ed 100644 --- a/core/src/Spirit/Simulation.cpp +++ b/core/src/Spirit/Simulation.cpp @@ -154,6 +154,9 @@ try else if (solver_type == int(Engine::Solver::VP_OSO)) method = std::shared_ptr( new Engine::Method_LLG( image, idx_image, idx_chain ) ); + else if (solver_type == int(Engine::Solver::CG_OSO)) + method = std::shared_ptr( + new Engine::Method_LLG(image, idx_image, idx_chain)); else spirit_throw(Utility::Exception_Classifier::Unknown_Exception, Utility::Log_Level::Warning, fmt::format( "Invalid solver_type {}", solver_type)); diff --git a/core/src/engine/Method_LLG.cpp b/core/src/engine/Method_LLG.cpp index e26a1d804..e9fda845f 100644 --- a/core/src/engine/Method_LLG.cpp +++ b/core/src/engine/Method_LLG.cpp @@ -499,4 +499,5 @@ namespace Engine template class Method_LLG; template class Method_LLG; template class Method_LLG; + template class Method_LLG; } diff --git a/core/src/io/Configparser.cpp b/core/src/io/Configparser.cpp index fd69f7c48..e3f049b35 100644 --- a/core/src/io/Configparser.cpp +++ b/core/src/io/Configparser.cpp @@ -1476,6 +1476,7 @@ namespace IO myfile.Read_Single(launchConfiguration.GPU_ID, "GPU_ID"); myfile.Read_Single(launchConfiguration.savePeriod, "save_period"); myfile.Read_Single(launchConfiguration.groupedIterations, "grouped_iterations"); + myfile.Read_Single(launchConfiguration.maxTorque, "max_torque"); std::string 
double_precision_rotate; myfile.Read_String(double_precision_rotate, "double_precision_rotate"); if (double_precision_rotate == "ON") @@ -1591,7 +1592,7 @@ namespace IO myfile.Read_Single(spatial_gradient_order, "spatial_gradient_order"); myfile.Read_Single(launchConfiguration.max_move, "max_move"); - launchConfiguration.max_move = 3.14159265358979 / launchConfiguration.max_move; + //launchConfiguration.max_move = 3.14159265358979 / launchConfiguration.max_move; myfile.Read_Single(launchConfiguration.n_lbfgs_memory, "n_lbfgs"); // Field //external_field_magnitude=scalarfield(region_num,0); diff --git a/input/input.cfg b/input/input.cfg index 3551d0b7a..4aabe26c6 100644 --- a/input/input.cfg +++ b/input/input.cfg @@ -34,79 +34,62 @@ hamiltonian micromagnetic GPU_ID 0 ### Boundary_conditions (a b c): 0(open), 1(periodical) - only for DDI right now -boundary_conditions 0 0 0 +boundary_conditions 1 1 0 -grouped_iterations 50 #group iterations to perform them in one batch -save_period 10 #save state afterevery X batches +grouped_iterations 1000 #group iterations to perform them in one batch +save_period 1 #save state afterevery X batches save_gradient_components OFF # save all gradient components at every save_period batches, they can be displayed in GUI double_precision_rotate OFF # rotational matrix is calculated in double double_precision_gradient OFF # gradient (except DDI) is calculated in double, and then stored as float +lbfgs_linesearch OFF adaptive_time_step OFF # experimental feature to modify timestep -correct_dt 1e-2 #in ps, used as starting timestep if adaptive_time_step is ON -max_move 200 # LBFGS parameter +correct_dt 1e-3 #in ps, used as starting timestep if adaptive_time_step is ON +max_move 0.015 # LBFGS parameter n_lbfgs 3 # LBFGS parameter -cell_sizes 1e-9 1e-9 20e-9 # in meters +max_torque 1e-3 +cell_sizes 0.125e-9 0.125e-9 1e-9 # in meters #cell_sizes 0.06e-9 0.06e-9 3e-9 #cell_sizes 0.9765625e-9 0.9765625e-9 3e-9 #cell_sizes 3.90625e-9 3.90625e-9 
3e-9 #cell_sizes 2e-9 2e-9 5e-9 -number_regions 2 # number of regions. regions are defined in configparser.cpp for now (will be moved later) +number_regions 1 # number of regions. regions are defined in configparser.cpp for now (will be moved later) Ms -800e3 -800e3 +384e3 alpha -0.02 -0.02 +0.0 frozen_spins 0 -0 external_field_magnitude -0.0 -0.0 -#0.03605468069 -#0.03605468069 - +0.13051845 external_field_normal -1 0 0 -1 0 0 -#-0.9846155705 -0.1747345942 0 -#-0.9846155705 -0.1747345942 0 - - +0 0 1 number_anisotropies 1 -1 anisotropies_vectors 0 0 0 0 -0 0 0 0 exchange_stiffness -13e-12 -13e-12 +4.0e-12 dmi_bulk -1e-3 -1e-3 +0.0007853982 dmi_interface 0 -0 - -kernel_accuracy 6.0 -ddi -1.0 -1.0 +kernel_accuracy 12.0 +#ddi +#1.0 ################ End Hamiltonian ################# @@ -121,7 +104,7 @@ bravais_lattice sc ### Number of basis cells along principal ### directions (a b c) -n_basis_cells 1024 256 1 +n_basis_cells 32 32 64 ################# End Geometry ################### @@ -195,9 +178,9 @@ llg_max_walltime 0:0:0 llg_seed 20006 ### Number of iterations -llg_n_iterations 10000000 +llg_n_iterations 1000000000 ### Number of iterations after which to save -llg_n_iterations_log 20000 +llg_n_iterations_log 200000 ### Temperature [K] llg_temperature 0 diff --git a/shaders/ApplyCG1.comp b/shaders/ApplyCG1.comp new file mode 100644 index 000000000..bae442a91 --- /dev/null +++ b/shaders/ApplyCG1.comp @@ -0,0 +1,55 @@ +#version 450 +#extension GL_KHR_shader_subgroup_arithmetic : enable + +layout (local_size_x_id = 1) in; +layout (constant_id = 2) const int sumSubGroupSize = 64; + +layout(binding = 0) readonly buffer Data0 +{ + float grad []; +}; + +layout(std430, binding = 1) writeonly buffer PartialSums +{ + float partial_sums[]; +}; + +layout(push_constant) uniform PushConsts +{ + uint pad; + float dt; +} consts; + +shared float sdata[sumSubGroupSize]; + + +void main() { + float dot = 0.0; + + if ( gl_GlobalInvocationID.x < 3*consts.pad) + { + dot = 
grad[gl_GlobalInvocationID.x ]*grad[(gl_GlobalInvocationID.x )]; + + } + dot = subgroupAdd(dot); + + if (gl_SubgroupInvocationID == 0) + { + sdata[gl_SubgroupID] = dot; + + } + + memoryBarrierShared(); + barrier(); + + if (gl_SubgroupID == 0) + { + dot = gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID] : 0; + dot = subgroupAdd(dot); + + } + if (gl_LocalInvocationID.x == 0) + { + partial_sums[gl_WorkGroupID.x] = dot; + } +} diff --git a/shaders/ApplyCG1.spv b/shaders/ApplyCG1.spv new file mode 100644 index 000000000..e60e37745 Binary files /dev/null and b/shaders/ApplyCG1.spv differ diff --git a/shaders/ApplyCG2_double.comp b/shaders/ApplyCG2_double.comp new file mode 100644 index 000000000..d937dbe6a --- /dev/null +++ b/shaders/ApplyCG2_double.comp @@ -0,0 +1,117 @@ +#version 450 +#extension GL_ARB_gpu_shader_fp64 : enable +#extension GL_KHR_shader_subgroup_arithmetic : enable + +double sina_11(double x) +{ + //minimax coefs for sin for 0..pi/2 range + const double a3 = -1.666666660646699151540776973346659104119e-1LF; + const double a5 = 8.333330495671426021718370503012583606364e-3LF; + const double a7 = -1.984080403919620610590106573736892971297e-4LF; + const double a9 = 2.752261885409148183683678902130857814965e-6LF; + const double ab = -2.384669400943475552559273983214582409441e-8LF; + + const double m_2_pi = 0.636619772367581343076LF; + const double m_pi_2 = 1.57079632679489661923LF; + + double y = abs(x * m_2_pi); + double q = floor(y); + int quadrant = int(q); + + double t = (quadrant & 1) != 0 ? 1 - y + q : y - q; + t *= m_pi_2; + + double t2 = t * t; + double r = fma(fma(fma(fma(fma(ab, t2, a9), t2, a7), t2, a5), t2, a3), + t2*t, t); + + r = x < 0 ? -r : r; + + return (quadrant & 2) != 0 ? 
-r : r; +} +double cosa_11(double x) +{ + //sin(x + PI/2) = cos(x) + return sina_11(x + 1.57079632679489661923LF); +} +layout (local_size_x = 1024, local_size_y = 1,local_size_z = 1) in; +layout (constant_id = 1) const int sumSubGroupSize = 64; +layout(binding = 0) readonly buffer Data0 +{ + float grad []; +}; +layout(binding = 1) buffer Data2 +{ + float sd []; +}; +layout(binding = 2) buffer Data3 +{ + float s []; +}; + +layout(binding = 3) buffer Data4 +{ + float reduceResults[]; +}; + +layout(push_constant) uniform PushConsts +{ + uint pad; + float dt; + +} consts; + +void main() { + uint id_x=gl_GlobalInvocationID.x; + + vec3 s_loc; + s_loc.x=s[id_x]; + s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; + + dvec3 sd_loc; + if (reduceResults[1]==0){ + reduceResults[1]=reduceResults[0]; + sd_loc.x=-grad[id_x]; + sd_loc.y=-grad[id_x+consts.pad]; + sd_loc.z=-grad[id_x+2*consts.pad]; + } + else{ + double beta= reduceResults[0]/reduceResults[1]; + reduceResults[1]=reduceResults[0]; + sd_loc.x=beta*sd_loc.x-grad[id_x]; // NOTE(review): sd_loc has not been assigned on this path, so beta scales an undefined value; presumably the previous direction should be loaded from sd[] first - confirm + sd_loc.y=beta*sd_loc.y-grad[id_x+consts.pad]; + sd_loc.z=beta*sd_loc.z-grad[id_x+2*consts.pad]; + } + sd_loc*=consts.dt; + sd[id_x]=float(sd_loc.x); + sd[id_x+consts.pad]=float(sd_loc.y); + sd[id_x+2*consts.pad]=float(sd_loc.z); + double theta = length(sd_loc); + if (theta > 1.0e-20) // if theta is too small we do nothing + { + double theta_inv = 1.0/theta; + double q = cosa_11(theta); + double w = 1 - q; + double x = -sd_loc.x* theta_inv; + double y = -sd_loc.y* theta_inv; + double z = -sd_loc.z* theta_inv; + + double s1 = x * y * w; // xy coupling term of the rotation matrix (s2 is xz, s3 is zy) + double s2 = x * z * w; + double s3 = z * y * w; + double sin_theta = sina_11(theta); + double p1 = z * sin_theta; + double p2 = y * sin_theta; + double p3 = x * sin_theta; + dvec3 t; + + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * 
s_loc.z; + t=normalize(t); + s[id_x] = float(t.x); + s[id_x+consts.pad] = float(t.y); + s[id_x+2*consts.pad] = float(t.z); + }; +} diff --git a/shaders/ApplyCG2_double.spv b/shaders/ApplyCG2_double.spv new file mode 100644 index 000000000..3b2844447 Binary files /dev/null and b/shaders/ApplyCG2_double.spv differ diff --git a/shaders/ApplyCG2_float.comp b/shaders/ApplyCG2_float.comp new file mode 100644 index 000000000..9417a8627 --- /dev/null +++ b/shaders/ApplyCG2_float.comp @@ -0,0 +1,83 @@ +#version 450 +#extension GL_KHR_shader_subgroup_arithmetic : enable + +layout (local_size_x = 1024, local_size_y = 1,local_size_z = 1) in; +layout (constant_id = 1) const int sumSubGroupSize = 64; +layout(binding = 0) readonly buffer Data0 +{ + float grad []; +}; +layout(binding = 1) buffer Data2 +{ + float sd []; +}; +layout(binding = 2) buffer Data3 +{ + float s []; +}; + +layout(binding = 3) buffer Data4 +{ + float reduceResults[]; +}; + +layout(push_constant) uniform PushConsts +{ + uint pad; + float dt; +} consts; + +void main() { + uint id_x=gl_GlobalInvocationID.x; + + vec3 s_loc; + s_loc.x=s[id_x]; + s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; + vec3 sd_loc; + if (reduceResults[1]==0){ + reduceResults[1]=reduceResults[0]; + sd_loc.x=-grad[id_x]; + sd_loc.y=-grad[id_x+consts.pad]; + sd_loc.z=-grad[id_x+2*consts.pad]; + } + else{ + float beta= reduceResults[0]/reduceResults[1]; + reduceResults[1]=reduceResults[0]; + sd_loc.x=beta*sd_loc.x-grad[id_x]; + sd_loc.y=beta*sd_loc.y-grad[id_x+consts.pad]; + sd_loc.z=beta*sd_loc.z-grad[id_x+2*consts.pad]; + } + sd_loc*=consts.dt; + sd[id_x]=sd_loc.x; + sd[id_x+consts.pad]=sd_loc.y; + sd[id_x+2*consts.pad]=sd_loc.z; + float theta = length(sd_loc); + if (theta > 1.0e-20) // if theta is too small we do nothing + { + + float theta_inv = 1.0/theta; + float q = cos(theta); + float w = 1 - q; + float x = -sd_loc.x* theta_inv; + float y = -sd_loc.y* theta_inv; + float z = -sd_loc.z* theta_inv; + float s1 = x * y * w; + 
float s2 = x * z * w; + float s3 = z * y * w; + float p1 = z * sin(theta); + float p2 = y * sin(theta); + float p3 = x * sin(theta); + vec3 t; + + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + t=normalize(t); + s[id_x] = t.x; + s[id_x+consts.pad] = t.y; + s[id_x+2*consts.pad] = t.z; + }; + + +} diff --git a/shaders/ApplyCG2_float.spv b/shaders/ApplyCG2_float.spv new file mode 100644 index 000000000..16ec1a40e Binary files /dev/null and b/shaders/ApplyCG2_float.spv differ diff --git a/shaders/ApplyDepondt2_double.comp b/shaders/ApplyDepondt2_double.comp index c39fb23ac..16affbcfd 100644 --- a/shaders/ApplyDepondt2_double.comp +++ b/shaders/ApplyDepondt2_double.comp @@ -71,10 +71,11 @@ shared float sdata[sumSubGroupSize]; void main() { uint id_x=gl_GlobalInvocationID.x; - double grad_corr_x=0.5*(grad[id_x]+grad_copy[id_x]); - double grad_corr_y=0.5*(grad[id_x+consts.pad]+grad_copy[id_x+consts.pad]); - double grad_corr_z=0.5*(grad[id_x+2*consts.pad]+grad_copy[id_x+2*consts.pad]); - double grad_norm = sqrt(grad_corr_x*grad_corr_x+grad_corr_y*grad_corr_y+grad_corr_z*grad_corr_z); + dvec3 grad_corr; + grad_corr.x=0.5*(grad[id_x]+grad_copy[id_x]); + grad_corr.y=0.5*(grad[id_x+consts.pad]+grad_copy[id_x+consts.pad]); + grad_corr.z=0.5*(grad[id_x+2*consts.pad]+grad_copy[id_x+2*consts.pad]); + double grad_norm = length(grad_corr); double gamma_dt= (consts.dt==0) ? 
1e-4/gamma_dt_from_torque[0] : consts.dt; float max=0; @@ -83,9 +84,9 @@ void main() { double grad_norm_inv=1.0/grad_norm; double q = cosa_11(grad_norm*gamma_dt); double w = 1 - q; - double x = -grad_corr_x* grad_norm_inv; - double y = -grad_corr_y* grad_norm_inv; - double z = -grad_corr_z* grad_norm_inv; + double x = -grad_corr.x* grad_norm_inv; + double y = -grad_corr.y* grad_norm_inv; + double z = -grad_corr.z* grad_norm_inv; double s1 = x * y * w; double s2 = x * z * w; double s3 = z * y * w; @@ -93,22 +94,21 @@ void main() { double p1 = z * sin_grad; double p2 = y * sin_grad; double p3 = x * sin_grad; - double t1, t2, t3; + dvec3 t; - t1 = (q + x* x * w) * s_copy[id_x] + (s1 - p1) * s_copy[id_x+consts.pad] + (s2 + p2) * s_copy[id_x+2*consts.pad]; - t2 = (s1 + p1) * s_copy[id_x] + (q + y * y * w) * s_copy[id_x+consts.pad] + (s3 - p3) * s_copy[id_x+2*consts.pad]; - t3 = (s2 - p2) * s_copy[id_x] + (s3 + p3) * s_copy[id_x+consts.pad] + (q + z * z * w) * s_copy[id_x+2*consts.pad]; - //temp_x=s_copy[id_x]-t1; - //temp_y=s_copy[id_x+consts.pad]-t2; - //temp_z=s_copy[id_x+2*consts.pad]-t3; - s[id_x] = float(t1); - s[id_x+consts.pad] = float(t2); - s[id_x+2*consts.pad] = float(t3); + t.x = (q + x* x * w) * s_copy[id_x] + (s1 - p1) * s_copy[id_x+consts.pad] + (s2 + p2) * s_copy[id_x+2*consts.pad]; + t.y = (s1 + p1) * s_copy[id_x] + (q + y * y * w) * s_copy[id_x+consts.pad] + (s3 - p3) * s_copy[id_x+2*consts.pad]; + t.z = (s2 - p2) * s_copy[id_x] + (s3 + p3) * s_copy[id_x+consts.pad] + (q + z * z * w) * s_copy[id_x+2*consts.pad]; + t=normalize(t); + + s[id_x] = float(t.x); + s[id_x+consts.pad] = float(t.y); + s[id_x+2*consts.pad] = float(t.z); - float temp_x=grad[id_x]-grad_copy[id_x]; + /*float temp_x=grad[id_x]-grad_copy[id_x]; float temp_y=grad[id_x+consts.pad]-grad_copy[id_x+consts.pad]; float temp_z=grad[id_x+2*consts.pad]-grad_copy[id_x+2*consts.pad]; - + */ //float max = temp_x*temp_x+temp_y*temp_y+temp_z*temp_z; max = 
float(grad_norm);//sqrt(temp_x*temp_x+temp_y*temp_y+temp_z*temp_z); }; diff --git a/shaders/ApplyDepondt2_double.spv b/shaders/ApplyDepondt2_double.spv index 7642505ff..5dee8059f 100644 Binary files a/shaders/ApplyDepondt2_double.spv and b/shaders/ApplyDepondt2_double.spv differ diff --git a/shaders/ApplyDepondt2_float.comp b/shaders/ApplyDepondt2_float.comp index fb2443d70..9d3b0acf3 100644 --- a/shaders/ApplyDepondt2_float.comp +++ b/shaders/ApplyDepondt2_float.comp @@ -38,10 +38,11 @@ shared float sdata[sumSubGroupSize]; void main() { uint id_x=gl_GlobalInvocationID.x; - float grad_corr_x=0.5*(grad[id_x]+grad_copy[id_x]); - float grad_corr_y=0.5*(grad[id_x+consts.pad]+grad_copy[id_x+consts.pad]); - float grad_corr_z=0.5*(grad[id_x+2*consts.pad]+grad_copy[id_x+2*consts.pad]); - float grad_norm = sqrt(grad_corr_x*grad_corr_x+grad_corr_y*grad_corr_y+grad_corr_z*grad_corr_z); + vec3 grad_corr; + grad_corr.x=0.5*(grad[id_x]+grad_copy[id_x]); + grad_corr.y=0.5*(grad[id_x+consts.pad]+grad_copy[id_x+consts.pad]); + grad_corr.z=0.5*(grad[id_x+2*consts.pad]+grad_copy[id_x+2*consts.pad]); + float grad_norm = length(grad_corr); float gamma_dt= (consts.dt==0) ? 
1e-4/gamma_dt_from_torque[0] : consts.dt; float max=0; @@ -50,30 +51,29 @@ void main() { float grad_norm_inv=1.0/grad_norm; float q = cos(grad_norm*gamma_dt); float w = 1 - q; - float x = -grad_corr_x* grad_norm_inv; - float y = -grad_corr_y* grad_norm_inv; - float z = -grad_corr_z* grad_norm_inv; + float x = -grad_corr.x* grad_norm_inv; + float y = -grad_corr.y* grad_norm_inv; + float z = -grad_corr.z* grad_norm_inv; float s1 = x * y * w; float s2 = x * z * w; float s3 = z * y * w; - float p1 = z * sin(grad_norm*gamma_dt); - float p2 = y * sin(grad_norm*gamma_dt); - float p3 = x * sin(grad_norm*gamma_dt); - float t1, t2, t3; + float sin_grad= sin(grad_norm*gamma_dt); + float p1 = z * sin_grad; + float p2 = y * sin_grad; + float p3 = x * sin_grad; + vec3 t; - t1 = (q + x* x * w) * s_copy[id_x] + (s1 - p1) * s_copy[id_x+consts.pad] + (s2 + p2) * s_copy[id_x+2*consts.pad]; - t2 = (s1 + p1) * s_copy[id_x] + (q + y * y * w) * s_copy[id_x+consts.pad] + (s3 - p3) * s_copy[id_x+2*consts.pad]; - t3 = (s2 - p2) * s_copy[id_x] + (s3 + p3) * s_copy[id_x+consts.pad] + (q + z * z * w) * s_copy[id_x+2*consts.pad]; - //temp_x=s_copy[id_x]-t1; - //temp_y=s_copy[id_x+consts.pad]-t2; - //temp_z=s_copy[id_x+2*consts.pad]-t3; - s[id_x] = t1; - s[id_x+consts.pad] = t2; - s[id_x+2*consts.pad] = t3; + t.x = (q + x* x * w) * s_copy[id_x] + (s1 - p1) * s_copy[id_x+consts.pad] + (s2 + p2) * s_copy[id_x+2*consts.pad]; + t.y = (s1 + p1) * s_copy[id_x] + (q + y * y * w) * s_copy[id_x+consts.pad] + (s3 - p3) * s_copy[id_x+2*consts.pad]; + t.z = (s2 - p2) * s_copy[id_x] + (s3 + p3) * s_copy[id_x+consts.pad] + (q + z * z * w) * s_copy[id_x+2*consts.pad]; + t=normalize(t); + s[id_x] = t.x; + s[id_x+consts.pad] = t.y; + s[id_x+2*consts.pad] = t.z; - float temp_x=grad[id_x]-grad_copy[id_x]; + /*float temp_x=grad[id_x]-grad_copy[id_x]; float temp_y=grad[id_x+consts.pad]-grad_copy[id_x+consts.pad]; - float temp_z=grad[id_x+2*consts.pad]-grad_copy[id_x+2*consts.pad]; + float 
temp_z=grad[id_x+2*consts.pad]-grad_copy[id_x+2*consts.pad];*/ //float max = temp_x*temp_x+temp_y*temp_y+temp_z*temp_z; max = grad_norm;//sqrt(temp_x*temp_x+temp_y*temp_y+temp_z*temp_z); diff --git a/shaders/ApplyDepondt2_float.spv b/shaders/ApplyDepondt2_float.spv index 84db96de4..3eeb01184 100644 Binary files a/shaders/ApplyDepondt2_float.spv and b/shaders/ApplyDepondt2_float.spv differ diff --git a/shaders/ApplyLBFGS1.comp b/shaders/ApplyLBFGS1.comp index 83faf59c7..8b9e65cbb 100644 --- a/shaders/ApplyLBFGS1.comp +++ b/shaders/ApplyLBFGS1.comp @@ -40,9 +40,9 @@ layout(std430, binding = 7) writeonly buffer PartialSums { float partial_sums[]; }; -layout(binding = 8) buffer MaxMove +layout(binding = 8) buffer LastMax { - float maxmove []; + float lastMax []; }; layout(push_constant) uniform PushConsts @@ -70,13 +70,12 @@ void main() { } if (gl_LocalInvocationID.x==gl_WorkGroupSize.x-1){ for (uint j=0;j1e-3){ + float energy=2*lastMax[1]+lastMax[2]+lastMax[3]+lastMax[4]+lastMax[5]; + if (energy>lastMax[11]) + { + float new_max_move=lastMax[10]*0.98; + lastMax[10]=new_max_move; + lastMax[17]=new_max_move; + } + + lastMax[11]=energy; + lastMax[12]=torque; + } else { + if (torque>lastMax[12]) + { + float new_max_move=lastMax[10]*0.99; + lastMax[10]=new_max_move; + lastMax[17]=new_max_move; + } + + lastMax[12]=torque; + + } + + + } + + + /*float newMax = lastMax[10]*torque; + lastMax[17] = ((torque<1)&&(newMax consts.eps) inv_rhody2 = 1.0 / rhody2; diff --git a/shaders/ApplyLBFGS4.spv b/shaders/ApplyLBFGS4.spv index 10519458a..3ecf88eea 100644 Binary files a/shaders/ApplyLBFGS4.spv and b/shaders/ApplyLBFGS4.spv differ diff --git a/shaders/ApplyLBFGS6.comp b/shaders/ApplyLBFGS6.comp index 33a3471c9..1d3f9be9c 100644 --- a/shaders/ApplyLBFGS6.comp +++ b/shaders/ApplyLBFGS6.comp @@ -53,7 +53,7 @@ void main() { if (consts.k==consts.num_mem-1){ - sd[gl_GlobalInvocationID.x]=-sd[gl_GlobalInvocationID.x]; + //sd[gl_GlobalInvocationID.x]=-sd[gl_GlobalInvocationID.x]; 
grad_pr[gl_GlobalInvocationID.x]=grad[gl_GlobalInvocationID.x]; } } diff --git a/shaders/ApplyLBFGS6.spv b/shaders/ApplyLBFGS6.spv index 1294fbb63..eab8475b8 100644 Binary files a/shaders/ApplyLBFGS6.spv and b/shaders/ApplyLBFGS6.spv differ diff --git a/shaders/ApplyLBFGS8_double.comp b/shaders/ApplyLBFGS8_double.comp index b25136c23..3094e5973 100644 --- a/shaders/ApplyLBFGS8_double.comp +++ b/shaders/ApplyLBFGS8_double.comp @@ -53,15 +53,12 @@ layout(binding = 3) readonly buffer Data1 { float reduceResults[]; }; -layout(std430, binding = 4) writeonly buffer PartialMax -{ - float partial_max[]; -}; -layout(binding = 5) buffer MaxMove + +layout(binding = 4) buffer MaxMove { float maxmove []; }; -layout(binding = 6) buffer Data2 +layout(binding = 5) buffer Data2 { float s_copy []; }; @@ -75,53 +72,49 @@ layout(push_constant) uniform PushConsts float max_move; } consts; -shared float sdata[sumSubGroupSize]; +//shared float sdata[sumSubGroupSize]; void main() { double theta_rms=sqrt(reduceResults[0]/consts.nos); double scaling = (theta_rms>maxmove[1]) ? 
maxmove[1]/theta_rms : 1.0; uint id_x=gl_GlobalInvocationID.x; - s_copy[id_x]=s[id_x]; - s_copy[id_x+consts.pad]=s[id_x+consts.pad]; - s_copy[id_x+2*consts.pad]=s[id_x+2*consts.pad]; - double sd_loc[3]; - sd_loc[0]=sd[id_x]*scaling; - sd_loc[1]=sd[id_x+consts.pad]*scaling; - sd_loc[2]=sd[id_x+2*consts.pad]*scaling; - sd[id_x]=float(sd_loc[0]); - sd[id_x+consts.pad]=float(sd_loc[1]); - sd[id_x+2*consts.pad]=float(sd_loc[2]); - //float temp_x=0; - //float temp_y=0; - //float temp_z=0; - double theta = sqrt(sd_loc[0]*sd_loc[0]+sd_loc[1]*sd_loc[1]+sd_loc[2]*sd_loc[2]); - //float s_norm=1.0; + vec3 s_loc; + s_loc.x=s[id_x]; + s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; + s_copy[id_x]=s_loc.x; + s_copy[id_x+consts.pad]=s_loc.y; + s_copy[id_x+2*consts.pad]=s_loc.z; + dvec3 sd_loc; + sd_loc.x=sd[id_x]*scaling; + sd_loc.y=sd[id_x+consts.pad]*scaling; + sd_loc.z=sd[id_x+2*consts.pad]*scaling; + sd[id_x]=float(sd_loc.x); + sd[id_x+consts.pad]=float(sd_loc.y); + sd[id_x+2*consts.pad]=float(sd_loc.z); + double theta = length(sd_loc); if (theta > 1.0e-20) // if theta is too small we do nothing { double theta_inv = 1.0/theta; double q = cosa_11(theta); double w = 1 - q; - double x = -sd_loc[0]* theta_inv; - double y = -sd_loc[1]* theta_inv; - double z = -sd_loc[2]* theta_inv; - double s1 = -y * z * w; + double x = -sd_loc.x* theta_inv; + double y = -sd_loc.y* theta_inv; + double z = -sd_loc.z* theta_inv; + double s1 = x * y * w; double s2 = x * z * w; - double s3 = -x * y * w; + double s3 = z * y * w; double sin_theta= sina_11(theta); - double p1 = x * sin_theta; + double p1 = z * sin_theta; double p2 = y * sin_theta; - double p3 = z * sin_theta; - double t1, t2, t3; - t1 = (q + z * z * w) * s[id_x] + (s1 + p1) * s[id_x+consts.pad] + (s2 + p2) * s[id_x+2*consts.pad]; - t2 = (s1 - p1) * s[id_x] + (q + y * y * w) * s[id_x+consts.pad] + (s3 + p3) * s[id_x+2*consts.pad]; - t3 = (s2 - p2) * s[id_x] + (s3 - p3) * s[id_x+consts.pad] + (q + x * x * w) * 
s[id_x+2*consts.pad]; - //temp_x=s[id_x]-float(t1); - //temp_y=s[id_x+consts.pad]-float(t2); - //temp_z=s[id_x+2*consts.pad]-float(t3); - //s_norm=float(t1*t1+t2*t2+t3*t3); - s[id_x] =float(t1);//scaling;//sd[id_x];//t1; - s[id_x+consts.pad] = float(t2);//iteration[gl_WorkGroupID.x];//sd[id_x+consts.pad];// t2; - s[id_x+2*consts.pad] =float(t3);//sd[id_x+consts.pad*2];//t3; + double p3 = x * sin_theta; + dvec3 t; + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + s[id_x] =float(t.x);//scaling;//sd[id_x];//t1; + s[id_x+consts.pad] = float(t.y);//iteration[gl_WorkGroupID.x];//sd[id_x+consts.pad];// t2; + s[id_x+2*consts.pad] =float(t.z);//sd[id_x+consts.pad*2];//t3; }; /* float max = (temp_x*temp_x+temp_y*temp_y+temp_z*temp_z); diff --git a/shaders/ApplyLBFGS8_double.spv b/shaders/ApplyLBFGS8_double.spv index 39ddd2840..b9c6b856b 100644 Binary files a/shaders/ApplyLBFGS8_double.spv and b/shaders/ApplyLBFGS8_double.spv differ diff --git a/shaders/ApplyLBFGS8_float.comp b/shaders/ApplyLBFGS8_float.comp index 0de225675..c833cd735 100644 --- a/shaders/ApplyLBFGS8_float.comp +++ b/shaders/ApplyLBFGS8_float.comp @@ -39,47 +39,54 @@ layout(push_constant) uniform PushConsts uint k; float max_move; } consts; -shared float sdata[sumSubGroupSize]; +//shared float sdata[sumSubGroupSize]; void main() { float theta_rms=sqrt(reduceResults[0]/consts.nos); float scaling = (theta_rms>maxmove[1]) ? 
maxmove[1]/theta_rms : 1.0; uint id_x=gl_GlobalInvocationID.x; - s_copy[id_x]=s[id_x]; - s_copy[id_x+consts.pad]=s[id_x+consts.pad]; - s_copy[id_x+2*consts.pad]=s[id_x+2*consts.pad]; - sd[id_x]*=scaling; - sd[id_x+consts.pad]*=scaling; - sd[id_x+2*consts.pad]*=scaling; - //float temp_x=0; - //float temp_y=0; - //float temp_z=0; - //float s_norm=1.0; - float theta = sqrt(sd[id_x]*sd[id_x]+sd[id_x+consts.pad]*sd[id_x+consts.pad]+sd[id_x+2*consts.pad]*sd[id_x+2*consts.pad]); + vec3 s_loc; + s_loc.x=s[id_x]; + s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; + s_copy[id_x]=s_loc.x; + s_copy[id_x+consts.pad]=s_loc.y; + s_copy[id_x+2*consts.pad]=s_loc.z; + vec3 sd_loc; + sd_loc.x=sd[id_x]; + sd_loc.y=sd[id_x+consts.pad]; + sd_loc.z=sd[id_x+2*consts.pad]; + sd_loc*=scaling; + //sd[id_x]=sd_loc.x; + //sd[id_x+consts.pad]=sd_loc.y; + //sd[id_x+2*consts.pad]=sd_loc.z; + + float theta = length(sd_loc); if (theta > 1.0e-20) // if theta is too small we do nothing { float theta_inv = 1.0/theta; float q = cos(theta); float w = 1 - q; - float x = -sd[id_x]* theta_inv; - float y = -sd[id_x+consts.pad]* theta_inv; - float z = -sd[id_x+2*consts.pad]* theta_inv; + float x = -sd_loc.x* theta_inv; + float y = -sd_loc.y* theta_inv; + float z = -sd_loc.z* theta_inv; float s1 = x * y * w; float s2 = x * z * w; float s3 = z * y * w; - float p1 = z * sin(theta); - float p2 = y * sin(theta); - float p3 = x * sin(theta); - float t1, t2, t3; + float sin_theta=sin(theta); + float p1 = z * sin_theta; + float p2 = y * sin_theta; + float p3 = x * sin_theta; + vec3 t; - t1 = (q + x* x * w) * s[id_x] + (s1 - p1) * s[id_x+consts.pad] + (s2 + p2) * s[id_x+2*consts.pad]; - t2 = (s1 + p1) * s[id_x] + (q + y * y * w) * s[id_x+consts.pad] + (s3 - p3) * s[id_x+2*consts.pad]; - t3 = (s2 - p2) * s[id_x] + (s3 + p3) * s[id_x+consts.pad] + (q + z * z * w) * s[id_x+2*consts.pad]; - - s[id_x] = t1; - s[id_x+consts.pad] = t2; - s[id_x+2*consts.pad] = t3; + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * 
s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + t=normalize(t); + s[id_x] = t.x; + s[id_x+consts.pad] = t.y; + s[id_x+2*consts.pad] = t.z; }; if (gl_LocalInvocationID.x==gl_WorkGroupSize.x-1) diff --git a/shaders/ApplyLBFGS8_float.spv b/shaders/ApplyLBFGS8_float.spv index 7df7e9f01..86e547f3c 100644 Binary files a/shaders/ApplyLBFGS8_float.spv and b/shaders/ApplyLBFGS8_float.spv differ diff --git a/shaders/ApplyLBFGS9_float.comp b/shaders/ApplyLBFGS9_float.comp index 5fb838170..164d46970 100644 --- a/shaders/ApplyLBFGS9_float.comp +++ b/shaders/ApplyLBFGS9_float.comp @@ -46,12 +46,15 @@ void main() { uint id_x=gl_GlobalInvocationID.x; //float s_norm=1.0; - - float theta1 = sqrt(sd[id_x]*sd[id_x]+sd[id_x+consts.pad]*sd[id_x+consts.pad]+sd[id_x+2*consts.pad]*sd[id_x+2*consts.pad]); + vec3 sd_loc; + sd_loc.x=sd[id_x]; + sd_loc.y=sd[id_x+consts.pad]; + sd_loc.z=sd[id_x+2*consts.pad]; + float theta1 =length(sd_loc); float theta1_inv = 1.0/theta1; - float f_der0=(torque0[id_x]*sd[id_x]+torque0[id_x+consts.pad]*sd[id_x+consts.pad]+torque0[id_x+2*consts.pad]*sd[id_x+2*consts.pad]); - float f_der1=(torque1[id_x]*sd[id_x]+torque1[id_x+consts.pad]*sd[id_x+consts.pad]+torque1[id_x+2*consts.pad]*sd[id_x+2*consts.pad]); + float f_der0=(torque0[id_x]*sd_loc.x+torque0[id_x+consts.pad]*sd_loc.y+torque0[id_x+2*consts.pad]*sd_loc.z); + float f_der1=(torque1[id_x]*sd_loc.x+torque1[id_x+consts.pad]*sd_loc.y+torque1[id_x+2*consts.pad]*sd_loc.z); float c1 = (-2*(energy[gl_GlobalInvocationID.x]-energy[consts.nos+gl_GlobalInvocationID.x])*theta1_inv+f_der1+f_der0)*theta1_inv*theta1_inv; float c2 = (3*(energy[gl_GlobalInvocationID.x]-energy[consts.nos+gl_GlobalInvocationID.x])*theta1_inv-f_der1-2*f_der0)*theta1_inv; float c3 = f_der0; @@ -71,38 +74,44 @@ void main() { alpha=0; else alpha=theta1;*/ - sd[id_x]*=alpha*theta1_inv; - 
sd[id_x+consts.pad]*=alpha*theta1_inv; - sd[id_x+2*consts.pad]*=alpha*theta1_inv; + sd_loc*=alpha*theta1_inv; + sd[id_x]=sd_loc.x; + sd[id_x+consts.pad]=sd_loc.y; + sd[id_x+2*consts.pad]=sd_loc.z; if (alpha<=0){ s[id_x]=s_copy[id_x]; s[id_x+consts.pad]= s_copy[id_x+consts.pad]; s[id_x+2*consts.pad]= s_copy[id_x+2*consts.pad]; } else{ - if ((alpha 1.0e-20)){ + if ((alpha < theta1) && (alpha > 1.0e-20)){ + vec3 s_loc; + s_loc.x=s_copy[id_x]; + s_loc.y=s_copy[id_x+consts.pad]; + s_loc.z=s_copy[id_x+2*consts.pad]; float theta = alpha; float theta_inv = 1.0/theta; float q = cos(theta); float w = 1 - q; - float x = -sd[id_x]* theta_inv; - float y = -sd[id_x+consts.pad]* theta_inv; - float z = -sd[id_x+2*consts.pad]* theta_inv; + float x = -sd_loc.x* theta_inv; + float y = -sd_loc.y* theta_inv; + float z = -sd_loc.z* theta_inv; float s1 = x * y * w; float s2 = x * z * w; float s3 = z * y * w; - float p1 = z * sin(theta); - float p2 = y * sin(theta); - float p3 = x * sin(theta); - float t1, t2, t3; + float sin_theta=sin(theta); + float p1 = z * sin_theta; + float p2 = y * sin_theta; + float p3 = x * sin_theta; + vec3 t; - t1 = (q + x* x * w) * s[id_x] + (s1 - p1) * s[id_x+consts.pad] + (s2 + p2) * s[id_x+2*consts.pad]; - t2 = (s1 + p1) * s[id_x] + (q + y * y * w) * s[id_x+consts.pad] + (s3 - p3) * s[id_x+2*consts.pad]; - t3 = (s2 - p2) * s[id_x] + (s3 + p3) * s[id_x+consts.pad] + (q + z * z * w) * s[id_x+2*consts.pad]; - - s[id_x] = t1; - s[id_x+consts.pad] = t2; - s[id_x+2*consts.pad] = t3; + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + t=normalize(t); + s[id_x] = t.x; + s[id_x+consts.pad] = t.y; + s[id_x+2*consts.pad] = t.z; }; } diff --git a/shaders/ApplyLBFGS9_float.spv b/shaders/ApplyLBFGS9_float.spv index 2bda098ca..e1b243423 100644 Binary files a/shaders/ApplyLBFGS9_float.spv 
and b/shaders/ApplyLBFGS9_float.spv differ diff --git a/shaders/ApplyVP2_double.comp b/shaders/ApplyVP2_double.comp index a1a62c18d..87d49e2ff 100644 --- a/shaders/ApplyVP2_double.comp +++ b/shaders/ApplyVP2_double.comp @@ -54,16 +54,11 @@ layout(binding = 3) readonly buffer Data4 float reduceResults[]; }; -layout(std430, binding = 4) writeonly buffer PartialMax -{ - float partial_max[]; -}; - layout(push_constant) uniform PushConsts { - float dt; - float grad_add; uint pad; + float dt; + float m_temp_inv; } consts; shared float sdata[sumSubGroupSize]; @@ -73,45 +68,42 @@ void main() { grad_pr[id_x]=grad[id_x]; grad_pr[id_x+consts.pad]=grad[id_x+consts.pad]; grad_pr[id_x+2*consts.pad]=grad[id_x+2*consts.pad]; - double grad_mult=0; - if (reduceResults[0] <= 0) { - grad_mult = consts.grad_add;//-consts.dt * 0.5 * consts.m_temp_inv; - } - else { - grad_mult = consts.grad_add-consts.dt * reduceResults[0]/reduceResults[1]; - } - //float temp_x=0; - //float temp_y=0; - //float temp_z=0; - double theta = sqrt(grad_mult*grad[id_x]*grad_mult*grad[id_x]+grad_mult*grad[id_x+consts.pad]*grad_mult*grad[id_x+consts.pad]+grad_mult*grad[id_x+2*consts.pad]*grad_mult*grad[id_x+2*consts.pad]); + vec3 s_loc; + s_loc.x=s[id_x]; + s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; + double beta= reduceResults[0]/reduceResults[1]; + if (beta>0) beta=0; + dvec3 sd_loc; + sd_loc.x=consts.dt*grad[id_x]*(beta - consts.m_temp_inv); + sd_loc.y=consts.dt*grad[id_x+consts.pad]*(beta - consts.m_temp_inv); + sd_loc.z=consts.dt*grad[id_x+2*consts.pad]*(beta - consts.m_temp_inv); + double theta = length(sd_loc); if (theta > 1.0e-20) // if theta is too small we do nothing { double theta_inv = 1.0/theta; double q = cosa_11(theta); double w = 1 - q; - double x = -grad_mult*grad[id_x]* theta_inv; - double y = -grad_mult*grad[id_x+consts.pad]* theta_inv; - double z = -grad_mult*grad[id_x+2*consts.pad]* theta_inv; + double x = -sd_loc.x* theta_inv; + double y = -sd_loc.y* theta_inv; + double z = 
-sd_loc.z* theta_inv; - double s1 = -y * z * w; + double s1 = x * z * w; double s2 = x * z * w; - double s3 = -x * y * w; + double s3 = z * y * w; double sin_theta = sina_11(theta); - double p1 = x * sin_theta; + double p1 = z * sin_theta; double p2 = y * sin_theta; - double p3 = z * sin_theta; - double t1, t2, t3; + double p3 = x * sin_theta; + dvec3 t; - t1 = (q + z * z * w) * s[id_x] + (s1 + p1) * s[id_x+consts.pad] + (s2 + p2) * s[id_x+2*consts.pad]; - t2 = (s1 - p1) * s[id_x] + (q + y * y * w) * s[id_x+consts.pad] + (s3 + p3) * s[id_x+2*consts.pad]; - t3 = (s2 - p2) * s[id_x] + (s3 - p3) * s[id_x+consts.pad] + (q + x * x * w) * s[id_x+2*consts.pad]; - //temp_x=s[id_x]-float(t1); - //temp_y=s[id_x+consts.pad]-float(t2); - //temp_z=s[id_x+2*consts.pad]-float(t3); - //double t_norm=1.0;//sqrt(t1*t1+t2*t2+t3*t3); - s[id_x] =float(t1);//scaling;//sd[id_x];//t1; - s[id_x+consts.pad] = float(t2);//iteration[gl_WorkGroupID.x];//sd[id_x+consts.pad];// t2; - s[id_x+2*consts.pad] =float(t3);//sd[id_x+consts.pad*2];//t3; + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + t=normalize(t); + s[id_x] = float(t.x); + s[id_x+consts.pad] = float(t.y); + s[id_x+2*consts.pad] = float(t.z); };/* float max = temp_x*temp_x+temp_y*temp_y+temp_z*temp_z; diff --git a/shaders/ApplyVP2_double.spv b/shaders/ApplyVP2_double.spv index bb422b22e..eaf02409a 100644 Binary files a/shaders/ApplyVP2_double.spv and b/shaders/ApplyVP2_double.spv differ diff --git a/shaders/ApplyVP2_float.comp b/shaders/ApplyVP2_float.comp index b4d833445..6027b3b1c 100644 --- a/shaders/ApplyVP2_float.comp +++ b/shaders/ApplyVP2_float.comp @@ -35,39 +35,42 @@ void main() { grad_pr[id_x]=grad[id_x]; grad_pr[id_x+consts.pad]=grad[id_x+consts.pad]; grad_pr[id_x+2*consts.pad]=grad[id_x+2*consts.pad]; - + vec3 s_loc; + s_loc.x=s[id_x]; + 
s_loc.y=s[id_x+consts.pad]; + s_loc.z=s[id_x+2*consts.pad]; float beta= reduceResults[0]/reduceResults[1]; if (beta>0) beta=0; - float sd[3]; - sd[0]=consts.dt*grad[id_x]*(beta - consts.m_temp_inv); - sd[1]=consts.dt*grad[id_x+consts.pad]*(beta - consts.m_temp_inv); - sd[2]=consts.dt*grad[id_x+2*consts.pad]*(beta - consts.m_temp_inv); + vec3 sd_loc; + sd_loc.x=consts.dt*grad[id_x]*(beta - consts.m_temp_inv); + sd_loc.y=consts.dt*grad[id_x+consts.pad]*(beta - consts.m_temp_inv); + sd_loc.z=consts.dt*grad[id_x+2*consts.pad]*(beta - consts.m_temp_inv); - float theta = sqrt(sd[0]*sd[0]+sd[1]*sd[1]+sd[2]*sd[2]); + float theta = length(sd_loc); if (theta > 1.0e-20) // if theta is too small we do nothing { float theta_inv = 1.0/theta; float q = cos(theta); float w = 1 - q; - float x = -sd[0]* theta_inv; - float y = -sd[1]* theta_inv; - float z = -sd[2]* theta_inv; + float x = -sd_loc.x* theta_inv; + float y = -sd_loc.y* theta_inv; + float z = -sd_loc.z* theta_inv; float s1 = x * y * w; float s2 = x * z * w; float s3 = z * y * w; float p1 = z * sin(theta); float p2 = y * sin(theta); float p3 = x * sin(theta); - float t1, t2, t3; + vec3 t; - t1 = (q + x* x * w) * s[id_x] + (s1 - p1) * s[id_x+consts.pad] + (s2 + p2) * s[id_x+2*consts.pad]; - t2 = (s1 + p1) * s[id_x] + (q + y * y * w) * s[id_x+consts.pad] + (s3 - p3) * s[id_x+2*consts.pad]; - t3 = (s2 - p2) * s[id_x] + (s3 + p3) * s[id_x+consts.pad] + (q + z * z * w) * s[id_x+2*consts.pad]; - - s[id_x] = t1; - s[id_x+consts.pad] = t2; - s[id_x+2*consts.pad] = t3; + t.x = (q + x* x * w) * s_loc.x + (s1 - p1) * s_loc.y + (s2 + p2) * s_loc.z; + t.y = (s1 + p1) * s_loc.x + (q + y * y * w) * s_loc.y + (s3 - p3) * s_loc.z; + t.z = (s2 - p2) * s_loc.x + (s3 + p3) * s_loc.y + (q + z * z * w) * s_loc.z; + t=normalize(t); + s[id_x] = t.x; + s[id_x+consts.pad] = t.y; + s[id_x+2*consts.pad] = t.z; }; diff --git a/shaders/ApplyVP2_float.spv b/shaders/ApplyVP2_float.spv index 728f3067c..4098417f2 100644 Binary files 
a/shaders/ApplyVP2_float.spv and b/shaders/ApplyVP2_float.spv differ diff --git a/shaders/compile.bat b/shaders/compile.bat index cf8f0c078..c7bfcf842 100644 --- a/shaders/compile.bat +++ b/shaders/compile.bat @@ -9,6 +9,9 @@ glslangvalidator -V Apply3.comp -o Apply3.spv --target-env spirv1.3 glslangvalidator -V ApplyVP1.comp -o ApplyVP1.spv --target-env spirv1.3 glslangvalidator -V ApplyVP2_float.comp -o ApplyVP2_float.spv --target-env spirv1.3 glslangvalidator -V ApplyVP2_double.comp -o ApplyVP2_double.spv --target-env spirv1.3 +glslangvalidator -V ApplyCG1.comp -o ApplyCG1.spv --target-env spirv1.3 +glslangvalidator -V ApplyCG2_float.comp -o ApplyCG2_float.spv --target-env spirv1.3 +glslangvalidator -V ApplyCG2_double.comp -o ApplyCG2_double.spv --target-env spirv1.3 glslangvalidator -V ApplyLBFGS1.comp -o ApplyLBFGS1.spv --target-env spirv1.3 glslangvalidator -V ApplyLBFGS2.comp -o ApplyLBFGS2.spv --target-env spirv1.3 glslangvalidator -V ApplyLBFGS3.comp -o ApplyLBFGS3.spv --target-env spirv1.3 diff --git a/shaders/gradient_double.comp b/shaders/gradient_double.comp index 8714488e2..698237fe9 100644 --- a/shaders/gradient_double.comp +++ b/shaders/gradient_double.comp @@ -35,6 +35,8 @@ struct Region{ float anisotropy_cubic_normals[9]; float cell_sizes[3]; float cell_sizes_inv[3]; + float periodic[3]; + float frozen_spins; }; layout(binding = 0) readonly buffer Spins_Buf @@ -71,7 +73,7 @@ layout(std430, binding = 6) buffer Energy }; //layout (constant_id = 0) const int FORCE_TYPE = 0; -shared float sdata[(num_gradient_components+3)*sumSubGroupSize]; +shared float sdata[(num_gradient_components+4)*sumSubGroupSize]; void main() { //return; @@ -79,30 +81,36 @@ void main() { // return; uint pad= n; uint x = gl_GlobalInvocationID.x; - uint xl = (x==0) ? x : x-1; - uint xll = (x==1) ? xl : x-2; - xll = (x==0) ? x : xll; - uint xr = (x==WIDTH-1) ? x : x+1; - uint xrr = (x==WIDTH-2) ? xr : x+2; - xrr = (x==WIDTH-1) ? 
x : xrr; - uint y = gl_GlobalInvocationID.y; - uint yl = (y==0) ? y : y-1; - uint yll = (y==1) ? yl : y-2; - yll = (y==0) ? y : yll; - uint yr = (y==HEIGHT-1) ? y : y+1; - uint yrr = (y==HEIGHT-2) ? yr : y+2; - yrr = (y==HEIGHT-1) ? y : yrr; - uint z = gl_GlobalInvocationID.z; - uint zl = (z==0) ? z : z-1; - uint zll = (z==1) ? zl : z-2; - zll = (z==0) ? z : zll; - uint zr = (z==DEPTH-1) ? z : z+1; - uint zrr = (z==DEPTH-2) ? zr : z+2; - zrr = (z==DEPTH-1) ? z : zrr; - uint icell=x+y*WIDTH+z*WIDTH*HEIGHT; + uint xl, xr; + if (regions_book[regions[icell]].periodic[0]==0){ + xl = (x==0) ? x : x-1; + xr = (x==WIDTH-1) ? x : x+1; + }else{ + xl = (x==0) ? WIDTH-1 : x-1; + xr = (x==WIDTH-1) ? 0 : x+1; + } + + uint yl, yr; + if (regions_book[regions[icell]].periodic[1]==0){ + yl = (y==0) ? y : y-1; + yr = (y==HEIGHT-1) ? y : y+1; + }else{ + yl = (y==0) ? HEIGHT-1 : y-1; + yr = (y==HEIGHT-1) ? 0 : y+1; + } + + uint zl, zr; + if (regions_book[regions[icell]].periodic[2]==0){ + zl = (z==0) ? z : z-1; + zr = (z==DEPTH-1) ? z : z+1; + }else{ + zl = (z==0) ? DEPTH-1 : z-1; + zr = (z==DEPTH-1) ? 
0 : z+1; + } + double gradient_values_x[num_gradient_components]; double gradient_values_y[num_gradient_components]; @@ -127,9 +135,9 @@ void main() { m.x=spins[icell]; m.y=spins[icell+pad]; m.z=spins[icell+2*pad]; - gradient_values_x[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[0]; - gradient_values_y[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[1]; - gradient_values_z[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[2]; + gradient_values_x[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[0]; + gradient_values_y[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[1]; + gradient_values_z[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[2]; /* //Zeeman - Custom field if ((x-WIDTH*0.5f)*(x-WIDTH*0.5f)+(y-HEIGHT*0.5f)*(y-HEIGHT*0.5f) > 0.2*WIDTH*HEIGHT){ @@ -353,13 +361,11 @@ void main() { gradient_values_y[3] += regions_book[regions[icell]].Dmi_bulk * regions_book[regions[icell]].Ms_inv * ((spins_plus[0] - spins_0[0]) * (regions_book[regions[icell]].cell_sizes_inv[2])-0.5*regions_book[regions[icell]].Dmi_bulk*regions_book[regions[icell]].Aexch_inv*spins_0[1]); }else { - uint icell_plus=x+y*WIDTH+zr*WIDTH*HEIGHT; - uint icell_minus=x+y*WIDTH+zl*WIDTH*HEIGHT; float Dl=regions_book[regions[icell_minus]].Dmi_bulk; float Dr=regions_book[regions[icell_plus]].Dmi_bulk; float D=regions_book[regions[icell]].Dmi_bulk; - float Dll=(D==0) ? 0 : 2*D*Dl/(D+Dl); - float Drr=(D==0) ? 
0 : 2*D*Dr/(D+Dr); + float Dll=sqrt(D*Dl); + float Drr=sqrt(D*Dr); gradient_values_x[2] -= prefactor*((spins_plus[0] - spins_0[0]) + (spins_minus[0]- spins_0[0]))* (regions_book[regions[icell]].cell_sizes_inv[2]); gradient_values_y[2] -= prefactor*((spins_plus[1] - spins_0[1]) + (spins_minus[1]- spins_0[1]))* (regions_book[regions[icell]].cell_sizes_inv[2]) ; @@ -387,25 +393,29 @@ void main() { gradient_total[2]+=gradient_values_z[i]; //reduceEnergy[i]=0; } + double torque[3]={0,0,0}; if (damping==1){ gradient[icell]=float(gradient_total[0]+regions_book[regions[icell]].alpha*(spins[icell+pad]*gradient_total[2]-spins[icell+2*pad]*gradient_total[1]))/(1+regions_book[regions[icell]].alpha*regions_book[regions[icell]].alpha); gradient[icell+pad]=float(gradient_total[1]+regions_book[regions[icell]].alpha*(spins[icell+2*pad]*gradient_total[0]-spins[icell]*gradient_total[2]))/(1+regions_book[regions[icell]].alpha*regions_book[regions[icell]].alpha); gradient[icell+2*pad]=float(gradient_total[2]+regions_book[regions[icell]].alpha*(spins[icell]*gradient_total[1]-spins[icell+pad]*gradient_total[0]))/(1+regions_book[regions[icell]].alpha*regions_book[regions[icell]].alpha); //torque - gradient[icell+3*n]=-float(spins_0[1]*gradient[icell+2*pad]-spins_0[2]*gradient[icell+pad]); - gradient[icell+3*n+pad]=-float(-spins_0[0]*gradient[icell+2*pad]+spins_0[2]*gradient[icell]); - gradient[icell+3*n+2*pad]=-float(spins_0[0]*gradient[icell+pad]-spins_0[1]*gradient[icell]); + torque[0]=-spins_0[1]*gradient[icell+2*pad]+spins_0[2]*gradient[icell+pad]; + torque[1]=spins_0[0]*gradient[icell+2*pad]-spins_0[2]*gradient[icell]; + torque[2]=-spins_0[0]*gradient[icell+pad]+spins_0[1]*gradient[icell]; }else{ gradient[icell]=float(gradient_total[0]); gradient[icell+pad]=float(gradient_total[1]); gradient[icell+2*pad]=float(gradient_total[2]); //torque - gradient[icell+3*n]=-float(spins_0[1]*gradient_total[2]-spins_0[2]*gradient_total[1]); - 
gradient[icell+3*n+pad]=-float(-spins_0[0]*gradient_total[2]+spins_0[2]*gradient_total[0]); - gradient[icell+3*n+2*pad]=-float(spins_0[0]*gradient_total[1]-spins_0[1]*gradient_total[0]); + torque[0]=float(-double(spins_0[1])*double(gradient_total[2])+double(spins_0[2])*double(gradient_total[1])); + torque[1]=float(double(spins_0[0])*double(gradient_total[2])-double(spins_0[2])*double(gradient_total[0])); + torque[2]=float(-double(spins_0[0])*double(gradient_total[1])+double(spins_0[1])*double(gradient_total[0])); } + gradient[icell+3*n]=float(torque[0]); + gradient[icell+3*n+pad]=float(torque[1]); + gradient[icell+3*n+2*pad]=float(torque[2]); for (int i=2; i< num_components_write;i++){ gradient[icell+i*3*n]=float(gradient_values_x[i-2]); gradient[icell+pad+i*3*n]=float(gradient_values_y[i-2]); @@ -417,6 +427,7 @@ void main() { float mx=0.0; float my=0.0; float mz=0.0; + float max_torque=0.0; for (uint i=0; i < num_gradient_components;i++){ reduceEnergy[i] = float(gradient_values_x[i]*spins_0[0]); reduceEnergy[i] += float(gradient_values_y[i]*spins_0[1]); @@ -432,7 +443,8 @@ void main() { mx = spins[icell]; my = spins[icell+pad]; mz = spins[icell+2*pad]; - + max_torque=float(torque[0]*torque[0]+torque[1]*torque[1]+torque[2]*torque[2]); + for (uint i=0; i < num_gradient_components;i++){ reduceEnergy[i] = subgroupAdd(reduceEnergy[i]); } @@ -440,15 +452,16 @@ void main() { mx = subgroupAdd(mx); my = subgroupAdd(my); mz = subgroupAdd(mz); - + max_torque = subgroupMax(max_torque); if (gl_SubgroupInvocationID == 0) { for (uint i=0; i < num_gradient_components;i++){ - sdata[gl_SubgroupID+i*sumSubGroupSize]=reduceEnergy[i]; + sdata[gl_SubgroupID+(i+1)*sumSubGroupSize]=reduceEnergy[i]; } - sdata[gl_SubgroupID+num_gradient_components*sumSubGroupSize] = mx; - sdata[gl_SubgroupID+(num_gradient_components+1)*sumSubGroupSize] = my; - sdata[gl_SubgroupID+(num_gradient_components+2)*sumSubGroupSize] = mz; + sdata[gl_SubgroupID+(num_gradient_components+1)*sumSubGroupSize] = mx; + 
sdata[gl_SubgroupID+(num_gradient_components+2)*sumSubGroupSize] = my; + sdata[gl_SubgroupID+(num_gradient_components+3)*sumSubGroupSize] = mz; + sdata[gl_SubgroupID] = max_torque; } memoryBarrierShared(); @@ -457,17 +470,20 @@ void main() { if (gl_SubgroupID == 0) { for (uint i=0; i < num_gradient_components;i++){ - reduceEnergy[i] = gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+i*sumSubGroupSize] : 0; + reduceEnergy[i] = gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(i+1)*sumSubGroupSize] : 0; reduceEnergy[i] = subgroupAdd(reduceEnergy[i]); } - mx= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+num_gradient_components*sumSubGroupSize] : 0; + mx= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(num_gradient_components+1)*sumSubGroupSize] : 0; mx = subgroupAdd(mx); - my= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(num_gradient_components+1)*sumSubGroupSize] : 0; + my= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(num_gradient_components+2)*sumSubGroupSize] : 0; my = subgroupAdd(my); - mz= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(num_gradient_components+2)*sumSubGroupSize] : 0; + mz= gl_SubgroupInvocationID < gl_NumSubgroups ? sdata[gl_SubgroupInvocationID+(num_gradient_components+3)*sumSubGroupSize] : 0; mz = subgroupAdd(mz); + + max_torque= gl_SubgroupInvocationID < gl_NumSubgroups ? 
sdata[gl_SubgroupInvocationID] : 0; + max_torque = subgroupMax(max_torque); } if (gl_LocalInvocationID.x+gl_LocalInvocationID.y*gl_WorkGroupSize.x == 0) @@ -476,12 +492,13 @@ void main() { uint num_WorkGroups=gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_NumWorkGroups.z; for (uint i=0; i < num_gradient_components;i++){ - partial_sums[work_ID+i*num_WorkGroups]=reduceEnergy[i]; + partial_sums[work_ID+(i+1)*num_WorkGroups]=reduceEnergy[i]; } - partial_sums[work_ID+num_gradient_components*num_WorkGroups] = mx; - partial_sums[work_ID+(num_gradient_components+1)*num_WorkGroups] = my; - partial_sums[work_ID+(num_gradient_components+2)*num_WorkGroups] = mz; + partial_sums[work_ID+(num_gradient_components+1)*num_WorkGroups] = mx; + partial_sums[work_ID+(num_gradient_components+2)*num_WorkGroups] = my; + partial_sums[work_ID+(num_gradient_components+3)*num_WorkGroups] = mz; + partial_sums[work_ID] = max_torque; } } } diff --git a/shaders/gradient_double.spv b/shaders/gradient_double.spv index 418950b1b..04b692aaf 100644 Binary files a/shaders/gradient_double.spv and b/shaders/gradient_double.spv differ diff --git a/shaders/gradient_float.comp b/shaders/gradient_float.comp index 9ce50c73a..3eedc32d2 100644 --- a/shaders/gradient_float.comp +++ b/shaders/gradient_float.comp @@ -80,7 +80,6 @@ void main() { //if(gl_GlobalInvocationID.x >= WIDTH || gl_GlobalInvocationID.y >= HEIGHT || gl_GlobalInvocationID.z >= DEPTH) // return; uint pad= n; - uint x = gl_GlobalInvocationID.x; uint y = gl_GlobalInvocationID.y; uint z = gl_GlobalInvocationID.z; @@ -135,9 +134,9 @@ void main() { m.x=spins[icell]; m.y=spins[icell+pad]; m.z=spins[icell+2*pad]; - gradient_values_x[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[0]; - gradient_values_y[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[1]; - 
gradient_values_z[0] = -mult*regions_book[regions[icell]].Ms * regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[2]; + gradient_values_x[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[0]; + gradient_values_y[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[1]; + gradient_values_z[0] = - regions_book[regions[icell]].external_field_magnitude*regions_book[regions[icell]].external_field_normal[2]; /* //Zeeman - Custom field if ((x-WIDTH*0.5f)*(x-WIDTH*0.5f)+(y-HEIGHT*0.5f)*(y-HEIGHT*0.5f) > 0.2*WIDTH*HEIGHT){ @@ -361,8 +360,7 @@ void main() { gradient_values_y[3] += regions_book[regions[icell]].Dmi_bulk * regions_book[regions[icell]].Ms_inv * ((spins_plus[0] - spins_0[0]) * (regions_book[regions[icell]].cell_sizes_inv[2])-0.5*regions_book[regions[icell]].Dmi_bulk*regions_book[regions[icell]].Aexch_inv*spins_0[1]); }else { - uint icell_plus=x+y*WIDTH+zr*WIDTH*HEIGHT; - uint icell_minus=x+y*WIDTH+zl*WIDTH*HEIGHT; + float Dl=regions_book[regions[icell_minus]].Dmi_bulk; float Dr=regions_book[regions[icell_plus]].Dmi_bulk; float D=regions_book[regions[icell]].Dmi_bulk; diff --git a/shaders/gradient_float.spv b/shaders/gradient_float.spv index 0375ff826..b37f5e031 100644 Binary files a/shaders/gradient_float.spv and b/shaders/gradient_float.spv differ diff --git a/ui-cpp/include/HamiltonianMicromagneticWidget.hpp b/ui-cpp/include/HamiltonianMicromagneticWidget.hpp index bbfde46de..80cd6a844 100644 --- a/ui-cpp/include/HamiltonianMicromagneticWidget.hpp +++ b/ui-cpp/include/HamiltonianMicromagneticWidget.hpp @@ -35,6 +35,7 @@ private slots: void set_exchange(); void set_dmi(); + void set_damping(); void set_ddi_checkBox(); void set_ddi_lineEdit(); void set_frozen_spins(); diff --git a/ui-cpp/main.cpp b/ui-cpp/main.cpp index 6b3e45bd0..6c915907b 100644 --- a/ui-cpp/main.cpp +++ b/ui-cpp/main.cpp 
@@ -77,8 +77,9 @@ int main(int argc, char ** argv) // // Set the chain length // Chain_Set_Length(state.get(), 12); - // // First image is plus-z with a Bloch skyrmion at the center - Configuration_PlusZ(state.get()); + // set regions (uncomment below) + + /*Configuration_PlusZ(state.get()); std::shared_ptr image; std::shared_ptr chain; @@ -90,7 +91,7 @@ int main(int argc, char ** argv) for (int k = 0; k < image->geometry->n_cells[2]; k++) { for (int j = 0; j < image->geometry->n_cells[1]; j++) { for (int i = 0; i < image->geometry->n_cells[0]; i++) { - regions[i + j * image->geometry->n_cells[0] + k * image->geometry->n_cells[0] * image->geometry->n_cells[1]] = 1;//will set all spins regions to 1 + regions[i + j * image->geometry->n_cells[0] + k * image->geometry->n_cells[0] * image->geometry->n_cells[1]] = 0;//will set all spins regions to 1 } } } @@ -100,15 +101,15 @@ int main(int argc, char ** argv) regions[i+j* image->geometry->n_cells[0]+0* image->geometry->n_cells[0]* image->geometry->n_cells[1]] = 0;//will set inner spins to 0 } } - Hamiltonian_Set_Regions(state.get(), regions.data()); - // } + Hamiltonian_Set_Regions(state.get(), regions.data());*/ - //float dir[3] = { 1,1,1}; - //Configuration_Domain(state.get(), dir); + + float dir[3] = { 0,1,0}; + Configuration_Domain(state.get(), dir); //Configuration_APStripe(state.get()); //Configuration_Vortex(state.get(), 1000.0, 1.0, 0, false, false, false); - Configuration_Skyrmion(state.get(), 150.0, 1.0, 0, false, false, false); + //Configuration_Skyrmion(state.get(), 150.0, 1.0, 0, false, false, false); //Configuration_Skyrmion(state.get(), 50.0, 1.0, 0, true, false, false); //Configuration_Hopfion(state.get(), 40, 1.0); //Configuration_SpinSpiral(State * state);// diff --git a/ui-cpp/src/ControlWidget.cpp b/ui-cpp/src/ControlWidget.cpp index 68a53d1c6..d06743de0 100644 --- a/ui-cpp/src/ControlWidget.cpp +++ b/ui-cpp/src/ControlWidget.cpp @@ -136,14 +136,17 @@ void ControlWidget::cycleSolver() } void 
ControlWidget::setSolver(std::string SolverName) { - if (SolverName == "RK4") - this->comboBox_Solver->setCurrentIndex(3); + this->comboBox_Method->setCurrentIndex(0); if (SolverName == "Depondt") - this->comboBox_Solver->setCurrentIndex(2); + this->comboBox_Solver->setCurrentIndex(0); + if (SolverName == "RK4") + this->comboBox_Solver->setCurrentIndex(1); if (SolverName == "LBFGS_OSO") - this->comboBox_Solver->setCurrentIndex(5); + this->comboBox_Solver->setCurrentIndex(2); if (SolverName == "VP_OSO") - this->comboBox_Solver->setCurrentIndex(7); + this->comboBox_Solver->setCurrentIndex(3); + if (SolverName == "CG_OSO") + this->comboBox_Solver->setCurrentIndex(4); } std::string ControlWidget::methodName() { @@ -184,7 +187,9 @@ void ControlWidget::play_pause() solver = Solver_LBFGS_OSO; else if( s_solver == "LBFGS_Atlas" ) solver = Solver_LBFGS_Atlas; - if( s_solver == "VP_OSO" ) + else if (s_solver == "CG_OSO") + solver = Solver_CG_OSO; + else if( s_solver == "VP_OSO" ) solver = Solver_VP_OSO; if( Simulation_Running_On_Image(this->state.get()) || diff --git a/ui-cpp/src/HamiltonianMicromagneticWidget.cpp b/ui-cpp/src/HamiltonianMicromagneticWidget.cpp index 30f8c51e5..799eb5ab0 100644 --- a/ui-cpp/src/HamiltonianMicromagneticWidget.cpp +++ b/ui-cpp/src/HamiltonianMicromagneticWidget.cpp @@ -102,7 +102,8 @@ void HamiltonianMicromagneticWidget::updateData() this->lineEdit_dmi_01->setText(QString::number(d2[1])); if ((d2[0]!=0)||(d2[1]!=0)) this->checkBox_dmi->setChecked(true); else this->checkBox_dmi->setChecked(false); - + Hamiltonian_Get_damping(state.get(), &d, this->comboBox_region->currentIndex()); + this->lineEdit_damping->setText(QString::number(d)); Hamiltonian_Get_DDI_coefficient(state.get(), d2, this->comboBox_region->currentIndex()); this->lineEdit_ddi->setText(QString::number(d2[0])); if (d2[1]) this->checkBox_ddi->setChecked(true); @@ -457,6 +458,40 @@ void HamiltonianMicromagneticWidget::set_dmi() } } } +void 
HamiltonianMicromagneticWidget::set_damping() +{ + // Closure to set the parameters of a specific spin system + auto apply = [this](int idx_image) -> void + { + + float alpha; + alpha = lineEdit_damping->text().toFloat(); + + Hamiltonian_Set_damping(state.get(), alpha, this->comboBox_region->currentIndex(), idx_image); + + + + }; + + if (this->comboBox_Hamiltonian_Ani_ApplyTo->currentText() == "Current Image") + { + apply(System_Get_Index(state.get())); + } + else if (this->comboBox_Hamiltonian_Ani_ApplyTo->currentText() == "Current Image Chain") + { + for (int i = 0; i < Chain_Get_NOI(state.get()); ++i) + { + apply(i); + } + } + else if (this->comboBox_Hamiltonian_Ani_ApplyTo->currentText() == "All Images") + { + for (int img = 0; img < Chain_Get_NOI(state.get()); ++img) + { + apply(img); + } + } +} void HamiltonianMicromagneticWidget::set_ddi_checkBox() { // Closure to set the parameters of a specific spin system @@ -650,6 +685,8 @@ void HamiltonianMicromagneticWidget::Setup_Slots() // DDI connect(this->checkBox_ddi, SIGNAL(stateChanged(int)), this, SLOT(set_ddi_checkBox())); connect(this->lineEdit_ddi, SIGNAL(returnPressed()), this, SLOT(set_ddi_lineEdit())); + //damping + connect(this->lineEdit_damping, SIGNAL(returnPressed()), this, SLOT(set_damping())); //frozen_spins connect(this->checkBox_frozen_spins, SIGNAL(stateChanged(int)), this, SLOT(set_frozen_spins())); diff --git a/ui-cpp/src/ParametersWidget.cpp b/ui-cpp/src/ParametersWidget.cpp index 8439760f2..3873849d4 100644 --- a/ui-cpp/src/ParametersWidget.cpp +++ b/ui-cpp/src/ParametersWidget.cpp @@ -64,10 +64,21 @@ void ParametersWidget::Load_Parameters_Contents() int image_type; int i1, i2; bool b1, b2, b3, b4, b5; - + Parameters_LLG_Get_dt(this->state.get(), &d); + this->lineEdit_dt->setText(QString::number(d)); + Parameters_LLG_Get_max_torque(this->state.get(), &d); + this->lineEdit_max_torque->setText(QString::number(d)); + Parameters_LLG_Get_max_move(this->state.get(), &d); + 
this->lineEdit_max_move->setText(QString::number(d)); + Parameters_LLG_Get_n_LBFGS(this->state.get(), &i1); + this->lineEdit_n_LBFGS->setText(QString::number(i1)); + Parameters_LLG_Get_grouped_iterations(this->state.get(), &i1); + this->lineEdit_grouped_iterations->setText(QString::number(i1)); + Parameters_LLG_Get_save_period(this->state.get(), &i1); + this->lineEdit_save_period->setText(QString::number(i1)); // LLG // Direct minimization - b1 = Parameters_LLG_Get_Direct_Minimization(state.get()); + /*b1 = Parameters_LLG_Get_Direct_Minimization(state.get()); this->checkBox_llg_direct->setChecked(b1); // Damping d = Parameters_LLG_Get_Damping(state.get()); @@ -94,7 +105,7 @@ void ParametersWidget::Load_Parameters_Contents() this->lineEdit_llg_temperature_dir_z->setText(QString::number(vd[2])); // Convergence d = Parameters_LLG_Get_Convergence(state.get()); - this->spinBox_llg_convergence->setValue(std::log10(d)); + this->spinBox_llg_convergence->setValue(std::log10(d));*/ // Output Parameters_LLG_Get_N_Iterations(state.get(), &i1, &i2); this->lineEdit_llg_n_iterations->setText(QString::number(i1)); @@ -234,7 +245,7 @@ void ParametersWidget::set_parameters_llg() bool b1, b2, b3, b4; // Direct minimization - b1 = this->checkBox_llg_direct->isChecked(); + /*b1 = this->checkBox_llg_direct->isChecked(); Parameters_LLG_Set_Direct_Minimization(this->state.get(), b1, idx_image); // Convergence @@ -248,11 +259,11 @@ void ParametersWidget::set_parameters_llg() // Damping d = this->lineEdit_Damping->text().toFloat(); - Parameters_LLG_Set_Damping(this->state.get(), d, idx_image); + Parameters_LLG_Set_Damping(this->state.get(), d, idx_image);*/ // Spin polarised current - b1 = this->radioButton_stt_gradient->isChecked(); + /*b1 = this->radioButton_stt_gradient->isChecked(); if (this->checkBox_llg_stt->isChecked()) d = this->doubleSpinBox_llg_stt_magnitude->value(); else @@ -274,11 +285,26 @@ void ParametersWidget::set_parameters_llg() 
doubleSpinBox_llg_stt_polarisation_z->setValue(1.0); } else { throw(ex); } - } - Parameters_LLG_Set_STT(state.get(), b1, d, vd, idx_image); + }*/ + d = this->lineEdit_dt->text().toFloat(); + Parameters_LLG_Set_dt(this->state.get(), d, idx_image); + d = this->lineEdit_max_torque->text().toFloat(); + Parameters_LLG_Set_max_torque(this->state.get(), d, idx_image); + d = this->lineEdit_max_move->text().toFloat(); + Parameters_LLG_Set_max_move(this->state.get(), d, idx_image); + i1 = this->lineEdit_n_LBFGS->text().toInt(); + Parameters_LLG_Set_n_LBFGS(this->state.get(), i1, idx_image); + i1 = this->lineEdit_grouped_iterations->text().toInt(); + Parameters_LLG_Set_grouped_iterations(this->state.get(), i1, idx_image); + i1 = this->lineEdit_save_period->text().toInt(); + Parameters_LLG_Set_save_period(this->state.get(), i1, idx_image); + + + + //Parameters_LLG_Set_STT(state.get(), b1, d, vd, idx_image); // Temperature - if (this->checkBox_llg_temperature->isChecked()) + /*if (this->checkBox_llg_temperature->isChecked()) { d = this->doubleSpinBox_llg_temperature->value(); d2 = this->lineEdit_llg_temperature_inclination->text().toFloat(); @@ -293,9 +319,9 @@ void ParametersWidget::set_parameters_llg() vd[0] = 0; vd[1] = 0; vd[2] = 0; - } - Parameters_LLG_Set_Temperature(state.get(), d, idx_image); - Parameters_LLG_Set_Temperature_Gradient(state.get(), d2, vd, idx_image); + }*/ + /*Parameters_LLG_Set_Temperature(state.get(), d, idx_image); + Parameters_LLG_Set_Temperature_Gradient(state.get(), d2, vd, idx_image);*/ // Output i1 = this->lineEdit_llg_n_iterations->text().toInt(); @@ -583,7 +609,7 @@ void ParametersWidget::Setup_Parameters_Slots() { // LLG // Direct minimization - connect(this->checkBox_llg_direct, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_llg())); + /*connect(this->checkBox_llg_direct, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_llg())); // Temperature connect(this->checkBox_llg_temperature, SIGNAL(stateChanged(int)), this, 
SLOT(set_parameters_llg())); connect(this->doubleSpinBox_llg_temperature, SIGNAL(editingFinished()), this, SLOT(set_parameters_llg())); @@ -603,7 +629,7 @@ void ParametersWidget::Setup_Parameters_Slots() connect(this->lineEdit_Damping, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); connect(this->lineEdit_dt, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); // Convergence criterion - connect(this->spinBox_llg_convergence, SIGNAL(editingFinished()), this, SLOT(set_parameters_llg())); + connect(this->spinBox_llg_convergence, SIGNAL(editingFinished()), this, SLOT(set_parameters_llg()));*/ // Output connect(this->lineEdit_llg_n_iterations, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); connect(this->lineEdit_llg_log_steps, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); @@ -617,7 +643,12 @@ void ParametersWidget::Setup_Parameters_Slots() connect(this->checkBox_llg_output_energy_divide, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_llg())); connect(this->checkBox_llg_output_configuration_step, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_llg())); connect(this->checkBox_llg_output_configuration_archive, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_llg())); - + connect(this->lineEdit_dt, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); + connect(this->lineEdit_max_torque, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); + connect(this->lineEdit_max_move, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); + connect(this->lineEdit_n_LBFGS, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); + connect(this->lineEdit_grouped_iterations, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); + connect(this->lineEdit_save_period, SIGNAL(returnPressed()), this, SLOT(set_parameters_llg())); // MC // Paramters connect(this->checkBox_mc_temperature, SIGNAL(stateChanged(int)), this, SLOT(set_parameters_mc())); @@ -699,12 +730,12 @@ void 
ParametersWidget::Setup_Parameters_Slots() void ParametersWidget::Setup_Input_Validators() { // LLG - this->lineEdit_Damping->setValidator(this->number_validator_unsigned); + /*this->lineEdit_Damping->setValidator(this->number_validator_unsigned); this->lineEdit_dt->setValidator(this->number_validator_unsigned); this->lineEdit_llg_temperature_inclination->setValidator(this->number_validator); this->lineEdit_llg_temperature_dir_x->setValidator(this->number_validator); this->lineEdit_llg_temperature_dir_y->setValidator(this->number_validator); - this->lineEdit_llg_temperature_dir_z->setValidator(this->number_validator); + this->lineEdit_llg_temperature_dir_z->setValidator(this->number_validator);*/ // GNEB this->lineEdit_gneb_springconstant->setValidator(this->number_validator_unsigned); this->lineEdit_gneb_springforceratio->setValidator(this->number_validator_unsigned); diff --git a/ui-cpp/ui/ControlWidget.ui b/ui-cpp/ui/ControlWidget.ui index 27eb08c7e..bb996d3ed 100644 --- a/ui-cpp/ui/ControlWidget.ui +++ b/ui-cpp/ui/ControlWidget.ui @@ -144,7 +144,7 @@ - -1 + 6 0 @@ -300,18 +300,8 @@ - SIB + Depondt - - - SIB - - - - - Heun - - Depondt @@ -322,11 +312,6 @@ RK4 - - - VP - - LBFGS_OSO @@ -334,12 +319,12 @@ - LBFGS_Atlas + VP_OSO - VP_OSO + CG_OSO @@ -353,33 +338,13 @@ - MC + LLG - - - MC - - LLG - - - GNEB - - - - - MMF - - - - - EMA - - diff --git a/ui-cpp/ui/HamiltonianMicromagneticWidget.ui b/ui-cpp/ui/HamiltonianMicromagneticWidget.ui index f60aaf21d..05d912d96 100644 --- a/ui-cpp/ui/HamiltonianMicromagneticWidget.ui +++ b/ui-cpp/ui/HamiltonianMicromagneticWidget.ui @@ -95,8 +95,128 @@ 0 + + + + Qt::Vertical + + + + 20 + 40 + + + + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + 120 + 0 + + + + + Current Image + + + + + Current Image Chain + + + + + All Images + + + + + + + + + 60 + 16777215 + + + + Apply to + + + + + + + + + Qt::Vertical + + + QSizePolicy::Fixed + + + + 20 + 10 + + + + - + + + + + + + + DDI + + + + + + + Exchange [J/m] + + + + + + 
+ Qt::Vertical + + + QSizePolicy::Fixed + + + + 20 + 10 + + + + + + + @@ -124,10 +244,40 @@ - - + + - + + + + 60 + 16777215 + + + + + + + + + 60 + 16777215 + + + + + + + + + 60 + 16777215 + + + + + + Qt::Horizontal @@ -139,13 +289,33 @@ - - - - Ms + + + + + + + + + 60 + 16777215 + + + + + Qt::Horizontal + + + + 40 + 20 + + + + @@ -164,9 +334,6 @@ - - - @@ -261,30 +428,48 @@ - - + + - - - - 60 - 16777215 - + + + + + + + + Anisotropy [J/m^3] + + + + + + + + + Direction (x,y,z) - - - + + + + Qt::Horizontal + + - 60 - 16777215 + 30 + 20 - + - - + + + + + + 60 @@ -293,8 +478,8 @@ - - + + Qt::Horizontal @@ -308,36 +493,37 @@ - - - - - - - + + + + 60 16777215 - - - - - - + + + + Qt::Horizontal + + - 60 - 16777215 + 40 + 20 - + - - + + + + + + Qt::Horizontal @@ -349,115 +535,50 @@ - - - - - - Qt::Vertical - - - QSizePolicy::Fixed - - - - 20 - 10 - - - - - - - - Qt::Vertical - - - QSizePolicy::Fixed - - - - 20 - 10 - - - - - - - - + + + + Ms + + - - - - Periodical boundaries - - - - - - - DMI (bulk/interface) - - - - - + + - External Field [T] - - - false - - - false + Frozen spins - - - - + + + + - Direction (x,y,z) + a - - - - Qt::Horizontal - - - - 30 - 20 - + + + + b - + - - - - - - - - - 60 - 16777215 - + + + + c - - + + Qt::Horizontal @@ -471,36 +592,30 @@ - - - - Qt::Vertical + + + + External Field [T] - - QSizePolicy::Fixed + + false - - - 20 - 10 - + + false - + - - - - - - - 60 - 16777215 - - - - - - + + + + DMI (bulk/interface) + + + + + + + Qt::Horizontal @@ -512,12 +627,22 @@ + + + + + 60 + 16777215 + + + + - - - - + + + + Qt::Horizontal @@ -530,17 +655,27 @@ - - - - 60 - 16777215 - + + + Cell sizes + + + + Qt::Horizontal + + + + 40 + 20 + + + + @@ -574,33 +709,77 @@ - - - - DDI - - + + + + + + + 60 + 16777215 + + + + + + + + + + + + 60 + 16777215 + + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + - - - - Frozen spins + + + + Qt::Vertical - - - - - - Anisotropy [J/m^3] + + QSizePolicy::Fixed - + + + 20 + 10 + + + - - - - Exchange [J/m] + + + + 
Qt::Vertical - + + QSizePolicy::Fixed + + + + 20 + 10 + + + @@ -618,31 +797,17 @@ - - + + + + Periodical boundaries + + + + + - - - a - - - - - - - b - - - - - - - c - - - - - + Qt::Horizontal @@ -654,12 +819,19 @@ + + + + Damping + + + - - - - + + + + Qt::Horizontal @@ -671,32 +843,8 @@ - - - - Cell sizes - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - + 60 @@ -705,107 +853,10 @@ - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - - - 60 - 16777215 - - - - Apply to - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - - - - 120 - 0 - - - - - Current Image - - - - - Current Image Chain - - - - - All Images - - - - - - - - - - Qt::Vertical - - - QSizePolicy::Fixed - - - - 20 - 10 - - - - - - - - Qt::Vertical - - - - 20 - 40 - - - - diff --git a/ui-cpp/ui/ParametersWidget.ui b/ui-cpp/ui/ParametersWidget.ui index 1983806fb..7ba548ee4 100644 --- a/ui-cpp/ui/ParametersWidget.ui +++ b/ui-cpp/ui/ParametersWidget.ui @@ -90,8 +90,8 @@ 0 0 - 513 - 670 + 483 + 667 @@ -110,24 +110,63 @@ LLG - - - - Qt::Vertical - - - QSizePolicy::Fixed - - - - 20 - 30 - - - + + + + + + + 60 + 16777215 + + + + Apply to + + + + + + + + 120 + 0 + + + + + Current Image + + + + + Current Image Chain + + + + + All Images + + + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + - - + + 0 @@ -135,108 +174,95 @@ - Parameters + Output - - - - - + + + + + + + + 20 + 0 + + - 60 + 80 16777215 - - Inclination - - - + + 60 16777215 - - 0 - - - - - Qt::Horizontal - - - - 40 - 20 - + + + + log steps - + - - - - - - - - - 40 - 16777215 - - + + - 1 + n_iterations - - - - - 40 - 16777215 - - + + + + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + - 0 + Final - - - - - 40 - 16777215 - - + + - 0 + Initial - - - - - 100 - 16777215 - - + + - Direction (x,y,z) + Any - - + + + + + + Qt::Horizontal @@ -248,126 +274,182 @@ - - - - - - Direct Minimization - - - - - - - - - 4 - - - 1000.000000000000000 + + + + + 100 + 16777215 + - - + + - K + Folder - - - - Damping - - - - - + + - - - - 40 - 0 - + 
+ + Energy Step - - - 50 - 16777215 - + + + + + + Energy divide by NOS - + - [ps] + Energy Archive - - - - Qt::Horizontal - - - - 40 - 20 - + + + + Energy Spin Resolved - + - - - - + + + + Configuration Step + + + + + + + Configuration Archive + + + + + + + + + + + + Qt::Vertical + + + + 20 + 40 + + + + + + + + Qt::Vertical + + + QSizePolicy::Fixed + + + + 20 + 30 + + + + + + + + + 0 + 0 + + + + Parameters + + + + - Convergence at + Time Step [ps] - - + + + + Max torque + + + + + - + + + + 40 + 0 + + - 15 + 50 16777215 - - 1E - - + + + Qt::Horizontal + + + + 40 + 20 + + + + + + + + + + - 60 + 40 0 - - -20 - - - 20 - - - -8 + + + 50 + 16777215 + - - + + Qt::Horizontal @@ -381,30 +463,17 @@ - - + + - Spin Torque + n_LBFGS - - + + - - - 4 - - - 1000.000000000000000 - - - 0.100000000000000 - - - - - + Qt::Horizontal @@ -416,100 +485,58 @@ - - - - - - Gradient method - - - false - - - - - - - Monolayer approximation - - - true - - - - - - - - - - - Time Step - - - - - - - + + - 45 - 16777215 + 40 + 0 - - 3 - - - -100.000000000000000 - - - 100.000000000000000 - - - - - - 45 + 50 16777215 - - 3 - - - -100.000000000000000 - - - 100.000000000000000 - - - + + + + + + LBFGS max_move + + + + + + + Grouped iterations + + + + + + + + + + 40 + 0 + + - 45 + 50 16777215 - - 3 - - - -100.000000000000000 - - - 100.000000000000000 - - - + + Qt::Horizontal @@ -523,23 +550,7 @@ - - - - - 40 - 0 - - - - - 50 - 16777215 - - - - - + Qt::Horizontal @@ -552,101 +563,26 @@ - - - - - - Polarisation (x y z) - - - + + - - - Qt::Horizontal - - - QSizePolicy::Minimum - - + + - 15 - 20 + 40 + 0 - - - - - - - - Temperature - - - - - - - - - - - 0 - 0 - - - - Output - - - - - - - - Final - - - - - - - Initial - - - - - - - Any - - - - - - - - - - - Folder - - - - - - 100 + 50 16777215 - - + + Qt::Horizontal @@ -660,180 +596,49 @@ - - - - - - Energy Step - - - - - - - Energy divide by NOS - - - - - - - Energy Archive - - - - - - - Energy Spin Resolved - - - - - - - Configuration Step - - - - - - - 
Configuration Archive - - - - - - - - - Qt::Horizontal - - - - 40 - 20 - + + + + Save period - + - - - - + + + + - 20 + 40 0 - 80 + 50 16777215 - - - - n_iterations - - - - - - - log steps + + + + Qt::Horizontal - - - - - + - 60 - 16777215 + 40 + 20 - + - - - - Qt::Vertical - - - - 20 - 40 - - - - - - - - - - - 60 - 16777215 - - - - Apply to - - - - - - - - 120 - 0 - - - - - Current Image - - - - - Current Image Chain - - - - - All Images - - - - - - - - Qt::Horizontal - - - - 40 - 20 - - - - - - @@ -842,7 +647,7 @@ 0 0 483 - 637 + 667 @@ -1194,7 +999,7 @@ 0 0 483 - 715 + 667 @@ -1740,7 +1545,7 @@ 0 0 483 - 637 + 667 @@ -2055,7 +1860,7 @@ 0 0 483 - 637 + 667 @@ -2407,14 +2212,6 @@ comboBox_LLG_ApplyTo - checkBox_llg_temperature - doubleSpinBox_llg_temperature - checkBox_llg_stt - doubleSpinBox_llg_stt_magnitude - doubleSpinBox_llg_stt_polarisation_x - doubleSpinBox_llg_stt_polarisation_y - doubleSpinBox_llg_stt_polarisation_z - lineEdit_Damping lineEdit_dt lineEdit_llg_output_folder lineEdit_llg_n_iterations