Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Implemented GPU runner #362

Merged
merged 2 commits into from
Oct 2, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 33 additions & 1 deletion cmake/functions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,36 @@ function(gc_add_mlir_dialect_library name)
if(GcInterface IN_LIST ARGN)
target_link_libraries(obj.${name} PUBLIC GcInterface)
endif()
endfunction()
endfunction()

# gc_add_mlir_tool(<name> [<args>...])
#
# Declares an MLIR tool target. All arguments are forwarded unchanged to
# add_mlir_tool(); the resulting target is then linked privately against
# GcInterface and ${MLIR_LINK_COMPONENTS}, has its compile flags refreshed via
# llvm_update_compile_flags(), and is appended to the global GC_TOOLS property.
#
# LLVM_LINK_COMPONENTS and MLIR_LINK_COMPONENTS receive default values only
# when the caller has not defined them beforehand. This is a macro, so it has
# no variable scope of its own: the LLVM_LINK_COMPONENTS default set below
# stays visible in the calling scope afterwards.
macro(gc_add_mlir_tool name)
  # Both default lists were copied from the upstream file
  # mlir/tools/mlir-cpu-runner/CMakeLists.txt.
  if(NOT DEFINED LLVM_LINK_COMPONENTS)
    set(LLVM_LINK_COMPONENTS Core Support nativecodegen native)
  endif()

  if(NOT DEFINED MLIR_LINK_COMPONENTS)
    gc_set_mlir_link_components(MLIR_LINK_COMPONENTS
      MLIRAnalysis
      MLIRBuiltinToLLVMIRTranslation
      MLIRExecutionEngine
      MLIRIR
      MLIRJitRunner
      MLIRLLVMDialect
      MLIRLLVMToLLVMIRTranslation
      MLIRToLLVMIRTranslationRegistration
      MLIRParser
      MLIRTargetLLVMIRExport
      MLIRSupport
    )
  endif()

  add_mlir_tool(${ARGV})

  # LLVM_LINK_COMPONENTS is deliberately absent from the link line below:
  # the LLVM cmake code processes it inside add_llvm_executable.
  target_link_libraries(${name}
    PRIVATE
      GcInterface
      ${MLIR_LINK_COMPONENTS}
  )
  llvm_update_compile_flags(${name})
  set_property(GLOBAL APPEND PROPERTY GC_TOOLS ${name})
endmacro()
2 changes: 2 additions & 0 deletions include/gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h
Original file line number Diff line number Diff line change
Expand Up @@ -240,6 +240,7 @@ struct OclModule {

struct OclModuleBuilderOpts {
StringRef funcName = {};
bool printIr = false;
bool enableObjectDump = false;
ArrayRef<StringRef> sharedLibPaths = {};
void (*pipeline)(OpPassManager &) = nullptr;
Expand Down Expand Up @@ -267,6 +268,7 @@ struct OclModuleBuilder {

private:
ModuleOp mlirModule;
const bool printIr;
const bool enableObjectDump;
const ArrayRef<StringRef> sharedLibPaths;
void (*const pipeline)(OpPassManager &);
Expand Down
7 changes: 6 additions & 1 deletion lib/gc/ExecutionEngine/GPURuntime/ocl/GpuOclRuntime.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -749,7 +749,8 @@ ArrayRef<Type> getArgTypes(const StringRef &funcName, ModuleOp &mod) {

OclModuleBuilder::OclModuleBuilder(ModuleOp module,
const OclModuleBuilderOpts &opts)
: mlirModule(module), enableObjectDump(opts.enableObjectDump),
: mlirModule(module), printIr(opts.printIr),
enableObjectDump(opts.enableObjectDump),
sharedLibPaths(opts.sharedLibPaths),
pipeline(opts.pipeline
? opts.pipeline
Expand Down Expand Up @@ -799,6 +800,10 @@ OclModuleBuilder::build(const OclRuntime::Ext &ext) {

auto staticMain = createStaticMain(mod, funcName, argTypes);

if (printIr) {
mod.dump();
}

ExecutionEngineOptions opts;
opts.jitCodeGenOptLevel = llvm::CodeGenOptLevel::Aggressive;
opts.enableObjectDump = enableObjectDump;
Expand Down
18 changes: 18 additions & 0 deletions src/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,21 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

# Sub-projects built from src/. The gc-gpu-runner directory is always entered;
# its own CMakeLists.txt returns early unless both GC_ENABLE_TOOLS and
# GC_ENABLE_IMEX are set.
add_subdirectory(dnnl)
add_subdirectory(gc-cpu-runner)
add_subdirectory(gc-gpu-runner)
add_subdirectory(gc-opt)
34 changes: 2 additions & 32 deletions src/gc-cpu-runner/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,38 +29,8 @@ if(GC_DEV_LINK_LLVM_DYLIB)
MLIRExecutionEngineShared
MLIRJitRunner
)
else()
# the dependency list copied from mlir/tools/mlir-cpu-runner/CMakeLists.txt of upstream
set(LLVM_LINK_COMPONENTS
Core
Support
nativecodegen
native
)
set(MLIR_LINK_COMPONENTS
MLIRAnalysis
MLIRBuiltinToLLVMIRTranslation
MLIRExecutionEngine
MLIRIR
MLIRJitRunner
MLIRLLVMDialect
MLIRLLVMToLLVMIRTranslation
MLIRToLLVMIRTranslationRegistration
MLIRParser
MLIRTargetLLVMIRExport
MLIRSupport
)
endif()

#LLVM_LINK_COMPONENTS is processed by LLVM cmake in add_llvm_executable
set(gc_cpu_runner_libs
${MLIR_LINK_COMPONENTS}
GcCpuRuntime)
add_mlir_tool(gc-cpu-runner
gc-cpu-runner.cpp

)
llvm_update_compile_flags(gc-cpu-runner)

target_link_libraries(gc-cpu-runner PRIVATE GcInterface ${gc_cpu_runner_libs})
gc_add_mlir_tool(gc-cpu-runner gc-cpu-runner.cpp)
target_link_libraries(gc-cpu-runner PRIVATE GcCpuRuntime)
mlir_check_all_link_libraries(gc-cpu-runner)
28 changes: 28 additions & 0 deletions src/gc-gpu-runner/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
################################################################################
# Copyright (C) 2024 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions
# and limitations under the License.
# SPDX-License-Identifier: Apache-2.0
################################################################################

# The GPU runner is built only when both the tools and the IMEX options are
# switched on; otherwise this directory contributes no targets.
if(NOT (GC_ENABLE_TOOLS AND GC_ENABLE_IMEX))
  message(STATUS "Gpu runner is not enabled.")
  return()
endif()

# gc_add_mlir_tool() creates the executable with the common runner
# dependencies; the libraries specific to this tool are added on top.
gc_add_mlir_tool(gc-gpu-runner GpuRunner.cpp)
target_link_libraries(gc-gpu-runner PRIVATE GcJitWrapper GcGpuOclRuntime)
mlir_check_all_link_libraries(gc-gpu-runner)
174 changes: 174 additions & 0 deletions src/gc-gpu-runner/GpuRunner.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
/*
* Copyright (C) 2024 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* SPDX-License-Identifier: Apache-2.0
*/

#include "gc/ExecutionEngine/Driver/Driver.h"
#include "gc/ExecutionEngine/GPURuntime/GpuOclRuntime.h"
#include "gc/Transforms/Passes.h"
#include "gc/Utils/Error.h"

#include "mlir/Dialect/Arith/IR/Arith.h"
#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
#include "mlir/ExecutionEngine/JitRunner.h"
#include "mlir/ExecutionEngine/OptUtils.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Tools/ParseUtilities.h"
#include "mlir/Transforms/Passes.h"

#include "llvm/Support/CommandLine.h"
#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/SourceMgr.h"

using namespace mlir;

namespace {
/// Command line options of the GPU runner.
///
/// The options are members of a struct rather than globals, so they are
/// created together with the Options instance; main() constructs one before
/// calling llvm::cl::ParseCommandLineOptions().
struct Options {
  /// Category that groups all the runner options in the help output.
  llvm::cl::OptionCategory runnerCategory{"GPU runner options"};

  /// Positional argument: path to the input IR. Defaults to "-".
  llvm::cl::opt<std::string> inputFilename{
      llvm::cl::Positional, llvm::cl::desc("<input file>"), llvm::cl::init("-"),
      llvm::cl::cat(runnerCategory)};

  /// -e: name of the function to run. When left empty, findFunc() picks the
  /// first function with a matching signature and stores its name here.
  llvm::cl::opt<std::string> mainFuncName{
      "e",
      llvm::cl::desc("The function to be executed. If not specified, the "
                     "first matching function in the module to be used."),
      llvm::cl::value_desc("function name"), llvm::cl::cat(runnerCategory)};

  /// --skip-pipeline: do not run the GPU pipeline on the input module.
  llvm::cl::opt<bool> skipPipeline{
      "skip-pipeline",
      llvm::cl::desc("Skip the GPU pipeline. It's expected, that the input is "
                     "already lowered with 'gc-opt --gc-gpu-pipeline'."),
      llvm::cl::init(false), llvm::cl::cat(runnerCategory)};

  /// --shared-libs: comma separated list of libraries handed to the module
  /// builder. The value placeholder is given as value_desc: a second
  /// cl::desc would overwrite the description above in the help output.
  llvm::cl::list<std::string> sharedLibs{
      "shared-libs",
      llvm::cl::desc("Comma separated library paths to link dynamically."),
      llvm::cl::MiscFlags::CommaSeparated,
      llvm::cl::value_desc("lib1,lib2,..."), llvm::cl::cat(runnerCategory)};

  /// --print-ir: forwarded to OclModuleBuilderOpts::printIr.
  llvm::cl::opt<bool> printIr{
      "print-ir",
      llvm::cl::desc("Print the resulting IR before the execution."),
      llvm::cl::init(false), llvm::cl::cat(runnerCategory)};

  /// --obj-dump-file: when non-empty, object dumping is enabled and the
  /// compiled object is written to this path.
  llvm::cl::opt<std::string> objDumpFile{
      "obj-dump-file",
      llvm::cl::desc("Dump the compiled object to the specified file."),
      llvm::cl::value_desc("file path"), llvm::cl::cat(runnerCategory)};
};
} // namespace

/// Resolves and validates the function the runner is going to execute.
///
/// The required signature depends on --skip-pipeline: with the flag set, the
/// function must take exactly (!llvm.ptr, !llvm.ptr, i64); without it, the
/// function must take no arguments.
///
/// If no name was given with -e, the top-level operations of \p mod are
/// scanned in order and the name of the first public, non-external
/// LLVM::LLVMFuncOp or func::FuncOp with a suitable signature is stored in
/// opts.mainFuncName. Otherwise the named function is looked up and its
/// signature is verified. Every failure is reported through gcReportErr().
///
/// \param opts runner options; mainFuncName may be filled in by this call.
/// \param mod  module to search.
void findFunc(Options &opts, ModuleOp mod) {
  using Matcher = bool (*)(ArrayRef<Type>, ModuleOp &);

  // Signature expected from an already lowered entry point.
  Matcher loweredSignature = [](ArrayRef<Type> types, ModuleOp &module) {
    if (types.size() != 3)
      return false;
    auto context = module.getContext();
    auto pointer = LLVM::LLVMPointerType::get(context);
    return types[0] == pointer && types[1] == pointer &&
           types[2] == IntegerType::get(context, 64);
  };
  // Signature expected when the runner lowers the module itself.
  Matcher noArguments = [](ArrayRef<Type> types, ModuleOp &) {
    return types.empty();
  };
  Matcher matcher = opts.skipPipeline ? loweredSignature : noArguments;

  if (opts.mainFuncName.empty()) {
    // Accepts the candidate and records its name when it is a defined,
    // public function with the expected signature.
    auto tryCandidate = [&](auto candidate) {
      if (!candidate || candidate.isExternal() || !candidate.isPublic())
        return false;
      if (!matcher(candidate.getArgumentTypes(), mod))
        return false;
      opts.mainFuncName = candidate.getName().str();
      return true;
    };

    for (auto &topLevelOp : mod.getBody()->getOperations()) {
      bool found = tryCandidate(dyn_cast<LLVM::LLVMFuncOp>(topLevelOp)) ||
                   tryCandidate(dyn_cast<func::FuncOp>(topLevelOp));
      if (found)
        return;
    }
    gcReportErr("No matching function found.");
  }

  // An explicit name was requested: it must exist as one of the two
  // supported function kinds.
  ArrayRef<Type> argTypes;
  if (auto llvmFn = mod.lookupSymbol<LLVM::LLVMFuncOp>(opts.mainFuncName)) {
    argTypes = llvmFn.getArgumentTypes();
  } else if (auto fn = mod.lookupSymbol<func::FuncOp>(opts.mainFuncName)) {
    argTypes = fn.getArgumentTypes();
  } else {
    gcReportErr("The function '", opts.mainFuncName.c_str(), "' not found.");
  }

  if (matcher(argTypes, mod))
    return;

  if (opts.skipPipeline) {
    gcReportErr("The function '", opts.mainFuncName.c_str(),
                "' signature does not match (!llvm.ptr, !llvm.ptr, i64).");
  }
  gcReportErr("The function '", opts.mainFuncName.c_str(),
              "' must have no arguments.");
}

/// Entry point of the GPU runner.
///
/// Parses the command line, reads and parses the input IR, selects the
/// function to execute (see findFunc()), builds an OpenCL module from the MLIR
/// module - running the GPU pipeline unless --skip-pipeline is given -
/// optionally dumps the compiled object, and finally executes the function on
/// a freshly created queue, waiting for completion before releasing the queue.
///
/// Errors are reported through gcReportErr()/gcGetOrReport().
/// NOTE(review): the code relies on gcReportErr() not returning - confirm
/// against gc/Utils/Error.h.
///
/// \returns 0 on success.
int main(int argc, char **argv) {
  Options opts;
  llvm::cl::ParseCommandLineOptions(argc, argv, "GraphCompiler GPU runner\n");

  std::string errMsg;
  auto file = openInputFile(opts.inputFilename, &errMsg);
  if (!file) {
    gcReportErr("Failed to read input IR: ", errMsg.c_str());
  }

  auto srcMgr = std::make_shared<llvm::SourceMgr>();
  srcMgr->AddNewSourceBuffer(std::move(file), SMLoc());
  MLIRContext mlirCtx{gc::initCompilerAndGetDialects()};
  auto mlirMod = parseSourceFile<ModuleOp>(srcMgr, {&mlirCtx});
  // parseSourceFile() yields a null reference when the input cannot be parsed;
  // dereferencing it below would crash instead of reporting the problem.
  if (!mlirMod) {
    gcReportErr("Failed to parse input IR.");
  }
  findFunc(opts, *mlirMod);

  gc::gpu::OclModuleBuilderOpts builderOpts;
  // builderOpts.sharedLibPaths is an ArrayRef, so the backing vector must
  // outlive the builder; it lives until the end of main().
  SmallVector<StringRef, 4> sharedLibs(opts.sharedLibs.begin(),
                                       opts.sharedLibs.end());
  builderOpts.funcName = opts.mainFuncName;
  builderOpts.printIr = opts.printIr;
  // Object dumping is only enabled when a destination file was requested.
  builderOpts.enableObjectDump = !opts.objDumpFile.getValue().empty();
  builderOpts.sharedLibPaths = sharedLibs;
  // With --skip-pipeline an empty pipeline is installed, otherwise the GPU
  // pipeline configured for non-USM arguments with a finish call.
  builderOpts.pipeline =
      opts.skipPipeline ? [](OpPassManager &) {} : [](OpPassManager &pm) {
        gc::GPUPipelineOptions pipelineOpts;
        pipelineOpts.isUsmArgs = false;
        pipelineOpts.callFinish = true;
        populateGPUPipeline(pm, pipelineOpts);
      };

  gc::gpu::OclModuleBuilder builder{mlirMod, builderOpts};
  auto runtime = gcGetOrReport(gc::gpu::OclRuntime::get());
  auto oclMod = gcGetOrReport(builder.build(runtime));
  assert(oclMod->isStatic);

  if (!opts.objDumpFile.getValue().empty()) {
    gcLogD("Dumping the compiled object to ", opts.objDumpFile.getValue());
    oclMod->dumpToObjectFile(opts.objDumpFile.getValue());
  }

  auto queue = gcGetOrReport(runtime.createQueue());
  gc::gpu::OclContext ctx{runtime, queue};
  gc::gpu::StaticExecutor<0> exec{oclMod};
  gcLogD("Executing function ", opts.mainFuncName.c_str(), "()");
  exec(ctx);
  // Wait for the submitted work to complete before the queue is released.
  gcGetOrReport(ctx.finish());
  gcGetOrReport(runtime.releaseQueue(queue));
  return 0;
}
2 changes: 1 addition & 1 deletion test/mlir/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ set(GC_OPT_TEST_DEPENDS

if(GC_ENABLE_IMEX)
include(imex)
list(APPEND GC_OPT_TEST_DEPENDS GcOpenclRuntime)
list(APPEND GC_OPT_TEST_DEPENDS gc-gpu-runner)
endif()

if(GC_ENABLE_BINDINGS_PYTHON)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

module{

func.func @linalg_matmul(%arg0: tensor<128x256xf16>,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

module{

func.func @linalg_matmul(%arg0: tensor<128x256xf16>,
Expand Down
5 changes: 2 additions & 3 deletions test/mlir/test/gc/gpu-runner/XeGPU/f16_matmul_64x64.mlir
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

module{

func.func @linalg_matmul(%arg0: tensor<64x64xf16>,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

module {
func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>,
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
// RUN: gc-opt %s --gc-gpu-pipeline="is-usm-args=false use-gpu-ocl=false" \
// RUN: | gc-cpu-runner -e main --entry-point-result=void \
// RUN: --shared-libs=%mlir_runner_utils,%mlir_c_runner_utils,%opencl_runtime | FileCheck %s
// RUN: gc-gpu-runner --shared-libs=%mlir_runner_utils %s | FileCheck %s

module {
func.func @linalg_mlp(%arg0: tensor<32x4096xf16>, %arg1: tensor<4096x4096xf16>, %arg2 : tensor<32x4096xf16>,
Expand Down
Loading