
[Transform][Tiling] Add deep tile support for matmul #90

Merged 21 commits on Aug 9, 2024
594 changes: 594 additions & 0 deletions docs/deep_tile_matmul_design.md

Large diffs are not rendered by default.

112 changes: 112 additions & 0 deletions include/gc/Analysis/MatmulConfigAnalysis.h
@@ -0,0 +1,112 @@
//===-- MatmulConfigAnalysis.h - the analysis for matmul config -*- C++ -*-===//
//
// This file is licensed under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef MLIR_ANALYSIS_MATMULCONFIGANALYSIS_H
#define MLIR_ANALYSIS_MATMULCONFIGANALYSIS_H

#include "gc/Dialect/Linalgx/LinalgxOps.h"
#include "mlir/Dialect/DLTI/DLTI.h"
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Interfaces/DataLayoutInterfaces.h"

namespace mlir {
namespace gc {

using namespace mlir;

// The configuration for matmul tiling
// TODO: support batch matmul
struct MatmulConfig {
  // The number of threads distributed to M, N, K
  uint32_t MThreads, NThreads, KThreads;
  // The outer block size for M, N, K, used to decide the loop tile size within
  // a single thread
  uint32_t MBlock, NBlock, KBlock;
  // The innermost block size for M, N, K, which is lowered directly to a
  // brgemm call
  uint32_t innerMostMBlock, innerMostNBlock, innerMostKBlock;
};
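//
// Illustrative example (hypothetical values, not taken from this patch): for a
// 4096x4096x4096 matmul on 16 threads, one possible config is
//   MThreads=4, NThreads=4, KThreads=1    -> a 4x4 thread grid over M and N
//   MBlock=256, NBlock=256, KBlock=512    -> per-thread outer loop tiles
//   innerMostMBlock=32, innerMostNBlock=32, innerMostKBlock=64
//                                         -> the brgemm microkernel shape
// Each thread then owns a 1024x1024 slice of C and computes it as 256x256
// outer tiles, each of which is accumulated by 32x32x64 brgemm calls.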

enum DimType { Batch, M, N, K };

// Extract the indices of the given DimType in the DimType list
inline SmallVector<unsigned> extractDimTypeIdx(ArrayRef<DimType> tyList,
                                               DimType ty) {
  SmallVector<unsigned> idxList;
  for (auto [idx, type] : llvm::enumerate(tyList)) {
    if (type == ty) {
      idxList.push_back(idx);
    }
  }
  return idxList;
}
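//
// For example (illustrative, not part of the original header), given the
// packed operand layout {N, K, K, N, K} used for the B matrix of the VNNI ops
// below:
//   extractDimTypeIdx({N, K, K, N, K}, DimType::K) == {1, 2, 4}
//   extractDimTypeIdx({N, K, K, N, K}, DimType::N) == {0, 3}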

// Get the dim type of every operand of the given linalg op
inline FailureOr<SmallVector<SmallVector<DimType>>>
getOprandDimType(linalg::LinalgOp &linalgOp) {
  // TODO: replace the linalgx op with generic op
  if (llvm::isa<linalg::MatmulOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::M, DimType::K},
        SmallVector<DimType>{DimType::K, DimType::N},
        SmallVector<DimType>{DimType::M, DimType::N}};
  } else if (llvm::isa<linalgx::Mm2DVnniOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::M, DimType::K},
        SmallVector<DimType>{DimType::N, DimType::K, DimType::K, DimType::N,
                             DimType::K},
        SmallVector<DimType>{DimType::M, DimType::N, DimType::M, DimType::N}};
  } else if (llvm::isa<linalgx::Mm4DVnniOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::M, DimType::K, DimType::M, DimType::K},
        SmallVector<DimType>{DimType::N, DimType::K, DimType::K, DimType::N,
                             DimType::K},
        SmallVector<DimType>{DimType::M, DimType::N, DimType::M, DimType::N}};
  } else if (llvm::isa<linalg::BatchMatmulOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::Batch, DimType::M, DimType::K},
        SmallVector<DimType>{DimType::Batch, DimType::K, DimType::N},
        SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
  } else if (llvm::isa<linalg::MatmulTransposeAOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::K, DimType::M},
        SmallVector<DimType>{DimType::K, DimType::N},
        SmallVector<DimType>{DimType::M, DimType::N}};
  } else if (llvm::isa<linalg::MatmulTransposeBOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::M, DimType::K},
        SmallVector<DimType>{DimType::N, DimType::K},
        SmallVector<DimType>{DimType::M, DimType::N}};
  } else if (llvm::isa<linalg::BatchMatmulTransposeAOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::Batch, DimType::K, DimType::M},
        SmallVector<DimType>{DimType::Batch, DimType::K, DimType::N},
        SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
  } else if (llvm::isa<linalg::BatchMatmulTransposeBOp>(linalgOp)) {
    return SmallVector<SmallVector<DimType>>{
        SmallVector<DimType>{DimType::Batch, DimType::M, DimType::K},
        SmallVector<DimType>{DimType::Batch, DimType::N, DimType::K},
        SmallVector<DimType>{DimType::Batch, DimType::M, DimType::N}};
  }
  return failure();
}
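//
// A hypothetical way the two helpers above compose (not part of the original
// header): locating all K dimensions of the packed B operand of an
// Mm4DVnniOp, whose dim types are {N, K, K, N, K}:
//   auto dims = getOprandDimType(linalgOp);
//   if (succeeded(dims)) {
//     // returns {1, 2, 4}
//     auto kIdxOfB = extractDimTypeIdx((*dims)[1], DimType::K);
//   }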

// The analysis to extract the matmul configuration from the given linalg op
struct MatmulConfigAnalysis {
public:
  explicit MatmulConfigAnalysis(Operation *root);
  MatmulConfig getConfig() { return config; }

private:
  MatmulConfig config;
};
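//
// A minimal usage sketch (hypothetical caller code, not part of this header):
//   MatmulConfigAnalysis analysis(matmulOp.getOperation());
//   MatmulConfig cfg = analysis.getConfig();
//   // cfg.{M,N,K}Threads pick the thread-level decomposition, while
//   // cfg.{M,N,K}Block and cfg.innerMost{M,N,K}Block drive the loop tiling
//   // down to the brgemm microkernel.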

} // namespace gc
} // namespace mlir

#endif
31 changes: 25 additions & 6 deletions include/gc/Transforms/Passes.td
@@ -11,12 +11,6 @@

include "mlir/Pass/PassBase.td"

def TileLinalgNamed : Pass<"tile-named-linalg", "func::FuncOp"> {
  let summary = "Tile linalg named operations.";
  let dependentDialects =
      ["linalg::LinalgDialect", "scf::SCFDialect", "tensor::TensorDialect"];
}

#ifdef GC_HAS_ONEDNN_DIALECT
def ConvertOneDNNGraphToLinalg : Pass<"convert-onednn-graph-to-linalg"> {
let summary =
@@ -71,6 +65,18 @@ def IterativeTilingAndFusion : Pass<"iterative-tiling-and-fusion",
"Decide if enable cost model to control iterative fusion.">,
ListOption<"defaultTileSize", "default-tile-size", "std::string",
"Set default TileSize for the certain type of op, saying `matmul:{32,32}`">,
];
}
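// Illustrative invocation (assumed command-line syntax, not part of the
// original patch), reusing the option's own example value:
//   --iterative-tiling-and-fusion="default-tile-size=matmul:{32,32}"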
def DeepTileContractionNamedOp
    : Pass<"deep-tile-contraction-named-op", "func::FuncOp"> {
  let summary = "Deeply tile named linalg contraction operations";
  let description =
      [{The pass tries to deeply tile named linalg contraction ops.}];
  let dependentDialects = [
    "func::FuncDialect",
    "arith::ArithDialect",
    "tensor::TensorDialect",
    "linalg::LinalgDialect",
  ];
}
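// Illustrative invocation (assumed driver name, not part of the original
// patch), on the mlir-opt-like tool built by this project:
//   <opt-tool> --deep-tile-contraction-named-op input.mlir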

@@ -87,4 +93,17 @@ def VerifyTargetDescription : Pass<"verify-target-description", "ModuleOp"> {
];
}

def SinkOpIntoInnerLoop : Pass<"sink-op-into-inner-loop"> {
  let summary = "Sink operations into inner loops";
  let description = [{The pass tries to sink operations into inner loops as
    deeply as possible to maximize the opportunities for outer-loop
    optimization.}];
  let dependentDialects = [];
}

def MergeNestedForall : Pass<"merge-nested-forall"> {
  let summary = "Merge nested scf.forall operations";
  let description = [{The pass tries to merge nested scf.forall operations.}];
  let dependentDialects = ["scf::SCFDialect"];
}

#endif // GC_DIALECT_GC_PASSES
3 changes: 2 additions & 1 deletion lib/gc/Analysis/CMakeLists.txt
@@ -4,6 +4,7 @@ gc_set_mlir_link_components(MLIR_LINK_COMPONENTS

gc_add_mlir_library(GcAnalysis
  TargetDescriptionAnalysis.cpp
  MatmulConfigAnalysis.cpp

  DEPENDS
    GraphCompilerPassIncGen
@@ -12,4 +13,4 @@ gc_add_mlir_library(GcAnalysis
  ${mlir_dialect_libs}
  ${MLIR_LINK_COMPONENTS}
  GcInterface
)
)