From c682c3c280e315d86895d22da45a9b3959907a4d Mon Sep 17 00:00:00 2001 From: Andy Ragusa Date: Mon, 13 Nov 2023 08:43:59 -0800 Subject: [PATCH] Upgraded bytecode compiler to work with Clang/LLVM 16 --- .clang-format | 39 +- .github/workflows/clang-format.yml | 4 +- .github/workflows/cmake.yml | 3 +- CMakeLists.txt | 4 +- clam-format | 7 +- clambcc/clambc-compiler.py | 411 ++++++++++-------- cmake/FindClamAV.cmake | 2 +- cmake/FindClang.cmake | 2 +- cmake/FindLLVM.cmake | 13 +- examples/CMakeLists.txt | 68 +-- examples/LegacyPassManager/CMakeLists.txt | 67 +++ .../HelloWorld/HelloWorld.cpp | 2 +- .../AnalysisPlugin/AnalysisPlugin.cpp | 129 ++++++ .../PassManager/AnalysisPlugin/CMakeLists.txt | 72 +++ examples/PassManager/CMakeLists.txt | 4 + examples/PassManager/input/compile.sh | 47 ++ examples/PassManager/input/run_opt.sh | 4 + headers/bcfeatures.h | 2 +- headers/bytecode_api.h | 18 +- headers/bytecode_api_decl.c.h | 2 +- headers/bytecode_detect.h | 2 +- libclambcc/CMakeLists.txt | 111 ++--- libclambcc/ClamBCAnalyzer/CMakeLists.txt | 44 ++ libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp | 127 +++--- libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h | 72 ++- .../ClamBCChangeMallocArgSize/CMakeLists.txt | 44 ++ .../ClamBCChangeMallocArgSize.cpp | 64 ++- .../ClamBCConvertIntrinsics.cpp | 135 ------ .../CMakeLists.txt | 43 ++ .../ClamBCConvertIntrinsicsTo32Bit.cpp | 212 +++++++++ .../ClamBCExtendPHIsTo64Bit/CMakeLists.txt | 44 ++ .../ClamBCExtendPHIsTo64Bit.cpp | 53 ++- .../ClamBCLogicalCompiler/CMakeLists.txt | 43 ++ .../ClamBCLogicalCompiler.cpp | 259 ++++++++--- .../CMakeLists.txt | 44 ++ .../ClamBCLogicalCompilerHelper.cpp | 221 ++++++++++ libclambcc/ClamBCLowering/CMakeLists.txt | 89 ++++ libclambcc/ClamBCLowering/ClamBCLowering.cpp | 176 ++------ libclambcc/ClamBCLowering/ClamBCLowering.h | 61 +++ libclambcc/ClamBCLowering/ClamBCLoweringF.cpp | 65 +++ .../ClamBCLowering/ClamBCLoweringNF.cpp | 66 +++ libclambcc/ClamBCModule/ClamBCModule.cpp | 30 -- .../CMakeLists.txt | 45 
++ .../ClamBCOutlineEndiannessCalls.cpp | 58 ++- .../ClamBCPrepareGEPsForWriter/CMakeLists.txt | 44 ++ .../ClamBCPrepareGEPsForWriter.cpp | 50 ++- libclambcc/ClamBCPreserveABIs/CMakeLists.txt | 43 ++ .../ClamBCPreserveABIs/ClamBCPreserveABIs.cpp | 60 ++- libclambcc/ClamBCRebuild/CMakeLists.txt | 44 ++ libclambcc/ClamBCRebuild/ClamBCRebuild.cpp | 101 +++-- libclambcc/ClamBCRegAlloc/CMakeLists.txt | 43 ++ .../ClamBCRegAlloc.cpp | 83 ++-- libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h | 112 +++++ libclambcc/ClamBCRemoveFSHL/CMakeLists.txt | 42 ++ .../ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp | 178 ++++++++ .../ClamBCRemoveFreezeInsts/CMakeLists.txt | 43 ++ .../ClamBCRemoveFreezeInsts.cpp | 119 +++++ libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt | 43 ++ .../ClamBCRemoveICMPSLE.cpp | 115 +++++ .../ClamBCRemovePointerPHIs/CMakeLists.txt | 43 ++ .../ClamBCRemovePointerPHIs.cpp | 82 ++-- .../ClamBCRemoveSelectInsts.cpp | 116 ----- libclambcc/ClamBCRemoveUSUB/CMakeLists.txt | 44 ++ .../ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp | 139 ++++++ libclambcc/ClamBCRemoveUndefs/CMakeLists.txt | 42 ++ .../ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp | 64 ++- .../CMakeLists.txt | 42 ++ .../ClamBCRemoveUnsupportedICMPIntrinsics.cpp | 151 +++++++ libclambcc/ClamBCTrace/CMakeLists.txt | 43 ++ libclambcc/ClamBCTrace/ClamBCTrace.cpp | 73 ++-- libclambcc/ClamBCVerifier/CMakeLists.txt | 43 ++ libclambcc/ClamBCVerifier/ClamBCVerifier.cpp | 274 +++++++++--- libclambcc/ClamBCWriter/CMakeLists.txt | 43 ++ libclambcc/ClamBCWriter/ClamBCWriter.cpp | 103 +++-- libclambcc/Common/CMakeLists.txt | 42 ++ libclambcc/Common/ClamBCDiagnostics.cpp | 86 +--- libclambcc/Common/ClamBCModule.h | 25 +- libclambcc/Common/ClamBCUtilities.cpp | 198 ++++++++- libclambcc/Common/ClamBCUtilities.h | 33 +- libclambcc/Common/clambc.h | 12 + test/CMakeLists.txt | 2 +- 81 files changed, 4401 insertions(+), 1427 deletions(-) create mode 100644 examples/LegacyPassManager/CMakeLists.txt rename examples/{ => 
LegacyPassManager}/HelloWorld/HelloWorld.cpp (96%) create mode 100644 examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp create mode 100644 examples/PassManager/AnalysisPlugin/CMakeLists.txt create mode 100644 examples/PassManager/CMakeLists.txt create mode 100755 examples/PassManager/input/compile.sh create mode 100755 examples/PassManager/input/run_opt.sh create mode 100644 libclambcc/ClamBCAnalyzer/CMakeLists.txt create mode 100644 libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt delete mode 100644 libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp create mode 100644 libclambcc/ClamBCConvertIntrinsicsTo32Bit/CMakeLists.txt create mode 100644 libclambcc/ClamBCConvertIntrinsicsTo32Bit/ClamBCConvertIntrinsicsTo32Bit.cpp create mode 100644 libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt create mode 100644 libclambcc/ClamBCLogicalCompiler/CMakeLists.txt create mode 100644 libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt create mode 100644 libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp create mode 100644 libclambcc/ClamBCLowering/CMakeLists.txt create mode 100644 libclambcc/ClamBCLowering/ClamBCLowering.h create mode 100644 libclambcc/ClamBCLowering/ClamBCLoweringF.cpp create mode 100644 libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp delete mode 100644 libclambcc/ClamBCModule/ClamBCModule.cpp create mode 100644 libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt create mode 100644 libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt create mode 100644 libclambcc/ClamBCPreserveABIs/CMakeLists.txt create mode 100644 libclambcc/ClamBCRebuild/CMakeLists.txt create mode 100644 libclambcc/ClamBCRegAlloc/CMakeLists.txt rename libclambcc/{Common => ClamBCRegAlloc}/ClamBCRegAlloc.cpp (80%) create mode 100644 libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h create mode 100644 libclambcc/ClamBCRemoveFSHL/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp create mode 100644 
libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp create mode 100644 libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp create mode 100644 libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt delete mode 100644 libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp create mode 100644 libclambcc/ClamBCRemoveUSUB/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp create mode 100644 libclambcc/ClamBCRemoveUndefs/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt create mode 100644 libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp create mode 100644 libclambcc/ClamBCTrace/CMakeLists.txt create mode 100644 libclambcc/ClamBCVerifier/CMakeLists.txt create mode 100644 libclambcc/ClamBCWriter/CMakeLists.txt create mode 100644 libclambcc/Common/CMakeLists.txt diff --git a/.clang-format b/.clang-format index da3e28f87e..267ddb0a4f 100644 --- a/.clang-format +++ b/.clang-format @@ -2,15 +2,17 @@ Language: Cpp AccessModifierOffset: -2 AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: false +AlignConsecutiveMacros: None +AlignConsecutiveAssignments: Consecutive +AlignConsecutiveBitFields: None +AlignConsecutiveDeclarations: None AlignEscapedNewlines: Left -AlignOperands: true +AlignOperands: Align AlignTrailingComments: true AllowAllArgumentsOnNextLine: true AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true +AllowShortEnumsOnASingleLine: true AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: false AllowShortFunctionsOnASingleLine: Empty @@ -21,12 +23,14 @@ AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: 
false AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability BinPackArguments: true BinPackParameters: true BraceWrapping: AfterCaseLabel: false AfterClass: true - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false AfterFunction: true AfterNamespace: true @@ -36,11 +40,14 @@ BraceWrapping: AfterExternBlock: false BeforeCatch: false BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: true BreakBeforeBraces: Linux BreakBeforeInheritanceComma: false BreakInheritanceList: BeforeColon @@ -59,30 +66,40 @@ Cpp11BracedListStyle: true DeriveLineEnding: true DerivePointerAlignment: true DisableFormat: false +EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH +StatementAttributeLikeMacros: + - Q_EMIT IncludeBlocks: Preserve IncludeCategories: - Regex: '^"(llvm|llvm-c|clang|clang-c)/' Priority: 2 SortPriority: 0 + CaseSensitive: false - Regex: '^(<|"(gtest|gmock|isl|json)/)' Priority: 3 SortPriority: 0 + CaseSensitive: false - Regex: '.*' Priority: 1 SortPriority: 0 + CaseSensitive: false IncludeIsMainRegex: '(Test)?$' IncludeIsMainSourceRegex: '' IndentCaseLabels: true +IndentCaseBlocks: false IndentGotoLabels: true IndentPPDirectives: None +IndentExternBlock: AfterExternBlock +IndentRequires: false IndentWidth: 4 IndentWrappedFunctionNames: false +InsertTrailingCommas: None JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: true @@ -92,6 +109,7 @@ MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBinPackProtocolList: Auto ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true PenaltyBreakAssignment: 2 @@ -102,18 +120,22 @@ 
PenaltyBreakString: 1000 PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyIndentedWhitespace: 0 PointerAlignment: Right ReflowComments: true SortIncludes: false +SortJavaStaticImport: Before SortUsingDeclarations: true SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements +SpaceAroundPointerQualifiers: Default SpaceBeforeRangeBasedForLoopColon: true SpaceInEmptyBlock: false SpaceInEmptyParentheses: false @@ -125,6 +147,7 @@ SpacesInCStyleCastParentheses: false SpacesInParentheses: false SpacesInSquareBrackets: false SpaceBeforeSquareBrackets: false +BitFieldColonSpacing: Both Standard: Latest StatementMacros: - Q_UNUSED @@ -132,5 +155,11 @@ StatementMacros: TabWidth: 8 UseCRLF: false UseTab: Never +WhitespaceSensitiveMacros: + - STRINGIZE + - PP_STRINGIZE + - BOOST_PP_STRINGIZE + - NS_SWIFT_NAME + - CF_SWIFT_NAME ... diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index 756f1aa23a..eb9d0f9339 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -1,4 +1,4 @@ -name: clang-format +name: clang-format-16 # Controls when the action will run. Triggers the workflow on push or pull request # events but only for the master branch @@ -28,6 +28,6 @@ jobs: - name: Run clang-format style check for C/C++ programs. 
uses: jidicula/clang-format-action@v4.4.1 with: - clang-format-version: "11" + clang-format-version: "16" check-path: ${{ matrix.path['check'] }} exclude-regex: ${{ matrix.path['exclude'] }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 5285c28e23..13098bdff7 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -84,7 +84,6 @@ jobs: # working-directory: ${{runner.workspace}}/build # run: cpack -C ${{ env.BUILD_TYPE }} - # TODO: Some day add support for building on macOS with any modern LLVM version, not just LLVM-8. # # build-macos: # runs-on: macos-latest @@ -140,7 +139,7 @@ jobs: run: sudo apt-get update - name: Install Dependencies - run: sudo apt-get install -y llvm-8-dev clang-8 clamav # TODO: use just 'llvm-dev' when we can support any recent LLVM version. + run: sudo apt-get install -y llvm-16-dev clang-16 clamav # TODO: use just 'llvm-dev' when we can support any recent LLVM version. - name: Install pytest for easier to read test results run: python3 -m pip install pytest diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f489ac29f..4821aa878e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ string(TIMESTAMP TODAY "%Y%m%d") set(VERSION_SUFFIX "") project( ClamBCC - VERSION "0.105.0" + VERSION "1.3.0" DESCRIPTION "ClamAV Bytecode Compiler." ) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) @@ -106,7 +106,7 @@ if(ENABLE_TESTS) find_package(ClamAV REQUIRED) endif() -find_package(LLVM 8 REQUIRED) +find_package(LLVM 16 REQUIRED) # Do not disable assertions based on CMAKE_BUILD_TYPE. 
foreach(_build_type "Release" "MinSizeRel" "RelWithDebInfo") diff --git a/clam-format b/clam-format index 303e077d83..bbee2f76c3 100755 --- a/clam-format +++ b/clam-format @@ -1,6 +1,7 @@ #!/bin/bash -clang-format -style='{ Language: Cpp, UseTab: Never, IndentWidth: 4, AlignTrailingComments: true, AlignConsecutiveAssignments: true, AlignAfterOpenBracket: true, AlignEscapedNewlines: Left, AlignOperands: true, AllowShortFunctionsOnASingleLine: Empty, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true, BreakBeforeBraces: Linux, BreakBeforeTernaryOperators: true, ColumnLimit: 0, FixNamespaceComments: true, SortIncludes: false, MaxEmptyLinesToKeep: 1, SpaceBeforeParens: ControlStatements, IndentCaseLabels: true, DerivePointerAlignment: true }' -dump-config > .clang-format +clang-format-12 -style='{ Language: Cpp, UseTab: Never, IndentWidth: 4, AlignTrailingComments: true, AlignConsecutiveAssignments: true, AlignAfterOpenBracket: true, AlignEscapedNewlines: Left, AlignOperands: true, AllowShortFunctionsOnASingleLine: Empty, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true, BreakBeforeBraces: Linux, BreakBeforeTernaryOperators: true, ColumnLimit: 0, FixNamespaceComments: true, SortIncludes: false, MaxEmptyLinesToKeep: 1, SpaceBeforeParens: ControlStatements, IndentCaseLabels: true, DerivePointerAlignment: true }' -dump-config > .clang-format -clang-format -i -verbose libclambcc/*/*.cpp -clang-format -i -verbose libclambcc/*/*.h +clang-format-12 -i -verbose `find libclambcc -name "*.cpp"` +clang-format-12 -i -verbose `find libclambcc -name "*.h"` +clang-format-12 -i -verbose `find libclambcc -name "*.c"` diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 7bf72ab6a8..dcb5a4cc1f 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -11,13 +11,11 @@ #These are the list of supported versions -#consider changing this to start at 8 and go up to 99. 
That will cover us -#from having to update this when new versions come out. -CLANG_LLVM_KNOWN_VERSIONS = [8, 9, 10, 11, 12] +CLANG_LLVM_KNOWN_VERSIONS = [16] #This is the min clang/llvm version this has been tested with. -MIN_CLANG_LLVM_VERSION = 8 -PREFERRED_CLANG_LLVM_VERSION = 8 +MIN_CLANG_LLVM_VERSION = 16 +PREFERRED_CLANG_LLVM_VERSION = 16 CLANG_NAME = "clang" LLVM_NAME = "opt" @@ -44,15 +42,28 @@ -Wno-constant-conversion \ " +COMMON_WARNING_OPTIONS = [ + "-Wno-backslash-newline-escape" + , "-Wno-pointer-sign" + , "-Wno-return-type" + , "-Wno-incompatible-pointer-types" + , "-Wno-unused-value" + , "-Wno-shift-negative-value" + , "-Wno-implicit-function-declaration" + , "-Wno-incompatible-library-redeclaration" + , "-Wno-implicit-int" + , "-Wno-constant-conversion" + ] + TMPDIR=".__clambc_tmp" -INCDIR = Path(__file__).parent / '..' / 'include' +INCDIR = str(Path(__file__).parent / '..' / 'include') # Check for libclambcc.so at a location relative to this script first. FOUND_SHARED_OBJ = False SHARED_OBJ_DIR = Path(__file__).parent / '..' / 'lib' -if (SHARED_OBJ_DIR / 'libclambcc.so').exists(): +if (SHARED_OBJ_DIR / 'libclambccommon.so').exists(): SHARED_OBJ_FILE = SHARED_OBJ_DIR / 'libclambcc.so' FOUND_SHARED_OBJ = True @@ -109,10 +120,18 @@ def validate(self) -> bool: return True -def run(cmd: str) -> int: +def run(cmd: list) -> int: + cmd = ' '.join(cmd) if VERBOSE: print(cmd) - return os.system(cmd) + + ret = os.system(cmd) + if ret: + print (cmd) + print (ret) + sys.exit(1) + + return ret def die(msg: str, exitStatus: int) -> None: @@ -170,42 +189,40 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC outFile = getIrFile(fileName, debugBuild) - includePaths = "" + cmd = [] + cmd.append(clangLLVM.getClang()) + #cmd.append("-m32") #TODO: Put this back and resolve issues with it. 
+ cmd.append("-S") + cmd.append("-fno-discard-value-names") + cmd.append("-Wno-implicit-function-declaration") + cmd.append("-fno-vectorize") + cmd.append("--language=c") + cmd.append("-emit-llvm") + cmd.append("-Werror=unused-command-line-argument") + cmd.append("-Xclang") + cmd.append("-disable-O0-optnone") + cmd.append("-Xclang -no-opaque-pointers") + cmd.append(fileName) + cmd.append("-o") + cmd.append(outFile) + cmd.append("-I") + cmd.append(INCDIR) + cmd.append("-include") + cmd.append("bytecode.h") + cmd.append("-D__CLAMBC__") + if options.includes: for i in options.includes: - includePaths += f"-I{i} " + cmd.append("-I") + cmd.append(i) - defines = "" if options.defines: for d in options.defines: - defines += f"-D{d} " - - cmd = f"{clangLLVM.getClang()} \ - -S \ - -fno-discard-value-names \ - --language=c \ - -emit-llvm \ - -Werror=unused-command-line-argument \ - -Xclang \ - -disable-O0-optnone \ - -o {outFile} \ - {fileName} \ - " - - cmd += f" \ - {includePaths} \ - {defines} \ - " + cmd.append('-D') + cmd.append(d) if debugBuild: - cmd += " -g \ - " - - if (not standardCompiler): - cmd += f" -I {INCDIR} \ - -include bytecode.h \ - -D__CLAMBC__ \ - " + cmd.append('-g') if options.disableCommonWarnings: cmd += COMMON_WARNING_OPTIONS @@ -260,8 +277,15 @@ def linkIRFiles(clangLLVM: ClangLLVM, linkedFile: str, irFiles: list) -> int: Given an output file name and list of IR files, link the IR files. Returns the exit status code for the call to `llvm-link`. 
''' - inFiles = " ".join(irFiles) - cmd = f"{clangLLVM.getLLVMLink()} -S -o {linkedFile} {inFiles}" + cmd = [] + cmd.append(clangLLVM.getLLVMLink()) + cmd.append("-S") + cmd.append("-o") + cmd.append(linkedFile) + cmd += irFiles + + #TODO: Remove (FUTURE VERSION) + cmd.append("-opaque-pointers=0") return run(cmd) @@ -444,16 +468,27 @@ def getOutputString(linked: IRFile, ignore: IRFile) -> str: def createOptimizedTmpFile(clangLLVM: ClangLLVM, linkedFile: str) -> str: name = getOptimizedTmpFileName(linkedFile) - cmd = f"{clangLLVM.getOpt()} \ + cmd = f'{clangLLVM.getOpt()} \ -S \ {linkedFile} \ -o {name} \ - -internalize -internalize-public-api-list=entrypoint \ - -globalopt \ - " + -internalize-public-api-list=entrypoint \ + --passes="internalize,globalopt" \ + ' + + cmd = [] + cmd.append(clangLLVM.getOpt()) + cmd.append("-S") + cmd.append(linkedFile) + cmd.append("-o") + cmd.append(name) + cmd.append("-internalize-public-api-list=entrypoint") + cmd.append('--passes="internalize,globalopt"') ret = run(cmd) if None == ret: + print ("remoev me") + import pdb ; pdb.set_trace() return None return name @@ -491,109 +526,144 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: return res +INTERNALIZE_API_LIST=[ "_Z10entrypointv" + , "entrypoint" + , "__clambc_kind" + , "__clambc_virusname_prefix" + , "__clambc_virusnames" + , "__clambc_filesize" + , "__clambc_match_counts" + , "__clambc_match_offsets" + , "__clambc_pedata" + , "__Copyright" + ] + +OPTIMIZE_OPTIONS = ["-S" + , "--disable-loop-unrolling" + , " --disable-i2p-p2i-opt" + , " --disable-loop-unrolling" + , " --disable-promote-alloca-to-lds" + , " --disable-promote-alloca-to-vector" + , " --disable-simplify-libcalls" + , " --disable-tail-calls" + , " --vectorize-slp=false" + , " --vectorize-loops=false" + , " -internalize-public-api-list=\"%s\"" % ','.join(INTERNALIZE_API_LIST) + ] + +#TODO: Remove this when we properly handle opaque pointers. 
+OPTIMIZE_OPTIONS.append("-opaque-pointers=0") + +OPTIMIZE_PASSES = ["function(mem2reg)" + , 'verify' +# , 'clambc-remove-undefs' #TODO: MAY NOT BE NEEDED +# , 'verify' + , 'clambc-preserve-abis' + , 'verify' + , 'default' + , 'globalopt' + , 'clambc-preserve-abis' #remove fake function calls because O3 has already run + , 'verify' + , 'clambc-remove-unsupported-icmp-intrinsics' + , 'verify' + , 'clambc-remove-usub' + , 'verify' + , 'clambc-remove-fshl' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'lowerswitch' + , 'verify' + , 'clambc-remove-icmp-sle' + , 'verify' + , 'function(clambc-verifier)' + , 'verify' + , 'clambc-remove-freeze-insts' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'clambc-lcompiler-helper' #compile the logical_trigger function to a + , 'verify' + , 'clambc-lcompiler' #compile the logical_trigger function to a + , 'verify' + , 'internalize' + , 'verify' + , 'clambc-rebuild' + , 'verify' + , 'clambc-trace' + , 'verify' + , 'clambc-outline-endianness-calls' + , 'verify' +# , 'clambc-change-malloc-arg-size' #TODO: MAY NOT BE NEEDED +# , 'verify' + , 'clambc-extend-phis-to-64-bit' + , 'verify' + , 'clambc-convert-intrinsics-to-32Bit' + , 'verify' + , 'globalopt' + , 'clambc-prepare-geps-for-writer' + , 'verify' + , 'clambc-writer' + , 'verify' +] + +OPTIMIZE_LOADS=[ f"--load {SHARED_OBJ_DIR}/libclambccommon.so" +# , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveundefs.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcpreserveabis.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveunsupportedicmpintrinsics.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveusub.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremovefshl.so" +# , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremovepointerphis.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcloweringnf.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveicmpsle.so" + , 
f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcverifier.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremovefreezeinsts.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcloweringf.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambclogicalcompilerhelper.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambclogicalcompiler.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcrebuild.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambctrace.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcoutlineendiannesscalls.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcchangemallocargsize.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcextendphisto64bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcconvertintrinsicsto32bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcpreparegepsforwriter.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcanalyzer.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcregalloc.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcwriter.so" +] + + + def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inputSourceFile: str, standardCompiler: bool) -> int: - internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright" - if standardCompiler: - internalizeAPIList += ",main" - - #TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it. - cmd = (f'{clangLLVM.getOpt()} ' - f' -S' - f' -verify-each' - f' -load "{SHARED_OBJ_FILE}"' - f' {inFile}' - f' -o {outFile}' - f' -mem2reg' - f' -clambc-remove-undefs' #add pointer bounds checking. 
- f' -clambc-preserve-abis' #add fake function calls that use all of - #the arguments so that O3 doesn't change - #the argument lists - f' -O3' - f' -clambc-preserve-abis' #remove fake function calls because O3 has already run - f' -clambc-remove-pointer-phis' - f' -dce' - f' -disable-loop-unrolling' - f' -disable-loop-vectorization' - f' -disable-slp-vectorization' - f' -globaldce' - f' -strip-dead-prototypes' - f' -constmerge' - f' -mem2reg' - f' -always-inline' - f' -globalopt' - f' -lowerswitch' - f' -lowerinvoke' - f' -globalopt' - f' -simplifycfg' - f' -indvars' - f' -constprop' - f' -clambc-lowering-notfinal' # perform lowering pass - f' -lowerswitch' - f' -clambc-verifier' - f' -clambc-lowering-notfinal' # perform lowering pass - f' -dce' - f' -simplifycfg' - f' -mem2reg' - f' -clambc-lcompiler' #compile the logical_trigger function to a - #logical signature. - f' -internalize -internalize-public-api-list="{internalizeAPIList}"' - f' -globaldce' - f' -instcombine' - f' -clambc-rebuild' - f' -verify' - f' -simplifycfg' - f' -dce' - f' -lowerswitch' - f' -clambc-verifier' - f' -verify' - f' -strip-debug-declare' - f' -clambc-lowering-final' - f' -clambc-trace' - f' -dce' - f' -clambc-module' - f' -verify' - f' -globalopt' - f' -remove-selects' - f' -clambc-outline-endianness-calls' #outline the endianness calls - #because otherwise the call - #is replaced with a constant - #that is based on where the - #signature was compiled, and - #won't always be accurate. - f' -clambc-change-malloc-arg-size' #make sure we always use the - #64-bit malloc. - f' -globalopt' - f' -clambc-extend-phis-to-64bit' #make all integer phi nodes 64-bit - #because the llvm runtime inserts a - #cast after phi nodes without - #verifying that there is not - #another phi node after it. - f' -clambc-prepare-geps-for-writer' #format gep indexes to not not - #have more than 2, because - #otherwise the writer gets - #unhappy. 
- f' -globalopt' - f' -clambc-convert-intrinsics' #convert all memset intrinsics to - #the 32-bit instead of the 64-bit - #intrinsic - f' -clambc-writer' #write the bytecode - f' -clambc-writer-input-source={inputSourceFile}' - f' -clambc-sigfile={sigFile}' - ) - - if standardCompiler: - cmd += f" -clambc-standard-compiler" + cmd = [] + cmd.append(clangLLVM.getOpt()) + cmd.append(inFile) + cmd.append('-o') + cmd.append(outFile) + cmd += OPTIMIZE_OPTIONS + cmd += OPTIMIZE_LOADS + + s = '--passes="' + first = True + for v in OPTIMIZE_PASSES: + if first: + first = False + else: + s += ',' + s += v + s += '"' + cmd.append(s) - return run(cmd) + cmd.append(f'-clambc-writer-input-source={inputSourceFile}') + cmd.append(f'-clambc-sigfile={sigFile}') -def genExe(clangLLVM: ClangLLVM, optimizedFile: str, outputFile: str) -> int: - cmd = f"{clangLLVM.getClang} {optimizedFile} -o {outputFile}" return run(cmd) -#This is definitely hacky, but I *think* it's the only change I need to make for +#This is definitely hacky, but it's the only change I need to make for #this to work def fixFileSize(optimizedFile: str) -> None: f = open(optimizedFile) @@ -779,16 +849,11 @@ def main(): parser.add_option(CLANG_BINARY_ARG, dest="clangBinary", help="Path to clang binary") parser.add_option(OPT_BINARY_ARG, dest="optBinary", help="Path to opt binary") -# parser.add_option("--generate-exe", dest="genexe", action="store_true", -# default=False, help="This is if you want to build a correctly formatted bytecode \ -# signature as an executable for debugging (NOT IMPLEMENTED)") parser.add_option("-I", action="append", dest="includes", default=None) parser.add_option("-D", action="append", dest="defines", default=None) parser.add_option("--disable-common-warnings", dest="disableCommonWarnings", - action="store_true", default=False, - help=f"{COMMON_WARNING_OPTIONS} (Found in some bytecode signatures).") -# parser.add_option("--standard-compiler", dest="standardCompiler", action="store_true", 
default=False, -# help="This is if you want to build a normal c program as an executable to test the compiler.") + action="store_true", default=True, + help="{%s} (Found in some bytecode signatures)." % (' '.join(COMMON_WARNING_OPTIONS))) (options, args) = parser.parse_args() if options.version: @@ -800,10 +865,7 @@ def main(): if None == clangLLVM: sys.exit(1) - options.genexe = False - options.standardCompiler = False - - options.passthroughOptions = " ".join(parser.getPassthrough()) + options.passthroughOptions = parser.getPassthrough() if not FOUND_SHARED_OBJ: die(f"libclambcc.so not found. See instructions for building", 2) @@ -817,26 +879,14 @@ def main(): outFile = getOutfile(options, args) outFile = os.path.basename(outFile) saveFiles = options.save - bCompiler = options.standardCompiler - buildExecutable = bCompiler or options.genexe createdDir = False - #Add the compiled bytecode file extension, so that all the getName functions can find it - if bCompiler: - idx = outFile.find(COMPILED_BYTECODE_FILE_EXTENSION) - if -1 == idx: - outFile += f".{COMPILED_BYTECODE_FILE_EXTENSION}" - if not os.path.isdir(TMPDIR): os.makedirs(TMPDIR) createdDir = True -# if options.genexe: -# inFile = os.path.join(os.path.dirname(__file__), 'clambc-compiler-main.c') -# args.append(inFile) -# - res = compileFiles(clangLLVM, args, False, bCompiler, options) + res = compileFiles(clangLLVM, args, False, False, options) if not res: linkedFile = getLinkedFileName(outFile) @@ -844,40 +894,12 @@ def main(): if not res: inputSourceFile = getInputSourceFileName(outFile) - if bCompiler: - f = open(inputSourceFile, "w") - f.close() - else: - res = createInputSourceFile(clangLLVM, inputSourceFile, args, options) + res = createInputSourceFile(clangLLVM, inputSourceFile, args, options) if not res: optimizedFile = getOptimizedFileName(outFile) outFile = getOutfile(options, args) - res = optimize(clangLLVM, linkedFile, optimizedFile, outFile, inputSourceFile, bCompiler) - - if not res: - if 
options.genexe: - - #Add the 'main' and all the stuff that clam provides (TODO: make this configurable by the user) - mainFile = os.path.join(os.path.dirname(__file__), 'clambc-compiler-main.c') - res = compileFile(clangLLVM, mainFile, False, False, options) - if res: - print("Build FAILED") - import pdb ; pdb.set_trace() - - if not res: - mainIRFile = getIrFile(mainFile, False) - - fixFileSize(optimizedFile) - fixFileSize(mainIRFile) - - res = linkIRFiles(clangLLVM, optimizedFile, [optimizedFile, mainIRFile]) - - bCompiler = True - - if not res: - if bCompiler: - res = genExe(clangLLVM, optimizedFile, outFile) + res = optimize(clangLLVM, linkedFile, optimizedFile, outFile, inputSourceFile, False) if ((not saveFiles) and createdDir): shutil.rmtree(TMPDIR) @@ -891,3 +913,6 @@ def main(): if '__main__' == __name__: main() + + + diff --git a/cmake/FindClamAV.cmake b/cmake/FindClamAV.cmake index 0a23a1bd9d..1a5f634716 100644 --- a/cmake/FindClamAV.cmake +++ b/cmake/FindClamAV.cmake @@ -26,7 +26,7 @@ find_program(clambc_EXECUTABLE HINTS "${ClamAV_HOME}" PATH_SUFFIXES "bin" ) -if(NOT clambc_EXECUTABLE_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) +if(NOT clambc_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) message("Unable to find clambc") endif() diff --git a/cmake/FindClang.cmake b/cmake/FindClang.cmake index 4db126c9ed..9ba32f096f 100644 --- a/cmake/FindClang.cmake +++ b/cmake/FindClang.cmake @@ -30,7 +30,7 @@ #============================================================================= -set(KNOWN_VERSIONS 11 10 9 8 7 6.0 5.0 4.0 3.9 3.8) +set(KNOWN_VERSIONS 16) foreach(version ${KNOWN_VERSIONS}) if(DEFINED Clang_FIND_VERSION AND Clang_FIND_VERSION VERSION_EQUAL version) diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake index 9e94b2d509..fdfecb26b1 100644 --- a/cmake/FindLLVM.cmake +++ b/cmake/FindLLVM.cmake @@ -44,7 +44,6 @@ elseif(NOT LLVM_CONFIG_EXECUTABLE) foreach(i RANGE 0 9) list(APPEND LLVM_FIND_VERSION_CONCAT llvm-config${LLVM_FIND_VERSION_CONCAT_PREFIX}${i}) 
endforeach() - message("llvm-config list: ${LLVM_FIND_VERSION_CONCAT}") find_program(LLVM_CONFIG_EXECUTABLE NAMES llvm-config-${LLVM_FIND_VERSION} ${LLVM_FIND_VERSION_CONCAT} llvm-config DOC "llvm-config executable") @@ -133,7 +132,7 @@ if(LLVM_FOUND) OUTPUT_STRIP_TRAILING_WHITESPACE ) - if(NOT ${LLVM_VERSION} VERSION_LESS "3.8.0") + if(NOT ${LLVM_VERSION} VERSION_LESS "16") execute_process( COMMAND ${LLVM_CONFIG_EXECUTABLE} --shared-mode OUTPUT_VARIABLE _LLVM_SHARED_MODE @@ -148,16 +147,6 @@ if(LLVM_FOUND) set(LLVM_SHARED_MODE OFF) endif() - # potentially add include dir from binary dir for non-installed LLVM - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --src-root - OUTPUT_VARIABLE _llvmSourceRoot - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(FIND "${LLVM_INCLUDE_DIRS}" "${_llvmSourceRoot}" _llvmIsInstalled) - if(NOT _llvmIsInstalled) - list(APPEND LLVM_INCLUDE_DIRS "${LLVM_INSTALL_PREFIX}/include") - endif() endif() if(LLVM_FIND_REQUIRED AND NOT LLVM_FOUND) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index dea625e7cb..369f3cc1bd 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,67 +1,9 @@ # Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. -# -# The hello object library -# -add_library(hello_obj OBJECT) -target_sources(hello_obj - PRIVATE - HelloWorld/HelloWorld.cpp -) -target_include_directories(hello_obj - PRIVATE - ../libclambcc # HACK: For Common/clambc.h - ${LLVM_INCLUDE_DIRS} -) +#'PassManager' is using the 'new' passmanager. This was added +#for the upgrade to llvm 16, although the 'new' pass manager +#has been around a while. -set_target_properties(hello_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") - -# -# For testing -# -#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) - -# -# The hello shared library. 
-# -add_library( hello SHARED ) -target_link_libraries( hello - PUBLIC - hello_obj ) -set_target_properties( hello PROPERTIES - VERSION ${LIBCLAMBC_VERSION} - SOVERSION ${LIBCLAMBC_SOVERSION} ) - -target_link_directories(hello_obj PRIVATE ${LLVM_LIBRARY_DIRS}) -target_link_libraries(hello_obj PUBLIC ${LLVM_LIBS}) - -if(WIN32) - install(TARGETS hello DESTINATION .) - - # Also install shared library (DLL) dependencies - install(CODE [[ - file(GET_RUNTIME_DEPENDENCIES - LIBRARIES - $ - RESOLVED_DEPENDENCIES_VAR _r_deps - UNRESOLVED_DEPENDENCIES_VAR _u_deps - DIRECTORIES - ${LLVM_LIBRARY_DIRS} - ) - foreach(_file ${_r_deps}) - string(TOLOWER ${_file} _file_lower) - if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") - file(INSTALL - DESTINATION "${CMAKE_INSTALL_PREFIX}" - TYPE SHARED_LIBRARY - FOLLOW_SYMLINK_CHAIN - FILES "${_file}" - ) - endif() - endforeach() - #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") - ]]) -else() - install(TARGETS hello DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +add_subdirectory(LegacyPassManager) +add_subdirectory(PassManager) diff --git a/examples/LegacyPassManager/CMakeLists.txt b/examples/LegacyPassManager/CMakeLists.txt new file mode 100644 index 0000000000..dea625e7cb --- /dev/null +++ b/examples/LegacyPassManager/CMakeLists.txt @@ -0,0 +1,67 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The hello object library +# +add_library(hello_obj OBJECT) +target_sources(hello_obj + PRIVATE + HelloWorld/HelloWorld.cpp +) + +target_include_directories(hello_obj + PRIVATE + ../libclambcc # HACK: For Common/clambc.h + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(hello_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) + +# +# The hello shared library. 
+#
+add_library( hello SHARED )
+target_link_libraries( hello
+    PUBLIC
+        hello_obj )
+set_target_properties( hello PROPERTIES
+    VERSION ${LIBCLAMBC_VERSION}
+    SOVERSION ${LIBCLAMBC_SOVERSION} )
+
+target_link_directories(hello_obj PRIVATE ${LLVM_LIBRARY_DIRS})
+target_link_libraries(hello_obj PUBLIC ${LLVM_LIBS})
+
+if(WIN32)
+    install(TARGETS hello DESTINATION .)
+
+    # Also install shared library (DLL) dependencies
+    install(CODE [[
+        file(GET_RUNTIME_DEPENDENCIES
+            LIBRARIES
+                $<TARGET_FILE:hello>
+            RESOLVED_DEPENDENCIES_VAR _r_deps
+            UNRESOLVED_DEPENDENCIES_VAR _u_deps
+            DIRECTORIES
+                ${LLVM_LIBRARY_DIRS}
+        )
+        foreach(_file ${_r_deps})
+            string(TOLOWER ${_file} _file_lower)
+            if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*")
+                file(INSTALL
+                    DESTINATION "${CMAKE_INSTALL_PREFIX}"
+                    TYPE SHARED_LIBRARY
+                    FOLLOW_SYMLINK_CHAIN
+                    FILES "${_file}"
+                )
+            endif()
+        endforeach()
+        #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}")
+    ]])
+else()
+    install(TARGETS hello DESTINATION ${CMAKE_INSTALL_LIBDIR})
+endif()
diff --git a/examples/HelloWorld/HelloWorld.cpp b/examples/LegacyPassManager/HelloWorld/HelloWorld.cpp
similarity index 96%
rename from examples/HelloWorld/HelloWorld.cpp
rename to examples/LegacyPassManager/HelloWorld/HelloWorld.cpp
index cdd120b5e7..f8680f20d6 100644
--- a/examples/HelloWorld/HelloWorld.cpp
+++ b/examples/LegacyPassManager/HelloWorld/HelloWorld.cpp
@@ -10,7 +10,7 @@
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/Transforms/IPO/PassManagerBuilder.h"
 
-#include "Common/clambc.h"
+//#include "Common/clambc.h"
 using namespace llvm;
 
 namespace {
diff --git a/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp b/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp
new file mode 100644
index 0000000000..450236f98a
--- /dev/null
+++ b/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp
@@ -0,0 +1,129 @@
+/*
+ * Compile LLVM bytecode to ClamAV bytecode.
+ *
+ * Copyright (C) 2020-2023 Sourcefire, Inc.
+ *
+ * Authors: Andy Ragusa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+//#include "Common/clambc.h"
+//#include "Common/ClamBCUtilities.h"
+
+#include <llvm/IR/Module.h>
+#include <llvm/IR/PassManager.h>
+#include <llvm/Support/raw_ostream.h>
+
+#include <llvm/ADT/ArrayRef.h>
+#include <llvm/ADT/StringRef.h>
+#include <llvm/Passes/PassBuilder.h>
+
+#include <llvm/Passes/PassPlugin.h>
+
+using namespace llvm;
+
+
+/* Modeled after CallGraphAnalysis */
+
+namespace
+{
+    // Result type produced by ExampleAnalysis; its constructor logs when a result is built.
+    class AnalysisResult {
+        public:
+            AnalysisResult(){
+                llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << "\n";
+            }
+
+    };
+    // Module-level analysis; registered with the ModuleAnalysisManager in llvmGetPassPluginInfo().
+    class ExampleAnalysis : public AnalysisInfoMixin<ExampleAnalysis>
+    {
+
+        public:
+
+            friend AnalysisInfoMixin<ExampleAnalysis>;
+            static AnalysisKey Key;
+
+
+            ExampleAnalysis(){
+            }
+
+            typedef AnalysisResult Result;
+            // Logs a trace line and returns a freshly constructed AnalysisResult; the module is not inspected.
+            AnalysisResult run(llvm::Module & /*m*/, llvm::ModuleAnalysisManager & /*mam*/){
+
+                llvm::errs() << "<" << "Analysis::" << __LINE__ << ">" << "\n";
+                return AnalysisResult();
+
+            }
+
+    };
+
+    AnalysisKey ExampleAnalysis::Key;
+    // Module pass that requests the ExampleAnalysis result and reports every analysis as preserved.
+    struct ExamplePass : public PassInfoMixin<ExamplePass>
+    {
+        protected:
+            Module *pMod = nullptr;
+            bool bChanged = false;
+
+        public:
+
+            virtual ~ExamplePass() {}
+
+            PreservedAnalyses run(Module & m, ModuleAnalysisManager & MAM)
+            {
+                pMod = &m;
+                llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << "Transform Pass" << "\n";
+                // Request the analysis result from the manager; the returned reference is not used further here.
+                MAM.getResult<ExampleAnalysis>(m);
+
+                llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << "Transform Pass (leaving)" << "\n";
+
+                return PreservedAnalyses::all();
+            }
+    }; // end of struct ExamplePass
+
+} // end of anonymous namespace
+
+// Plugin entry point: maps the pipeline name "example-pass-with-analysis" to ExamplePass and registers ExampleAnalysis.
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo() {
+    return {
+        LLVM_PLUGIN_API_VERSION, "ExamplePass", "v0.1",
+        [](PassBuilder &PB) {
+            PB.registerPipelineParsingCallback(
+                    [](StringRef Name, ModulePassManager &MPM,
+                        ArrayRef<PassBuilder::PipelineElement>) {
+                    if(Name == "example-pass-with-analysis"){
+                        MPM.addPass(ExamplePass());
+                        return true;
+                    }
+                    return false;
+                    }
+            );
+
+            PB.registerAnalysisRegistrationCallback(
+                    [](ModuleAnalysisManager &mam) {
+                        mam.registerPass([] () { return ExampleAnalysis(); } );
+                    }
+            );
+        }
+    };
+}
+
+
+
diff --git a/examples/PassManager/AnalysisPlugin/CMakeLists.txt b/examples/PassManager/AnalysisPlugin/CMakeLists.txt
new file mode 100644
index 0000000000..9a8ad5d6db
--- /dev/null
+++ b/examples/PassManager/AnalysisPlugin/CMakeLists.txt
@@ -0,0 +1,72 @@
+# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+
+#
+# The analysisplugin object library
+#
+add_library(analysisplugin_obj OBJECT)
+target_sources(analysisplugin_obj
+    PRIVATE
+        AnalysisPlugin.cpp
+)
+
+target_include_directories(analysisplugin_obj
+    PRIVATE
+        ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file)
+        .                   # For Common/clambc.h
+        ..                  # For clambc.h #TODO: change all passes to use "Common" and then delete this line.
+        ${LLVM_INCLUDE_DIRS}
+)
+
+set_target_properties(analysisplugin_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}")
+
+#
+# For testing
+#
+#target_compile_definitions(analysisplugin_obj -DLOG_BEFORE_AFTER=1)
+
+#
+# The analysisplugin shared library.
+#
+add_library( analysisplugin SHARED )
+target_link_libraries( analysisplugin
+    PUBLIC
+        analysisplugin_obj )
+set_target_properties( analysisplugin PROPERTIES
+    VERSION ${LIBCLAMBC_VERSION}
+    SOVERSION ${LIBCLAMBC_SOVERSION} )
+
+target_link_directories(analysisplugin PRIVATE ${LLVM_LIBRARY_DIRS})
+target_link_libraries(analysisplugin PUBLIC ${LLVM_LIBS})
+
+if(WIN32)
+    install(TARGETS analysisplugin DESTINATION .)
+
+    # Also install shared library (DLL) dependencies
+    install(CODE [[
+        file(GET_RUNTIME_DEPENDENCIES
+            LIBRARIES
+                $<TARGET_FILE:analysisplugin>
+            RESOLVED_DEPENDENCIES_VAR _r_deps
+            UNRESOLVED_DEPENDENCIES_VAR _u_deps
+            DIRECTORIES
+                ${LLVM_LIBRARY_DIRS}
+        )
+        foreach(_file ${_r_deps})
+            string(TOLOWER ${_file} _file_lower)
+            if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*")
+                file(INSTALL
+                    DESTINATION "${CMAKE_INSTALL_PREFIX}"
+                    TYPE SHARED_LIBRARY
+                    FOLLOW_SYMLINK_CHAIN
+                    FILES "${_file}"
+                )
+            endif()
+        endforeach()
+        #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}")
+    ]])
+else()
+    install(TARGETS analysisplugin DESTINATION ${CMAKE_INSTALL_LIBDIR})
+endif()
+
+
+
diff --git a/examples/PassManager/CMakeLists.txt b/examples/PassManager/CMakeLists.txt
new file mode 100644
index 0000000000..ec133b4ea2
--- /dev/null
+++ b/examples/PassManager/CMakeLists.txt
@@ -0,0 +1,4 @@
+# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved.
+
+
+add_subdirectory(AnalysisPlugin)
diff --git a/examples/PassManager/input/compile.sh b/examples/PassManager/input/compile.sh
new file mode 100755
index 0000000000..ac8d245db5
--- /dev/null
+++ b/examples/PassManager/input/compile.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+
+# Regenerates analysis_test.c from scratch, then compiles it to LLVM IR (the input used by run_opt.sh).
+SOURCE_FILE=analysis_test.c
+
+echo "#include <stdio.h>" > $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo "int func2(int i){ " >> $SOURCE_FILE
+echo " return i/2; " >> $SOURCE_FILE
+echo "} " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo "int func(int idx){ " >> $SOURCE_FILE
+echo " int tmp; " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo " if (idx > 1){ " >> $SOURCE_FILE
+echo " tmp = func2(11); " >> $SOURCE_FILE
+echo " } else { " >> $SOURCE_FILE
+echo " tmp = func(idx-1); " >> $SOURCE_FILE
+echo " } " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo " if (0 == tmp){ " >> $SOURCE_FILE
+echo " return 0; " >> $SOURCE_FILE
+echo " } " >> $SOURCE_FILE
+echo " return idx-1; " >> $SOURCE_FILE
+echo "} " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo "int main(int argc, char ** argv){ " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo " if (argc){ " >> $SOURCE_FILE
+echo " func(argc); " >> $SOURCE_FILE
+echo " } " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+echo " return 0; " >> $SOURCE_FILE
+echo "} " >> $SOURCE_FILE
+echo " " >> $SOURCE_FILE
+
+
+clang-16 \
+    -S \
+    -fno-discard-value-names \
+    --language=c \
+    -emit-llvm \
+    -Werror=unused-command-line-argument \
+    -Xclang \
+    -disable-O0-optnone \
+    $SOURCE_FILE
diff --git a/examples/PassManager/input/run_opt.sh b/examples/PassManager/input/run_opt.sh
new file mode 100755
index 0000000000..77544e4eb5
--- /dev/null
+++ b/examples/PassManager/input/run_opt.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# Loads the example plugin and runs its "example-pass-with-analysis" pipeline over analysis_test.ll.
+opt-16 -load-pass-plugin examples/PassManager/AnalysisPlugin/libanalysisplugin.so -passes=example-pass-with-analysis analysis_test.ll -o analysis_test.t.ll
+
diff --git a/headers/bcfeatures.h b/headers/bcfeatures.h
index 96883abcde..86b5879a6c
100644 --- a/headers/bcfeatures.h +++ b/headers/bcfeatures.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * Authors: Török Edvin diff --git a/headers/bytecode_api.h b/headers/bytecode_api.h index ea0e354473..5f4f1b4e0b 100644 --- a/headers/bytecode_api.h +++ b/headers/bytecode_api.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * Authors: Török Edvin, Kevin Lin @@ -152,13 +152,27 @@ enum FunctionalityLevels { FUNC_LEVEL_0103_4 = 125, /**< LibClamAV release 0.103.4 */ FUNC_LEVEL_0103_5 = 126, /**< LibClamAV release 0.103.5 */ FUNC_LEVEL_0103_6 = 127, /**< LibClamAV release 0.103.6 */ + FUNC_LEVEL_0103_7 = 128, /**< LibClamAV release 0.103.7 */ + FUNC_LEVEL_0103_8 = 129, /**< LibClamAV release 0.103.8 */ FUNC_LEVEL_0104 = 140, /**< LibClamAV release 0.104.0 */ FUNC_LEVEL_0104_1 = 141, /**< LibClamAV release 0.104.1 */ FUNC_LEVEL_0104_2 = 142, /**< LibClamAV release 0.104.2 */ FUNC_LEVEL_0104_3 = 143, /**< LibClamAV release 0.104.3 */ + FUNC_LEVEL_0104_4 = 144, /**< LibClamAV release 0.104.4 */ - FUNC_LEVEL_0105 = 150, /**< LibClamAV release 0.105.0 */ + FUNC_LEVEL_0105 = 150, /**< LibClamAV release 0.105.0 */ + FUNC_LEVEL_0105_1 = 151, /**< LibClamAV release 0.105.1 */ + FUNC_LEVEL_0105_2 = 152, /**< LibClamAV release 0.105.2 */ + + FUNC_LEVEL_1_0 = 160, /**< LibClamAV release 1.0.0 */ + FUNC_LEVEL_1_0_1 = 161, /**< LibClamAV release 1.0.1 */ + + FUNC_LEVEL_1_1 = 180, /**< LibClamAV release 1.1.0 */ + + FUNC_LEVEL_1_2 = 190, /**< LibClamAV release 1.2.0 */ + + FUNC_LEVEL_1_3 = 200, /**< LibClamAV release 1.3.0 */ }; /** diff --git a/headers/bytecode_api_decl.c.h 
b/headers/bytecode_api_decl.c.h index 4328796bfb..c00684e085 100644 --- a/headers/bytecode_api_decl.c.h +++ b/headers/bytecode_api_decl.c.h @@ -2,7 +2,7 @@ * ClamAV bytecode internal API * This is an automatically generated file! * - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * * Redistribution and use in source and binary forms, with or without diff --git a/headers/bytecode_detect.h b/headers/bytecode_detect.h index 71cc195f15..1e2efc00f3 100644 --- a/headers/bytecode_detect.h +++ b/headers/bytecode_detect.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * * Redistribution and use in source and binary forms, with or without diff --git a/libclambcc/CMakeLists.txt b/libclambcc/CMakeLists.txt index 4812a8f87c..def262f64c 100644 --- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -1,89 +1,28 @@ # Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
-# -# The clambcc object library -# -add_library(clambcc_obj OBJECT) -target_sources(clambcc_obj - PRIVATE - ClamBCLowering/ClamBCLowering.cpp - ClamBCVerifier/ClamBCVerifier.cpp - ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp - ClamBCRebuild/ClamBCRebuild.cpp - ClamBCTrace/ClamBCTrace.cpp - ClamBCModule/ClamBCModule.cpp - ClamBCWriter/ClamBCWriter.cpp - ClamBCAnalyzer/ClamBCAnalyzer.cpp - Common/ClamBCDiagnostics.cpp - Common/ClamBCUtilities.cpp - Common/ClamBCRegAlloc.cpp - Common/version.c - ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp - ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp - ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp - ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp - ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp - ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp - ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp - ClamBCPreserveABIs/ClamBCPreserveABIs.cpp - ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp -) -target_include_directories(clambcc_obj - PRIVATE - ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) - . # For Common/clambc.h - Common # For clambc.h #TODO: change all passes to use "Common" and then delete this line. - ${LLVM_INCLUDE_DIRS} -) - -set_target_properties(clambcc_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") - -# -# For testing -# -#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) - -# -# The clambcc shared library. -# -add_library( clambcc SHARED ) -target_link_libraries( clambcc - PUBLIC - clambcc_obj ) -set_target_properties( clambcc PROPERTIES - VERSION ${LIBCLAMBC_VERSION} - SOVERSION ${LIBCLAMBC_SOVERSION} ) - -target_link_directories(clambcc PRIVATE ${LLVM_LIBRARY_DIRS}) -target_link_libraries(clambcc PUBLIC ${LLVM_LIBS}) - -if(WIN32) - install(TARGETS clambcc DESTINATION .) 
- - # Also install shared library (DLL) dependencies - install(CODE [[ - file(GET_RUNTIME_DEPENDENCIES - LIBRARIES - $ - RESOLVED_DEPENDENCIES_VAR _r_deps - UNRESOLVED_DEPENDENCIES_VAR _u_deps - DIRECTORIES - ${LLVM_LIBRARY_DIRS} - ) - foreach(_file ${_r_deps}) - string(TOLOWER ${_file} _file_lower) - if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") - file(INSTALL - DESTINATION "${CMAKE_INSTALL_PREFIX}" - TYPE SHARED_LIBRARY - FOLLOW_SYMLINK_CHAIN - FILES "${_file}" - ) - endif() - endforeach() - #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") - ]]) -else() - install(TARGETS clambcc DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +set (CLAMBC_LIB_DIR ${CMAKE_CURRENT_SOURCE_DIR}) + +add_subdirectory(ClamBCLogicalCompiler) +add_subdirectory(ClamBCLogicalCompilerHelper) +#add_subdirectory(ClamBCRemoveUndefs) +add_subdirectory(ClamBCPreserveABIs) +add_subdirectory(ClamBCAnalyzer) +add_subdirectory(Common) +add_subdirectory(ClamBCVerifier) +#add_subdirectory(ClamBCRemovePointerPHIs) +add_subdirectory(ClamBCLowering) +add_subdirectory(ClamBCRemoveFreezeInsts) +add_subdirectory(ClamBCWriter) +add_subdirectory(ClamBCTrace) +add_subdirectory(ClamBCOutlineEndiannessCalls) +add_subdirectory(ClamBCChangeMallocArgSize) +add_subdirectory(ClamBCExtendPHIsTo64Bit) +add_subdirectory(ClamBCRebuild) +add_subdirectory(ClamBCRegAlloc) +add_subdirectory(ClamBCConvertIntrinsicsTo32Bit) +add_subdirectory(ClamBCPrepareGEPsForWriter) +add_subdirectory(ClamBCRemoveICMPSLE) +add_subdirectory(ClamBCRemoveUSUB) +add_subdirectory(ClamBCRemoveUnsupportedICMPIntrinsics) +add_subdirectory(ClamBCRemoveFSHL) diff --git a/libclambcc/ClamBCAnalyzer/CMakeLists.txt b/libclambcc/ClamBCAnalyzer/CMakeLists.txt new file mode 100644 index 0000000000..e5692d8e73 --- /dev/null +++ b/libclambcc/ClamBCAnalyzer/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcanalyzer object library +# +add_library(clambcanalyzer_obj OBJECT) +target_sources(clambcanalyzer_obj + PRIVATE + ClamBCAnalyzer.cpp +) + +target_include_directories(clambcanalyzer_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcanalyzer_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcanalyzer_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcanalyzer shared library. +# +add_library( clambcanalyzer SHARED ) +target_link_libraries( clambcanalyzer + PUBLIC + clambcanalyzer_obj ) +set_target_properties( clambcanalyzer PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcanalyzer PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcanalyzer PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcanalyzer DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp index 0527edaa83..90d2e03ad5 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp +++ b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp @@ -20,8 +20,9 @@ * MA 02110-1301, USA. */ #include "ClamBCAnalyzer.h" -#include "Common/ClamBCCommon.h" -#include "Common/ClamBCUtilities.h" + +#include "ClamBCCommon.h" +#include "ClamBCUtilities.h" #include #include @@ -37,7 +38,7 @@ using namespace llvm; -extern cl::opt WriteDI; +AnalysisKey ClamBCAnalyzer::Key; static unsigned getSpecialIndex(StringRef Name) { @@ -67,58 +68,55 @@ static bool compare_lt_functions(Function *A, Function *B) return NA.compare(NB) < 0; } -bool ClamBCAnalyzer::runOnModule(Module &M) +void ClamBCAnalysis::run(Module &m) { - pMod = &M; + pMod = &m; // Determine bytecode kind, default is 0 (generic). 
kind = 0; - GlobalVariable *GVKind = M.getGlobalVariable("__clambc_kind"); + GlobalVariable *GVKind = pMod->getGlobalVariable("__clambc_kind"); if (GVKind && GVKind->hasDefinitiveInitializer()) { kind = cast(GVKind->getInitializer())->getValue().getZExtValue(); // GVKind->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal, because the optimizer will remove it. if (kind >= 65536) { - ClamBCStop("Bytecode kind cannot be higher than 64k\n", &M); + ClamBCStop("Bytecode kind cannot be higher than 64k\n", pMod); } } - GlobalVariable *G = M.getGlobalVariable("__Copyright"); + GlobalVariable *G = pMod->getGlobalVariable("__Copyright"); if (G && G->hasDefinitiveInitializer()) { Constant *C = G->getInitializer(); // std::string c; StringRef c; if (!getConstantStringInfo(C, c)) { - ClamBCStop("Failed to extract copyright string\n", &M); + ClamBCStop("Failed to extract copyright string\n", pMod); } - // copyright = strdup(c.c_str()); copyright = c.str(); - // G->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal because the optimizer will remove it. } // Logical signature created by ClamBCLogicalCompiler. - NamedMDNode *Node = M.getNamedMetadata("clambc.logicalsignature"); + NamedMDNode *Node = pMod->getNamedMetadata("clambc.logicalsignature"); logicalSignature = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; - Node = M.getNamedMetadata("clambc.virusnames"); + Node = pMod->getNamedMetadata("clambc.virusnames"); virusnames = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; unsigned tid, fid; // unsigned cid; - startTID = tid = clamav::initTypeIDs(typeIDs, M.getContext()); + startTID = tid = clamav::initTypeIDs(typeIDs, pMod->getContext()); // arrays of [2 x i8] .. 
[7 x i8] used for struct padding for (unsigned i = 1; i < 8; i++) { - const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(M.getContext()), + const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(pMod->getContext()), i); typeIDs[Ty] = tid++; extraTypes.push_back(Ty); } std::vector types; - // cid=1; fid = 1; - for (Module::global_iterator I = M.global_begin(); I != M.global_end(); ++I) { + for (Module::global_iterator I = pMod->global_begin(); I != pMod->global_end(); ++I) { GlobalVariable *gv = llvm::cast(I); std::set insts; std::set globs; @@ -136,14 +134,14 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // globals, so introduce helper globals for nested constant expressions. if (CE->getOpcode() != Instruction::GetElementPtr) { if (CE->getOpcode() == Instruction::BitCast) { - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_bc"); CEMap[CE] = GV; continue; } errs() << "UNSUPPORTED: " << *CE << "\n"; - ClamBCStop("Unsupported constant expression", &M); + ClamBCStop("Unsupported constant expression", pMod); } ConstantInt *C0 = dyn_cast(CE->getOperand(1)); ConstantInt *C1 = dyn_cast(CE->getOperand(2)); @@ -152,7 +150,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) errs() << "UNSUPPORTED: " << *CE << "\n"; ClamBCStop("Unsupported constant expression, nonzero first" " index", - &M); + pMod); } const DataLayout &dataLayout = pMod->getDataLayout(); @@ -161,18 +159,15 @@ bool ClamBCAnalyzer::runOnModule(Module &M) indices.push_back(CE->getOperand(i)); } Type *IP8Ty = PointerType::getUnqual(Type::getInt8Ty(CE->getContext())); + Type *type = getResultType(CE); - Type *type = CE->getOperand(0)->getType(); - if (llvm::isa(type)) { - type = llvm::cast(type)->getElementType(); - } uint64_t idx = dataLayout.getIndexedOffsetInType(type, indices); Value *Idxs[1]; Idxs[0] = ConstantInt::get(Type::getInt64Ty(CE->getContext()), idx); 
Constant *C = ConstantExpr::getPointerCast(CE->getOperand(0), IP8Ty); ConstantExpr *NewCE = - cast(ConstantExpr::getGetElementPtr(nullptr, C, + cast(ConstantExpr::getGetElementPtr(C->getType(), C, Idxs)); NewCE = cast(ConstantExpr::getPointerCast(NewCE, CE->getType())); @@ -180,7 +175,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) CE->replaceAllUsesWith(NewCE); } CE = NewCE; - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_" + Twine(v)); @@ -190,6 +185,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Collect types of all globals. const Type *Ty = I->getType(); + Ty = I->getValueType(); if (!typeIDs.count(Ty)) { extraTypes.push_back(Ty); typeIDs[Ty] = tid++; @@ -199,30 +195,34 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Sort functions. std::vector functions; - for (Module::iterator I = M.begin(), E = M.end(); I != E;) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E;) { Function *F = &*I; ++I; functions.push_back(F); - F->removeFromParent(); } + + for (size_t i = 0; i < functions.size(); i++) { + functions[i]->removeFromParent(); + } + std::sort(functions.begin(), functions.end(), compare_lt_functions); for (std::vector::iterator I = functions.begin(), E = functions.end(); I != E; ++I) { - M.getFunctionList().push_back(*I); + pMod->getFunctionList().push_back(*I); } - Function *ep = M.getFunction("entrypoint"); + Function *ep = pMod->getFunction("entrypoint"); if (!ep) { - ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", &M); + ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", pMod); } if (ep->getFunctionType()->getNumParams() != 0) { - ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", &M); + ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", pMod); } unsigned dbgid = 0; - unsigned MDDbgKind = 
M.getContext().getMDKindID("dbg"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + unsigned MDDbgKind = pMod->getContext().getMDKindID("dbg"); + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { Function &F = *I; if (F.isDeclaration()) { // Don't add prototypes of debug intrinsics @@ -261,6 +261,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) extraTypes.push_back(Ty); typeIDs[Ty] = tid++; } + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { const Type *Ty; // Skip debug intrinsics, so we don't add llvm.dbg.* types @@ -282,6 +283,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } else { Ty = II->getType(); } + if (const GetElementPtrInst *GEPI = dyn_cast(&*II)) { const Type *GTy = GEPI->getPointerOperand()->getType(); if (!typeIDs.count(GTy)) { @@ -290,6 +292,20 @@ bool ClamBCAnalyzer::runOnModule(Module &M) typeIDs[GTy] = tid++; } } + + for (size_t i = 0; i < II->getNumOperands(); i++) { + Value *operand = II->getOperand(i); + if (llvm::isa(operand)) { + continue; + } + Type *pt = operand->getType(); + if (0 == typeIDs.count(pt)) { + types.push_back(pt); + extraTypes.push_back(pt); + typeIDs[pt] = tid++; + } + } + if (typeIDs.count(Ty)) { continue; } @@ -316,7 +332,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) continue; } DEBUGERR << *STy << "\n"; - ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", &M); + ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", pMod); } } if (!typeIDs.count(STy)) { @@ -328,21 +344,18 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } if (tid >= 65536) { - ClamBCStop("Attempted to use more than 64k types", &M); + ClamBCStop("Attempted to use more than 64k types", pMod); } printGlobals(startTID); - - return false; } -void ClamBCAnalyzer::printGlobals(uint16_t stid) +void ClamBCAnalysis::printGlobals(uint16_t stid) { llvm::Module &M = *pMod; // Describe types maxApi = 0; - // std::vector apis; - for (Module::iterator I = 
M.begin(), E = M.end(); I != E; ++I) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { llvm::Function *pFunc = llvm::cast(I); // Skip dead declarations if (I->use_empty()) { @@ -392,20 +405,19 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) for (StringMap::iterator I = globalsMap.begin(), E = globalsMap.end(); I != E; ++I) { - if (GlobalVariable *GV = M.getGlobalVariable(I->getKey())) { + if (GlobalVariable *GV = pMod->getGlobalVariable(I->getKey())) { specialGlobals.insert(GV); globals[GV] = I->getValue(); if (I->getValue() > maxGlobal) maxGlobal = I->getValue(); } } - if (GlobalVariable *GV = M.getGlobalVariable("__clambc_kind")) { + if (GlobalVariable *GV = pMod->getGlobalVariable("__clambc_kind")) { specialGlobals.insert(GV); } - // std::vector globalInits; globalInits.push_back(0); // ConstantPointerNul placeholder - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { + for (Module::global_iterator I = pMod->global_begin(), E = pMod->global_end(); I != E; ++I) { GlobalVariable *pgv = llvm::cast(I); if (specialGlobals.count(pgv)) { continue; @@ -433,8 +445,9 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) &M); } Constant *C = pgv->getInitializer(); - if (C->use_empty()) + if (C->use_empty()) { continue; + } globalInits.push_back(C); globals[pgv] = i++; if (i >= 32768) { @@ -473,7 +486,7 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) } // need to use bytecode_api_decl.c.h -void ClamBCAnalyzer::populateAPIMap() +void ClamBCAnalysis::populateAPIMap() { unsigned id = 1; apiMap["test1"] = id++; @@ -585,14 +598,16 @@ void ClamBCAnalyzer::populateAPIMap() apiMap["bzip2_done"] = id++; } -void ClamBCAnalyzer::getAnalysisUsage(AnalysisUsage &AU) const +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - // Preserve the CFG, we only eliminate PHIs, and introduce some - // loads/stores. 
- AU.setPreservesAll(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCAnalysis", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](ModuleAnalysisManager &mam) { + mam.registerPass([]() { return ClamBCAnalyzer(); }); + }); + }}; } -char ClamBCAnalyzer::ID = 0; -static RegisterPass X("clambc-analyzer", - "ClamAV bytecode register allocator"); - -const PassInfo *const ClamBCAnalyzerID = &X; diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h index aec37d78cc..ca33380c96 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h +++ b/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h @@ -22,17 +22,22 @@ #ifndef CLAMBC_ANALYZER_H_ #define CLAMBC_ANALYZER_H_ -#include "Common/clambc.h" +#include "clambc.h" -#include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" +#include +#include +#include +#include +#include #include #include -#include "llvm/Support/raw_ostream.h" +#include + +#include +#include +#include + +#include #include #include @@ -45,7 +50,8 @@ //5. Cannot see where banMap has any functions inserted. Do we need it? //6. Evaluate the TODO in runOnModule. 
-class ClamBCAnalyzer : public llvm::ModulePass +//class ClamBCAnalyzer : public llvm::PassInfoMixin //llvm::ModulePass +class ClamBCAnalysis { protected: typedef llvm::DenseMap TypeMapTy; @@ -76,6 +82,7 @@ class ClamBCAnalyzer : public llvm::ModulePass unsigned maxGlobal = 0; std::vector globalInits; std::vector mds; + bool WriteDI = false; virtual void printGlobals(uint16_t stid); @@ -102,8 +109,8 @@ class ClamBCAnalyzer : public llvm::ModulePass public: static char ID; - explicit ClamBCAnalyzer() - : ModulePass(ID) + explicit ClamBCAnalysis() + //: ModulePass(ID) { populateAPIMap(); @@ -117,14 +124,28 @@ class ClamBCAnalyzer : public llvm::ModulePass globalsMap["__clambc_match_offsets"] = GLOBAL_MATCH_OFFSETS; } - ~ClamBCAnalyzer() {} - virtual bool runOnModule(llvm::Module &m) override; + virtual uint32_t getHighestTID() + { + uint32_t ret = 0; + for (auto i = typeIDs.begin(), e = typeIDs.end(); i != e; i++) { + if (i->second > ret) { + ret = i->second; + } + } + return ret; + } - virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override; + ~ClamBCAnalysis() {} + virtual void run(llvm::Module &m); virtual uint32_t getTypeID(const llvm::Type *const t) { TypeMapTy::iterator I = typeIDs.find(t); + if (I == typeIDs.end()) { + DEBUG_NONPOINTER("BAD VALUE"); + DEBUG_VALUE(t); + } + assert((I != typeIDs.end()) && "Type ID requested for unknown type"); return I->second; } @@ -245,4 +266,27 @@ class ClamBCAnalyzer : public llvm::ModulePass } }; +class ClamBCAnalyzer : public llvm::AnalysisInfoMixin +{ + protected: + ClamBCAnalysis clamBCAnalysis; + + public: + friend llvm::AnalysisInfoMixin; + static llvm::AnalysisKey Key; + + ClamBCAnalyzer() + : clamBCAnalysis() {} + virtual ~ClamBCAnalyzer() {} + + typedef ClamBCAnalysis Result; + + ClamBCAnalysis &run(llvm::Module &mod, llvm::ModuleAnalysisManager &mam) + { + clamBCAnalysis.run(mod); + + return clamBCAnalysis; + } +}; + #endif //CLAMBC_ANALYZER_H_ diff --git 
a/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt b/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt new file mode 100644 index 0000000000..354336d8c3 --- /dev/null +++ b/libclambcc/ClamBCChangeMallocArgSize/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcchangemallocargsize object library +# +add_library(clambcchangemallocargsize_obj OBJECT) +target_sources(clambcchangemallocargsize_obj + PRIVATE + ClamBCChangeMallocArgSize.cpp +) + +target_include_directories(clambcchangemallocargsize_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcchangemallocargsize_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcchangemallocargsize_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcchangemallocargsize shared library. 
+# +add_library( clambcchangemallocargsize SHARED ) +target_link_libraries( clambcchangemallocargsize + PUBLIC + clambcchangemallocargsize_obj ) +set_target_properties( clambcchangemallocargsize PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcchangemallocargsize PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcchangemallocargsize PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcchangemallocargsize DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp b/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp index 65111698bd..c6b003eff0 100644 --- a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp +++ b/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp @@ -1,20 +1,23 @@ +#include "clambc.h" #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include + +#include +#include -#include "Common/clambc.h" using namespace llvm; -namespace +namespace ChangeMallocArgSize { -class ChangeMallocArgSize : public ModulePass +class ChangeMallocArgSize : public PassInfoMixin { protected: std::vector changeValues; @@ -38,7 +41,8 @@ class ChangeMallocArgSize : public ModulePass for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { CallInst* pCall = llvm::dyn_cast(i); if (pCall) { - if ("malloc" == pCall->getCalledValue()->getName()) { + Function* pFunc = pCall->getCalledFunction(); + if (pFunc && ("malloc" == pFunc->getName())) { Value* pv = pCall->getOperand(0); if (PHINode* pn = llvm::dyn_cast(pv)) { addChangeValue(pn); @@ -63,9 +67,9 @@ class ChangeMallocArgSize : public ModulePass } /* Yes, I know there is a "getTerminator" function, but I have come across 
blocks - * that have more than one branch instruction (I think it is a bug in the runtime), but - * until that is resolved, I want to use this function. - */ + * that have more than one branch instruction (I think it is a bug in the runtime), but + * until that is resolved, I want to use this function. + */ Instruction* findTerminator(BasicBlock* pb) { Instruction* inst = nullptr; @@ -136,27 +140,41 @@ class ChangeMallocArgSize : public ModulePass } public: - static char ID; ChangeMallocArgSize() - : ModulePass(ID) { } - virtual bool runOnModule(Module& m) override + virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) { - pMod = &m; + pMod = &m; + DEBUGERR << "TODO: Evaluate whether or not we still need this." + << "\n"; dstType = Type::getInt64Ty(pMod->getContext()); findSizes(); fixBitWidths(); - return true; + return PreservedAnalyses::none(); } }; // end of struct ChangeMallocArgSize -} // end of anonymous namespace +} // namespace ChangeMallocArgSize -char ChangeMallocArgSize::ID = 0; -static RegisterPass X("clambc-change-malloc-arg-size", "ChangeMallocArgSize Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ChangeMallocArgSize", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-change-malloc-arg-size") { + FPM.addPass(ChangeMallocArgSize::ChangeMallocArgSize()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp b/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp deleted file mode 100644 index 981d97029c..0000000000 --- a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp +++ /dev/null @@ -1,135 +0,0 @@ - -#include -#include "llvm/IR/Function.h" 
-#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/DerivedTypes.h" - -#include - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" - -#include - -using namespace llvm; - -namespace -{ - -class ConvertIntrinsics : public ModulePass -{ - - public: - static char ID; - - ConvertIntrinsics() - : ModulePass(ID) {} - - virtual ~ConvertIntrinsics() {} - - virtual bool runOnModule(Module& mod) - { - bChanged = false; - pMod = &mod; - - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function* pFunc = llvm::cast(i); - processFunction(pFunc); - } - - for (size_t i = 0; i < delLst.size(); i++) { - delLst[i]->eraseFromParent(); - } - - return bChanged; - } - - protected: - Module* pMod = nullptr; - bool bChanged = false; - std::vector delLst; - - void processFunction(Function* pFunc) - { - - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - BasicBlock* pBB = llvm::cast(i); - processBasicBlock(pBB); - } - } - - void processBasicBlock(BasicBlock* pBB) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - if (CallInst* pci = llvm::dyn_cast(i)) { - if (Function* f = llvm::dyn_cast(pci->getCalledValue())) { - if ("llvm.memset.p0i8.i64" == f->getName()) { - convertMemset(pci); - } - } - } - } - } - - void convertMemset(CallInst* pci) - { - std::vector args; - Type* i32Ty = Type::getInt32Ty(pMod->getContext()); - - for (size_t i = 0; i < pci->getNumArgOperands(); i++) { - Value* pv = pci->getArgOperand(i); - if (2 == i) { - if (ConstantInt* ci = llvm::dyn_cast(pv)) { - pv = ConstantInt::get(i32Ty, ci->getValue().getLimitedValue()); - } else { - pv = CastInst::CreateTruncOrBitCast(pv, i32Ty, "ConvertIntrinsics_trunc_", pci); - } - } - - args.push_back(pv); - } - - Constant* f = getNewMemset(); - CallInst::Create(getMemsetType(), f, args, "", pci); - delLst.push_back(pci); - } - - llvm::Constant* 
getNewMemset() - { - static llvm::Constant* ret = nullptr; - - if (nullptr == ret) { - - FunctionType* retType = getMemsetType(); - ret = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", retType); - - assert(ret && "Could not get memset"); - } - - return ret; - } - - llvm::FunctionType* getMemsetType() - { - static FunctionType* retType = nullptr; - if (nullptr == retType) { - LLVMContext& c = pMod->getContext(); - retType = FunctionType::get(Type::getVoidTy(c), - {Type::getInt8PtrTy(c), Type::getInt8Ty(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, - false); - } - return retType; - } -}; - -} // end of anonymous namespace - -char ConvertIntrinsics::ID = 0; -static RegisterPass XX("clambc-convert-intrinsics", "Convert Intrinsics to 32-bit", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCConvertIntrinsicsTo32Bit/CMakeLists.txt b/libclambcc/ClamBCConvertIntrinsicsTo32Bit/CMakeLists.txt new file mode 100644 index 0000000000..8972bc30bc --- /dev/null +++ b/libclambcc/ClamBCConvertIntrinsicsTo32Bit/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcconvertintrinsicsto32bit object library +# +add_library(clambcconvertintrinsicsto32bit_obj OBJECT) +target_sources(clambcconvertintrinsicsto32bit_obj + PRIVATE + ClamBCConvertIntrinsicsTo32Bit.cpp +) + +target_include_directories(clambcconvertintrinsicsto32bit_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcconvertintrinsicsto32bit_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcconvertintrinsicsto32bit_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcconvertintrinsicsto32bit shared library. 
+# +add_library( clambcconvertintrinsicsto32bit SHARED ) +target_link_libraries( clambcconvertintrinsicsto32bit + PUBLIC + clambcconvertintrinsicsto32bit_obj ) +set_target_properties( clambcconvertintrinsicsto32bit PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcconvertintrinsicsto32bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcconvertintrinsicsto32bit PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcconvertintrinsicsto32bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCConvertIntrinsicsTo32Bit/ClamBCConvertIntrinsicsTo32Bit.cpp b/libclambcc/ClamBCConvertIntrinsicsTo32Bit/ClamBCConvertIntrinsicsTo32Bit.cpp new file mode 100644 index 0000000000..b310d1357d --- /dev/null +++ b/libclambcc/ClamBCConvertIntrinsicsTo32Bit/ClamBCConvertIntrinsicsTo32Bit.cpp @@ -0,0 +1,212 @@ +#include "clambc.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + + +#include + +using namespace llvm; + +namespace ClamBCConvertIntrinsicsTo32Bit +{ + +class ClamBCConvertIntrinsicsTo32Bit : public PassInfoMixin +{ + + public: + static char ID; + + ClamBCConvertIntrinsicsTo32Bit() {} + + virtual ~ClamBCConvertIntrinsicsTo32Bit() {} + + PreservedAnalyses run(Module& mod, ModuleAnalysisManager& MAM) + { + bChanged = false; + pMod = &mod; + + initializeReplacements(); + + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function* pFunc = llvm::cast(i); + processFunction(pFunc); + } + + for (size_t i = 0; i < delLst.size(); i++) { + delLst[i]->eraseFromParent(); + } + + if (bChanged) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + protected: + Module* pMod = nullptr; + bool bChanged = false; + std::vector delLst; + + typedef struct { + llvm::Function* oldFunc; + llvm::FunctionCallee newFunc; + const size_t paramIdx; + } Replacement; + std::vector replacements; + + 
llvm::FunctionType* getMemset32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8Ty(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + llvm::FunctionType* getMemcpy32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8PtrTy(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + llvm::FunctionType* getMemmove32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8PtrTy(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + void initializeReplacements() + { + /*There are different calls when you use the -no-opaque flags.*/ + + /*memsets*/ + FunctionType* ft = getMemset32Type(); + Function* pFunc = pMod->getFunction("llvm.memset.p0i8.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = pMod->getFunction("llvm.memset.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memset.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + + /*memcpys*/ + ft = getMemcpy32Type(); + pFunc = pMod->getFunction("llvm.memcpy.p0i8.p0i8.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = pMod->getFunction("llvm.memcpy.p0.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memcpy.p0.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + + /*memmoves*/ + ft = getMemmove32Type(); + pFunc = pMod->getFunction("llvm.memmove.p0.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memmove.p0.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = pMod->getFunction("llvm.memmove.p0i8.p0i8.i64"); + 
if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memmove.p0i8.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + } + + void processFunction(Function* pFunc) + { + for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { + BasicBlock* pBB = llvm::cast(i); + processBasicBlock(pBB); + } + } + + void processBasicBlock(BasicBlock* pBB) + { + for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { + if (CallInst* pci = llvm::dyn_cast(i)) { + Function* f = pci->getCalledFunction(); + if (nullptr != f) { + for (size_t i = 0; i < replacements.size(); i++) { + if (replacements[i].oldFunc == f) { + convertCall(pci, replacements[i]); + } + } + } + } + } + } + + void convertCall(CallInst* pci, const Replacement& r) + { + std::vector args; + Type* i32Ty = Type::getInt32Ty(pMod->getContext()); + + for (size_t i = 0; i < pci->arg_size(); i++) { + Value* pv = pci->getArgOperand(i); + if (r.paramIdx == i) { + if (ConstantInt* ci = llvm::dyn_cast(pv)) { + pv = ConstantInt::get(i32Ty, ci->getValue().getLimitedValue()); + } else { + pv = CastInst::CreateTruncOrBitCast(pv, i32Ty, "ClamBCConvertIntrinsicsTo32Bit_trunc_", pci); + } + + pci->setArgOperand(i, pv); + } + + // args.push_back(pv); + } + + //FunctionCallee f = pMod->getOrInsertFunction(newName, ); + pci->setCalledFunction(r.newFunc); + + // CallInst * pNew = CallInst::Create(f, args, "", pci); + // pNew->setAttributes(pci->getAttributes()); + + // delLst.push_back(pci); + } +}; + +} // namespace ClamBCConvertIntrinsicsTo32Bit + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCConvertIntrinsicsTo32Bit", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-convert-intrinsics-to-32Bit") { + 
FPM.addPass(ClamBCConvertIntrinsicsTo32Bit::ClamBCConvertIntrinsicsTo32Bit()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt b/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt new file mode 100644 index 0000000000..01084a3fcc --- /dev/null +++ b/libclambcc/ClamBCExtendPHIsTo64Bit/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcextendphisto64bit object library +# +add_library(clambcextendphisto64bit_obj OBJECT) +target_sources(clambcextendphisto64bit_obj + PRIVATE + ClamBCExtendPHIsTo64Bit.cpp +) + +target_include_directories(clambcextendphisto64bit_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcextendphisto64bit_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcextendphisto64bit_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcextendphisto64bit shared library. 
+# +add_library( clambcextendphisto64bit SHARED ) +target_link_libraries( clambcextendphisto64bit + PUBLIC + clambcextendphisto64bit_obj ) +set_target_properties( clambcextendphisto64bit PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcextendphisto64bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcextendphisto64bit PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcextendphisto64bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp b/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp index 96566027b8..614d30e16c 100644 --- a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp +++ b/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp @@ -19,14 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" -#include "ClamBCModule.h" -#include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" +#include "ClamBCUtilities.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +34,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -50,12 +46,16 @@ #include +#include +#include + using namespace llvm; -class ClamBCExtendPHIsTo64Bit : public ModulePass +class ClamBCExtendPHIsTo64Bit : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; + bool bChanged = false; virtual void convertPHIs(Function *pFunc) { @@ -115,17 +115,17 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass Instruction *cast = CastInst::CreateIntegerCast(newNode, origType, true, "ClamBCConvertPHINodes_", insPt); pn->replaceAllUsesWith(cast); pn->eraseFromParent(); + bChanged = true; } public: static char ID; - explicit ClamBCExtendPHIsTo64Bit() - : ModulePass(ID) {} + explicit ClamBCExtendPHIsTo64Bit() {} virtual 
~ClamBCExtendPHIsTo64Bit() {} - virtual bool runOnModule(Module &m) + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; @@ -135,16 +135,33 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass convertPHIs(pFunc); } - return true; + if (bChanged) { + /* Since we changed the IR here invalidate all the previous analysis. + * We only want to invalidate the analysis when we change something, + * since it is expensive to compute. + */ + return PreservedAnalyses::none(); + } + /*We didn't change anything, so keep the previous analysis.*/ + return PreservedAnalyses::all(); } }; -char ClamBCExtendPHIsTo64Bit::ID = 0; -static RegisterPass X("clambc-extend-phis-to-64bit", "ClamBCExtendPHIsTo64Bit Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCExtendPHIsTo64Bit() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCExtendPHIsTo64Bit(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCExtendPHIsTo64Bit", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-extend-phis-to-64-bit") { + FPM.addPass(ClamBCExtendPHIsTo64Bit()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt b/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt new file mode 100644 index 0000000000..391b8a6a4f --- /dev/null +++ b/libclambcc/ClamBCLogicalCompiler/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambclogicalcompiler object library +# +add_library(clambclogicalcompiler_obj OBJECT) +target_sources(clambclogicalcompiler_obj + PRIVATE + ClamBCLogicalCompiler.cpp +) + +target_include_directories(clambclogicalcompiler_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambclogicalcompiler_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambclogicalcompiler_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambclogicalcompiler shared library. +# +add_library( clambclogicalcompiler SHARED ) +target_link_libraries( clambclogicalcompiler + PUBLIC + clambclogicalcompiler_obj ) +set_target_properties( clambclogicalcompiler PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambclogicalcompiler PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambclogicalcompiler PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambclogicalcompiler DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp index 18adfb04ae..83e48453fd 100644 --- a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp +++ b/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp @@ -20,38 +20,36 @@ * MA 02110-1301, USA. 
*/ -#include "ClamBCModule.h" -#include -#include "../Common/bytecode_api.h" #include "clambc.h" +#include "bytecode_api.h" #include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" #include "ClamBCCommon.h" #include "ClamBCUtilities.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/ConstantFolding.h" + +#include +#include +#include +#include +#include #include -#include "llvm/Analysis/ValueTracking.h" +#include #include #include #include #include -//#include -#include -#include +#include +#include +#include #include -#include "llvm/Support/Debug.h" +#include #include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" +#include +#include #include -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/IPO.h" +#include +#include +#include #include -//#include #include #include @@ -59,17 +57,15 @@ using namespace llvm; -namespace +namespace ClamBCLogicalCompiler { -class ClamBCLogicalCompiler : public ModulePass +class ClamBCLogicalCompiler : public PassInfoMixin { public: - static char ID; - ClamBCLogicalCompiler() - : ModulePass(ID) {} + ClamBCLogicalCompiler() {} - virtual bool runOnModule(Module &M); + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -90,9 +86,6 @@ class ClamBCLogicalCompiler : public ModulePass bool compileVirusNames(Module &M, unsigned kind); }; -char ClamBCLogicalCompiler::ID = 0; -RegisterPass X("clambc-lcompiler", - "ClamAV Logical Compiler"); enum LogicalKind { LOG_SUBSIGNATURE, LOG_AND, @@ -601,14 +594,23 @@ class LogicalCompiler { Value *V = LI.getOperand(0); ConstantExpr *CE = dyn_cast(V); - if (!CE || CE->getOpcode() != Instruction::GetElementPtr || - CE->getOperand(0) != GV || CE->getNumOperands() != 3 || - !cast(CE->getOperand(1))->isZero()) { - printDiagnostic("Logical 
signature: unsupported read", &LI); - return false; + ConstantInt *CI = nullptr; + if (CE) { + if (CE->getOpcode() != Instruction::GetElementPtr || + CE->getOperand(0) != GV || CE->getNumOperands() != 3 || + !cast(CE->getOperand(1))->isZero()) { + printDiagnostic("Logical signature: unsupported read", &LI); + return false; + } + CI = cast(CE->getOperand(2)); + } else { + /* In this case, we are directly loading the global, + * instead of using a getelementptr. + * It is likely that this would have been changed by O3. + */ + CI = ConstantInt::get(LI.getParent()->getParent()->getParent()->getContext(), APInt(64, 0)); } - ConstantInt *CI = cast(CE->getOperand(2)); - Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); + Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); return true; } @@ -931,6 +933,7 @@ class LogicalCompiler } Instruction *pInst = llvm::cast(I); + switch (I->getOpcode()) { case Instruction::Load: valid &= processLoad(*cast(I)); @@ -965,18 +968,107 @@ class LogicalCompiler LogicalMap::iterator CondNode = Map.find(SI->getCondition()); LogicalMap::iterator TrueNode = Map.find(SI->getTrueValue()); LogicalMap::iterator FalseNode = Map.find(SI->getFalseValue()); - if (CondNode == Map.end() || TrueNode == Map.end() || FalseNode == Map.end()) { - printDiagnostic("Logical signature: select operands must be logical" - " expressions", + + /*O3 creates blocks that look like the following, which are legitimate blocks. + * This is essentially an AND of all the %cmp.i instructions. + * Since the cmp instructions all have false at the end, comparisons will be skipped + * after one is found to be false, without having a bunch of branch instructions. + * + * We are going to handle these cases by only adding an 'and' or an 'or' if there is + * an actual logical operation, not for constants. 
+ * + + entry: + %0 = load i32, ptr @__clambc_match_counts, align 16 + %cmp.i116.not = icmp eq i32 %0, 0 + %1 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 1), align 4 + %cmp.i112.not = icmp eq i32 %1, 0 + %or.cond = select i1 %cmp.i116.not, i1 %cmp.i112.not, i1 false + %2 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 2), align 8 + %cmp.i108.not = icmp eq i32 %2, 0 + %or.cond1 = select i1 %or.cond, i1 %cmp.i108.not, i1 false + %3 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 3), align 4 + %cmp.i104.not = icmp eq i32 %3, 0 + + + .... + + br i1 %or.cond15, label %lor.rhs, label %lor.end + + lor.rhs: ; preds = %entry + %17 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 17), align 4 + %cmp.i = icmp ne i32 %17, 0 + br label %lor.end + + lor.end: ; preds = %lor.rhs, %entry + %18 = phi i1 [ true, %entry ], [ %cmp.i, %lor.rhs ] + ret i1 %18 + + */ + if (CondNode == Map.end() || (TrueNode == Map.end() && FalseNode == Map.end())) { + printDiagnostic("Logical signature: select condition must be logical" + " expression", SI); return false; } + // select cond, trueval, falseval -> cond && trueval || !cond && falseval - LogicalNode *N = LogicalNode::getAnd(CondNode->second, - TrueNode->second); - LogicalNode *NotCond = LogicalNode::getNot(CondNode->second); - LogicalNode *N2 = LogicalNode::getAnd(NotCond, FalseNode->second); - Map[SI] = LogicalNode::getOr(N, N2); + LogicalNode *N = nullptr; + LogicalNode *NotCond = nullptr; + LogicalNode *N2 = nullptr; + + if (TrueNode != Map.end()) { + N = LogicalNode::getAnd(CondNode->second, + TrueNode->second); + } else if (ConstantInt *pci = llvm::cast(SI->getTrueValue())) { + if (pci->isOne()) { + N = LogicalNode::getNode(*(CondNode->second)); + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select true value must either be" + " a logical 
expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select true value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + NotCond = LogicalNode::getNot(CondNode->second); + if (FalseNode != Map.end()) { + N2 = LogicalNode::getAnd(NotCond, FalseNode->second); + } else if (ConstantInt *pci = llvm::cast(SI->getFalseValue())) { + if (pci->isOne()) { + N2 = NotCond; + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + LogicalNode *res = nullptr; + if (N && N2) { + res = LogicalNode::getOr(N, N2); + } else if (N) { + res = N; + } else if (N2) { + res = N2; + } else { + /*SHOULD be impossible, but will add a check just in case.*/ + printDiagnostic("Logical signature: Malformed select statement.", + SI); + return false; + } + Map[SI] = res; break; } case Instruction::Ret: { @@ -1631,27 +1723,42 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) bool Valid = true; for (auto I : F->users()) { - Value *pv = nullptr; - pv = llvm::cast(I); - CallSite CS(pv); - if (!CS.getInstruction()) { + CallInst *pCallInst = llvm::cast(I); + if (nullptr == pCallInst) { + assert(0 && "NOT sure how this is possible"); continue; } - if (CS.getCalledFunction() != F) { + + if (F != pCallInst->getCalledFunction()) { + + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">NOT SURE HOW THIS IS POSSIBLE\n"; + + /*Not sure how this is possible, either*/ printDiagnostic("setvirusname can only be directly called", - CS.getInstruction()); + pCallInst); + Valid = false; + continue; + } + + if (2 != pCallInst->arg_size()) { + 
printDiagnostic("setvirusname has 2 args", pCallInst); Valid = false; continue; } - assert(CS.arg_size() == 2 && "setvirusname has 2 args"); + std::string param; llvm::StringRef sr; - Value *V = CS.getArgument(0); + Value *V = llvm::cast(pCallInst->arg_begin()); + if (nullptr == V) { + printDiagnostic("Invalid argument passed to setvirusname", pCallInst); + Valid = false; + continue; + } bool result = getConstantStringInfo(V, sr); param = sr.str(); if (!result) { printDiagnostic("Argument of foundVirus() must be a constant string", - CS.getInstruction()); + pCallInst); Valid = false; continue; } @@ -1662,31 +1769,29 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) if (!p.empty() && !virusNamesSet.count(p)) { printDiagnostic(Twine("foundVirus called with an undeclared virusname: ", p), - CS.getInstruction()); + pCallInst); Valid = false; continue; } // Add prefix std::string fullname = p.empty() ? virusNamePrefix : virusNamePrefix + "." + p.str(); - IRBuilder<> builder(CS.getInstruction()->getParent()); + IRBuilder<> builder(pCallInst->getParent()); Value *C = builder.CreateGlobalStringPtr(fullname.c_str()); IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); - CS.setArgument(0, C); - CS.setArgument(1, ConstantInt::get(I32Ty, fullname.size())); + pCallInst->setArgOperand(0, C); + pCallInst->setArgOperand(1, ConstantInt::get(I32Ty, fullname.size())); } return Valid; } -bool ClamBCLogicalCompiler::runOnModule(Module &M) +PreservedAnalyses ClamBCLogicalCompiler::run(Module &M, ModuleAnalysisManager &MAM) { bool Valid = true; LogicalSignature = ""; virusnames = ""; pMod = &M; - //dumpPHIGraphs(); - // Handle virusname unsigned kind = 0; GlobalVariable *GVKind = M.getGlobalVariable("__clambc_kind"); @@ -1705,14 +1810,17 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) GVKind->setConstant(true); } if (!compileVirusNames(M, kind)) { - if (!kind || kind == BC_STARTUP) - return true; + if (!kind || kind == BC_STARTUP) { + // return true; + 
return PreservedAnalyses::all(); + } Valid = false; } if (F) { - LoopInfo &li = getAnalysis(*F).getLoopInfo(); - if (functionHasLoop(F, li)) { + FunctionAnalysisManager &fam = MAM.getResult(M).getManager(); + LoopInfo *li = &fam.getResult(*F); + if (functionHasLoop(F, *li)) { printDiagnostic("Logical signature: loop/recursion not supported", F); Valid = false; } @@ -1842,13 +1950,26 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) // diagnostic already printed exit(42); } - return true; + return PreservedAnalyses::none(); } -} // namespace -const PassInfo *const ClamBCLogicalCompilerID = &X; - -llvm::ModulePass *createClamBCLogicalCompiler() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCLogicalCompiler(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompiler", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lcompiler") { + FPM.addPass(ClamBCLogicalCompiler()); + return true; + } + return false; + }); + }}; } + +} // namespace ClamBCLogicalCompiler diff --git a/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt new file mode 100644 index 0000000000..0cc815c218 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambclogicalcompilerhelper object library +# +add_library(clambclogicalcompilerhelper_obj OBJECT) +target_sources(clambclogicalcompilerhelper_obj + PRIVATE + ClamBCLogicalCompilerHelper.cpp +) + +target_include_directories(clambclogicalcompilerhelper_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. 
+ ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambclogicalcompilerhelper_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambclogicalcompilerhelper_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambclogicalcompilerhelper shared library. +# +add_library( clambclogicalcompilerhelper SHARED ) +target_link_libraries( clambclogicalcompilerhelper + PUBLIC + clambclogicalcompilerhelper_obj ) +set_target_properties( clambclogicalcompilerhelper PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambclogicalcompilerhelper PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambclogicalcompilerhelper PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambclogicalcompilerhelper DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp new file mode 100644 index 0000000000..b2b43050ed --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper/ClamBCLogicalCompilerHelper.cpp @@ -0,0 +1,221 @@ +/* + * Compile LLVM bytecode to logical signatures. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "clambc.h" +#include "bytecode_api.h" +#include "ClamBCDiagnostics.h" +#include "ClamBCCommon.h" +#include "ClamBCUtilities.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Since the logical compiler requires 'setvirusname' to only be called with a string constant, + * we are going to undo the PHI nodes added by O3 that would have to + * + * + * Consider the code + + return.sink.split: ; preds = %if.end39, %for.end + %.str.1.sink = phi ptr [ @.str, %for.end ], [ @.str.1, %if.end39 ] + %call.i70 = call i32 @setvirusname(ptr noundef nonnull %.str.1.sink, i32 noundef 0) #6 + br label %return + + We will just add the calls to setvirusname to the predecessor basic blocks. + * + * + */ + +#define DEBUG_TYPE "lsigcompilerhelper" + +using namespace llvm; + +namespace ClamBCLogicalCompilerHelper +{ + +class ClamBCLogicalCompilerHelper : public PassInfoMixin +{ + public: + ClamBCLogicalCompilerHelper() {} + + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const + { + } + + protected: + llvm::Module *pMod = nullptr; + std::vector erase; + bool bChanged = false; + + virtual void populateArgs(const CallInst *pci, std::vector &args) + { + for (auto i = pci->arg_begin(), e = pci->arg_end(); i != e; i++) { + args.push_back(llvm::dyn_cast(i)); + } + } + virtual void processPHI(PHINode *phi, Function *pCalledFunction, std::vector &args); + + virtual void fixupSetVirusNameCalls(); + + size_t getBranchIdx(llvm::BranchInst *pBranch, llvm::BasicBlock *pBB); +}; + +size_t ClamBCLogicalCompilerHelper::getBranchIdx(llvm::BranchInst *pBranch, llvm::BasicBlock *pBB) +{ + for (size_t ret = 0; ret < pBranch->getNumSuccessors(); ret++) { + if (pBranch->getSuccessor(ret) == pBB) { + 
return ret; + } + } + + ClamBCStop("Branch Instruction is not a predecessor to phi.", pBranch); + + return -1; +} + +/* + * Add calls to setvirusname for each constant string, rather than allowing a PHI node to + * choose the string. This is a requirement for ClamBCLogicalCompiler. + */ +void ClamBCLogicalCompilerHelper::processPHI(PHINode *phi, Function *pCalledFunction, std::vector &args) +{ + + for (size_t i = 0; i < phi->getNumIncomingValues(); i++) { + BasicBlock *pBB = phi->getIncomingBlock(i); + Value *pVal = phi->getIncomingValue(i); + + Instruction *pTerm = pBB->getTerminator(); + BranchInst *pBranch = llvm::cast(pTerm); /*I know this is a BranchInst, + and not a ReturnInst, because + it is a predecessor block to + my phi node, so no need for + a dyn_cast*/ + size_t branchIdx = getBranchIdx(pBranch, phi->getParent()); + + BasicBlock *pNew = BasicBlock::Create(pMod->getContext(), + "ClamBCLogicalCompilerHelper_call_SetVirusName_", phi->getParent()->getParent(), phi->getParent()); + pBranch->setSuccessor(branchIdx, pNew); + + args[0] = pVal; + + CallInst::Create(pCalledFunction->getFunctionType(), pCalledFunction, args, "ClamBCLogicalCompilerHelper_callInst", pNew); + BranchInst::Create(phi->getParent(), pNew); + } +} + +/* + * Find all calls to setvirusname, and make sure they aren't loading the + * first argument from a variable.
+ */ +void ClamBCLogicalCompilerHelper::fixupSetVirusNameCalls() +{ + + std::vector calls; + Function *svn = pMod->getFunction("setvirusname"); + if (nullptr == svn) { + return; + } + for (auto iter : svn->users()) { + if (CallInst *pci = llvm::dyn_cast(iter)) { + Value *operand = pci->getOperand(0); + + if (PHINode *phi = llvm::dyn_cast(operand)) { + calls.push_back(pci); + } + } + } + + for (size_t i = 0; i < calls.size(); i++) { + CallInst *pci = calls[i]; + PHINode *phi = llvm::dyn_cast(pci->getOperand(0)); + std::vector args; + populateArgs(pci, args); + processPHI(phi, svn, args); + + erase.push_back(pci); + erase.push_back(phi); + } + + for (size_t i = 0; i < erase.size(); i++) { + erase[i]->eraseFromParent(); + } +} + +PreservedAnalyses ClamBCLogicalCompilerHelper::run(Module &mod, ModuleAnalysisManager &mam) +{ + pMod = &mod; + + fixupSetVirusNameCalls(); + + if (bChanged) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); +} + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompilerHelper", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lcompiler-helper") { + FPM.addPass(ClamBCLogicalCompilerHelper()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLogicalCompilerHelper diff --git a/libclambcc/ClamBCLowering/CMakeLists.txt b/libclambcc/ClamBCLowering/CMakeLists.txt new file mode 100644 index 0000000000..d6d46c149b --- /dev/null +++ b/libclambcc/ClamBCLowering/CMakeLists.txt @@ -0,0 +1,89 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcloweringf object library +# +add_library(clambcloweringf_obj OBJECT) +target_sources(clambcloweringf_obj + PRIVATE + ClamBCLowering.cpp + ClamBCLoweringF.cpp +) + +target_include_directories(clambcloweringf_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcloweringf_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcloweringf_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcloweringf shared library. +# +add_library( clambcloweringf SHARED ) +target_link_libraries( clambcloweringf + PUBLIC + clambcloweringf_obj ) +set_target_properties( clambcloweringf PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcloweringf PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcloweringf PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcloweringf DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + + + +# +# The clambcloweringnf object library +# +add_library(clambcloweringnf_obj OBJECT) +target_sources(clambcloweringnf_obj + PRIVATE + ClamBCLowering.cpp + ClamBCLoweringNF.cpp +) + +target_include_directories(clambcloweringnf_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcloweringnf_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcloweringnf_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcloweringnf shared library. 
+# +add_library( clambcloweringnf SHARED ) +target_link_libraries( clambcloweringnf + PUBLIC + clambcloweringnf_obj ) +set_target_properties( clambcloweringnf PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcloweringnf PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcloweringnf PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcloweringnf DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCLowering/ClamBCLowering.cpp b/libclambcc/ClamBCLowering/ClamBCLowering.cpp index 95ce631502..da63d44f99 100644 --- a/libclambcc/ClamBCLowering/ClamBCLowering.cpp +++ b/libclambcc/ClamBCLowering/ClamBCLowering.cpp @@ -19,101 +19,18 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#define DEBUG_TYPE "bclowering" -#include +#include "ClamBCLowering.h" + #include "clambc.h" -#include "ClamBCModule.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ConstantFolding.h" -#include -#include -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ValueTracking.h" -#include -#include -#include "llvm/CodeGen/IntrinsicLowering.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "llvm/Support/CommandLine.h" -#include + +#include #include -#include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Scalar.h" -#include "llvm/CodeGen/IntrinsicLowering.h" using namespace llvm; -namespace -{ -class ClamBCLowering : public ModulePass -{ - public: - static char ID; - ClamBCLowering() - : ModulePass(ID) {} - - virtual ~ClamBCLowering() {} - - virtual llvm::StringRef getPassName() const - { - return "ClamAV Bytecode Lowering"; - } - virtual bool runOnModule(Module &M); - virtual void getAnalysisUsage(AnalysisUsage &AU) const - { - } - - protected: - virtual bool isFinal() = 0; - - private: - void 
lowerIntrinsics(IntrinsicLowering *IL, Function &F); - void simplifyOperands(Function &F); - void downsizeIntrinsics(Function &F); - void splitGEPZArray(Function &F); - void fixupBitCasts(Function &F); - void fixupGEPs(Function &F); - void fixupPtrToInts(Function &F); -}; - -class ClamBCLoweringNF : public ClamBCLowering +namespace ClamBCLowering { - public: - ClamBCLoweringNF() {} - virtual ~ClamBCLoweringNF() {} - protected: - virtual bool isFinal() - { - return false; - } -}; - -class ClamBCLoweringF : public ClamBCLowering -{ - public: - ClamBCLoweringF() {} - virtual ~ClamBCLoweringF() {} - - protected: - virtual bool isFinal() - { - return true; - } -}; - -char ClamBCLowering::ID = 0; void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) { std::vector prototypesToGen; @@ -156,7 +73,7 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) Builder.SetInsertPoint(BO); Value *V = Builder.CreatePointerCast(PII->getOperand(0), PointerType::getUnqual(Type::getInt8Ty(F.getContext()))); - V = Builder.CreateGEP(V, Idx); + V = Builder.CreateGEP(V->getType(), V, Idx); V = Builder.CreatePtrToInt(V, BO->getType()); BO->replaceAllUsesWith(V); } else if (GetElementPtrInst *GEPI = dyn_cast(II)) { @@ -284,7 +201,7 @@ void ClamBCLowering::simplifyOperands(Function &F) if (ConstantExpr *CE = dyn_cast(II->getOperand(i))) { if (CE->getOpcode() == Instruction::GetElementPtr) { // rip out GEP expr and load it - Ops.push_back(new LoadInst(CE, "gepex_load", SI)); + Ops.push_back(new LoadInst(CE->getType(), CE, "gepex_load", SI)); Changed = true; } } else { @@ -368,47 +285,47 @@ static inline void addIntrinsicFunctions(llvm::Module *pMod, Intrinsic::getDeclaration(pMod, Intrinsic::memmove, {i8Ptr, i8Ptr, i32, i1}))); } -static llvm::Value *getReplacementSizeOperand(llvm::CallSite &CS, llvm::Value *Len) +static llvm::Value *getReplacementSizeOperand(llvm::CallInst *pCallInst, llvm::Value *Len) { - llvm::LLVMContext &Context = 
CS.getParent()->getParent()->getParent()->getContext(); - Value *NewLen = NULL; + LLVMContext &context = pCallInst->getParent()->getParent()->getParent()->getContext(); + Value *NewLen = NULL; if (ConstantInt *C = dyn_cast(Len)) { - NewLen = ConstantInt::get(Type::getInt32Ty(Context), + NewLen = ConstantInt::get(Type::getInt32Ty(context), C->getValue().getLimitedValue((1ULL << 32) - 1)); } else { - NewLen = new TruncInst(Len, Type::getInt32Ty(Context), "lvl_dwn", CS.getInstruction()); + NewLen = new TruncInst(Len, Type::getInt32Ty(context), "lvl_dwn", pCallInst); } return NewLen; } -static void populateArgumentList(llvm::CallSite &CS, llvm::Value *newLen, size_t idx, std::vector &Ops) +static void populateArgumentList(llvm::CallInst *pCallInst, llvm::Value *newLen, size_t idx, std::vector &Ops) { - for (unsigned i = 0; i < CS.arg_size(); ++i) { + for (unsigned i = 0; i < pCallInst->arg_size(); ++i) { if (i == idx) { Ops.push_back(newLen); } else { - Ops.push_back(CS.getArgument(i)); + Ops.push_back(pCallInst->getArgOperand(i)); } } } -static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair rep, size_t idx) +static bool replaceIntrinsicCalls(llvm::MemIntrinsic *pMemIntrinsic, std::pair rep, size_t idx) { - llvm::Function *pCalled = MI->getCalledFunction(); + llvm::Function *pCalled = pMemIntrinsic->getCalledFunction(); { if (rep.first == pCalled) { - llvm::CallSite CS(MI); - Value *Len = CS.getArgument(2); - llvm::Value *newLen = getReplacementSizeOperand(CS, Len); + //llvm::CallSite CS(MI); + Value *Len = pMemIntrinsic->getArgOperand(2); + llvm::Value *newLen = getReplacementSizeOperand(pMemIntrinsic, Len); std::vector args; - populateArgumentList(CS, newLen, idx, args); + populateArgumentList(pMemIntrinsic, newLen, idx, args); assert(args.size() == 4 && "malformed intrinsic call!"); - llvm::Instruction *i = CallInst::Create(rep.second, args, MI->getName(), MI); + llvm::Instruction *i = CallInst::Create(rep.second, args, pMemIntrinsic->getName(), 
pMemIntrinsic); assert(i && "Failed to create new CallInst"); return true; @@ -421,7 +338,6 @@ static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair InstDel; std::vector> repPairs; @@ -458,7 +374,7 @@ static void gatherAllocasWithBitcasts(llvm::BasicBlock *bb, std::vector [#uses=2] @@ -496,19 +412,9 @@ void ClamBCLowering::fixupBitCasts(Function &F) continue; } - /*aragusa - * I am getting an assertion failure trying to cast a value that is not an ArrayType - * to an ArrayType. I don't fully understand the reason for doing what we are doing here. - * I am just going to check if AI->getAllocatedType is an array type. I may need to revisit this later. - */ if (not llvm::isa(AI->getAllocatedType())) { continue; } - /*Intentionally leaving this debug message in, because I don't think this code is executed very often, and - * I don't believe it is necessary. Once I get the bugs ironed out of the header files, I am going to - * see if this ever prints and does not have an assertion failure. 
The iterators were previously not working - * correctly and in fixing them, I believe I turned on code that wasn't previously working.*/ - const ArrayType *arTy = cast(AI->getAllocatedType()); Type *APTy = PointerType::getUnqual(arTy->getElementType()); @@ -517,7 +423,6 @@ void ClamBCLowering::fixupBitCasts(Function &F) AIC->setName("ClamBCLowering_fixupBitCasts"); BasicBlock::iterator IP = AI->getParent()->begin(); while (isa(IP)) ++IP; - //Value *Idx[] = {Zero, Zero}; llvm::ArrayRef Idxs = {Zero, Zero}; V = GetElementPtrInst::Create(nullptr, AIC, Idxs, "base_gepz", AI); @@ -545,7 +450,6 @@ void ClamBCLowering::fixupGEPs(Function &F) std::vector indexes; GetElementPtrInst::op_iterator J = GEPI->idx_begin(), JE = GEPI->idx_end(); for (; J != JE; ++J) { - //llvm::Value * v = llvm::cast(J); // push all constants if (Constant *C = dyn_cast(*J)) { indexes.push_back(C); @@ -556,10 +460,7 @@ void ClamBCLowering::fixupGEPs(Function &F) 0)); break; } - Constant *C = cast(GEPI->getOperand(0)); - //Constant *GC = ConstantExpr::getInBoundsGetElementPtr(C, - // &indexes[0], - // indexes.size()); + Constant *C = cast(GEPI->getOperand(0)); Constant *GC = ConstantExpr::getInBoundsGetElementPtr(nullptr, C, indexes); if (J != JE) { @@ -567,11 +468,10 @@ void ClamBCLowering::fixupGEPs(Function &F) for (; J != JE; ++J) { indexes.push_back(*J); } - //AllocaInst *AI = new AllocaInst(GC->getType(), "", Entry->begin()); AllocaInst *AI = new AllocaInst(GC->getType(), 0, "ClamBCLowering_fixupGEPs", llvm::cast(Entry->begin())); new StoreInst(GC, AI, GEPI); - Value *L = new LoadInst(AI, "ClamBCLowering_fixupGEPs", GEPI); - Value *V = GetElementPtrInst::CreateInBounds(L, indexes, "ClamBCLowering_fixupGEPs", GEPI); + Value *L = new LoadInst(AI->getType(), AI, "ClamBCLowering_fixupGEPs", GEPI); + Value *V = GetElementPtrInst::CreateInBounds(L->getType(), L, indexes, "ClamBCLowering_fixupGEPs", GEPI); GEPI->replaceAllUsesWith(V); GEPI->eraseFromParent(); } else { @@ -629,7 +529,7 @@ void 
ClamBCLowering::splitGEPZArray(Function &F) continue; } const PointerType *Ty = cast(GEPI->getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getArrayElementType()); if (!ATy) { continue; } @@ -637,18 +537,21 @@ void ClamBCLowering::splitGEPZArray(Function &F) Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0); Value *VZ[] = {Zero, Zero}; // transform GEPZ: [4 x i16]* %p, 0, %i -> GEP1 i16* (bitcast)%p, %i - Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); - Value *NG = GetElementPtrInst::CreateInBounds(C, V, "ClamBCLowering_splitGEPZArray", GEPI); + Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand()->getType(), GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); + Value *NG = GetElementPtrInst::CreateInBounds(C->getType(), C, V, "ClamBCLowering_splitGEPZArray", GEPI); GEPI->replaceAllUsesWith(NG); GEPI->eraseFromParent(); } } } -bool ClamBCLowering::runOnModule(Module &M) +PreservedAnalyses ClamBCLowering::run(Module &m, ModuleAnalysisManager &MAM) { - for (Module::iterator I = M.begin(), E = M.end(); + pMod = &m; + pContext = &(pMod->getContext()); + + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { if (I->isDeclaration()) continue; @@ -663,14 +566,7 @@ bool ClamBCLowering::runOnModule(Module &M) } } - return true; + return PreservedAnalyses::none(); } -} // namespace - -static RegisterPass X("clambc-lowering-notfinal", "ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -static RegisterPass XX("clambc-lowering-final", "ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLowering/ClamBCLowering.h b/libclambcc/ClamBCLowering/ClamBCLowering.h new file mode 100644 index 0000000000..c1f80327e4 --- /dev/null +++ 
b/libclambcc/ClamBCLowering/ClamBCLowering.h @@ -0,0 +1,61 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ +//#define DEBUG_TYPE "bclowering" + +#include +#include + +namespace ClamBCLowering +{ + +class ClamBCLowering : public llvm::PassInfoMixin +{ + public: + ClamBCLowering() {} + + virtual ~ClamBCLowering() {} + + virtual llvm::StringRef getPassName() const + { + return "ClamAV Bytecode Lowering"; + } + virtual llvm::PreservedAnalyses run(llvm::Module &m, llvm::ModuleAnalysisManager &MAM); + virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const + { + } + + protected: + virtual bool isFinal() = 0; + llvm::LLVMContext *pContext = nullptr; + llvm::Module *pMod = nullptr; + + private: + void lowerIntrinsics(llvm::IntrinsicLowering *IL, llvm::Function &F); + void simplifyOperands(llvm::Function &F); + void downsizeIntrinsics(llvm::Function &F); + void splitGEPZArray(llvm::Function &F); + void fixupBitCasts(llvm::Function &F); + void fixupGEPs(llvm::Function &F); + void fixupPtrToInts(llvm::Function &F); +}; + +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp b/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp new file mode 100644 index 0000000000..b5de956aaf --- /dev/null +++
b/libclambcc/ClamBCLowering/ClamBCLoweringF.cpp @@ -0,0 +1,65 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "ClamBCLowering.h" + +#include "clambc.h" + +#include + +using namespace llvm; + +namespace ClamBCLowering +{ + +class ClamBCLoweringF : public ClamBCLowering +{ + public: + ClamBCLoweringF() {} + virtual ~ClamBCLoweringF() {} + + protected: + virtual bool isFinal() + { + return true; + } +}; + +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lowering-final") { + FPM.addPass(ClamBCLoweringF()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp b/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp new file mode 100644 index 0000000000..8b26b3ff72 --- /dev/null +++ b/libclambcc/ClamBCLowering/ClamBCLoweringNF.cpp @@ -0,0 +1,66 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc.
+ * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "ClamBCLowering.h" + +#include "clambc.h" + +#include + +using namespace llvm; + +namespace ClamBCLowering +{ + +class ClamBCLoweringNF : public ClamBCLowering +{ + public: + ClamBCLoweringNF() {} + virtual ~ClamBCLoweringNF() {} + + protected: + virtual bool isFinal() + { + return false; + } +}; + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lowering-notfinal") { + FPM.addPass(ClamBCLoweringNF()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCModule/ClamBCModule.cpp b/libclambcc/ClamBCModule/ClamBCModule.cpp deleted file mode 100644 index 33de28eb59..0000000000 --- a/libclambcc/ClamBCModule/ClamBCModule.cpp +++ /dev/null @@ -1,30 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "clambc.h" - -using namespace llvm; - -namespace -{ -struct
ClamBCModule : public FunctionPass { - static char ID; - ClamBCModule() - : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override - { - return false; - } -}; // end of struct ClamBCModule -} // end of anonymous namespace - -char ClamBCModule::ID = 0; -static RegisterPass X("clambc-module", "ClamBCModule Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt b/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt new file mode 100644 index 0000000000..3fba904bca --- /dev/null +++ b/libclambcc/ClamBCOutlineEndiannessCalls/CMakeLists.txt @@ -0,0 +1,45 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcoutlineendiannesscalls object library +# +add_library(clambcoutlineendiannesscalls_obj OBJECT) +target_sources(clambcoutlineendiannesscalls_obj + PRIVATE + ClamBCOutlineEndiannessCalls.cpp +) + +target_include_directories(clambcoutlineendiannesscalls_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcoutlineendiannesscalls_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcoutlineendiannesscalls_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcoutlineendiannesscalls shared library. 
+# +add_library( clambcoutlineendiannesscalls SHARED ) +target_link_libraries( clambcoutlineendiannesscalls + PUBLIC + clambcoutlineendiannesscalls_obj ) +set_target_properties( clambcoutlineendiannesscalls PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcoutlineendiannesscalls PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcoutlineendiannesscalls PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcoutlineendiannesscalls DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + + diff --git a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp b/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp index 2048ecb7ce..b3449fe530 100644 --- a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp +++ b/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp @@ -1,20 +1,22 @@ +#include "clambc.h" + #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include -#include "Common/clambc.h" +#include +#include using namespace llvm; namespace { -class OutlineEndniassCalls : public ModulePass +class ClamBCOutlineEndiannessCalls : public PassInfoMixin { protected: bool bChanged = false; @@ -25,7 +27,7 @@ class OutlineEndniassCalls : public ModulePass for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { CallInst* pCall = llvm::dyn_cast(i); if (pCall) { - if ("__is_bigendian" == pCall->getCalledValue()->getName()) { + if ("__is_bigendian" == pCall->getCalledFunction()->getName()) { calls.push_back(pCall); } } @@ -79,33 +81,49 @@ class OutlineEndniassCalls : public ModulePass public: static char ID; - OutlineEndniassCalls() - : ModulePass(ID) {} + ClamBCOutlineEndiannessCalls() {} - virtual bool runOnModule(Module& m) override 
+ virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) { pMod = &m; std::vector calls = findCalls(); if (0 == calls.size()) { - return false; + return PreservedAnalyses::all(); } Function* pNew = getNewEndiannessFunction(calls[0]); for (size_t i = 0; i < calls.size(); i++) { - CallInst* pNewCall = CallInst::Create(pNew, "OutlineEndniassCalls_", calls[i]); + CallInst* pNewCall = CallInst::Create(pNew, "ClamBCOutlineEndiannessCalls_", calls[i]); calls[i]->replaceAllUsesWith(pNewCall); calls[i]->eraseFromParent(); } - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } -}; // end of struct OutlineEndniassCalls +}; // end of struct ClamBCOutlineEndiannessCalls } // end of anonymous namespace -char OutlineEndniassCalls::ID = 0; -static RegisterPass X("clambc-outline-endianness-calls", "OutlineEndniassCalls TEST Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCOutlineEndiannessCalls", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-outline-endianness-calls") { + FPM.addPass(ClamBCOutlineEndiannessCalls()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt b/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt new file mode 100644 index 0000000000..ad914a0d63 --- /dev/null +++ b/libclambcc/ClamBCPrepareGEPsForWriter/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcpreparegepsforwriter object library +# +add_library(clambcpreparegepsforwriter_obj OBJECT) +target_sources(clambcpreparegepsforwriter_obj + PRIVATE + ClamBCPrepareGEPsForWriter.cpp +) + +target_include_directories(clambcpreparegepsforwriter_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcpreparegepsforwriter_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcpreparegepsforwriter_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcpreparegepsforwriter shared library. +# +add_library( clambcpreparegepsforwriter SHARED ) +target_link_libraries( clambcpreparegepsforwriter + PUBLIC + clambcpreparegepsforwriter_obj ) +set_target_properties( clambcpreparegepsforwriter PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcpreparegepsforwriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcpreparegepsforwriter PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcpreparegepsforwriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp b/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp index 8033f1f50b..948a2b7a82 100644 --- a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp +++ b/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp @@ -19,14 +19,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" #include "ClamBCModule.h" +#include "ClamBCUtilities.h" + #include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +37,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -47,21 +46,21 @@ #include #include #include +#include +#include #include using namespace llvm; -class ClamBCPrepareGEPsForWriter : public ModulePass -{ +struct ClamBCPrepareGEPsForWriter : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; public: static char ID; - explicit ClamBCPrepareGEPsForWriter() - : ModulePass(ID) {} + explicit ClamBCPrepareGEPsForWriter() {} virtual ~ClamBCPrepareGEPsForWriter() {} @@ -232,10 +231,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_2_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, "processGEPI_2_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_3_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_3_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -305,10 +304,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_0_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, "processGEPI_0_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_1_", 
pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_1_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -372,7 +371,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } } - virtual bool runOnModule(Module &m) + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -387,7 +386,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass fixCasts(pFunc); } - return true; + return PreservedAnalyses::none(); } virtual void fixCasts(Function *pFunc) @@ -417,12 +416,21 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } }; -char ClamBCPrepareGEPsForWriter::ID = 0; -static RegisterPass X("clambc-prepare-geps-for-writer", "ClamBCPrepareGEPsForWriter Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCPrepareGEPsForWriter() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCPrepareGEPsForWriter(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPrepareGEPsForWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-prepare-geps-for-writer") { + FPM.addPass(ClamBCPrepareGEPsForWriter()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCPreserveABIs/CMakeLists.txt b/libclambcc/ClamBCPreserveABIs/CMakeLists.txt new file mode 100644 index 0000000000..e9a928c480 --- /dev/null +++ b/libclambcc/ClamBCPreserveABIs/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcpreserveabis object library +# +add_library(clambcpreserveabis_obj OBJECT) +target_sources(clambcpreserveabis_obj + PRIVATE + ClamBCPreserveABIs.cpp +) + +target_include_directories(clambcpreserveabis_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcpreserveabis_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcpreserveabis_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcpreserveabis shared library. +# +add_library( clambcpreserveabis SHARED ) +target_link_libraries( clambcpreserveabis + PUBLIC + clambcpreserveabis_obj ) +set_target_properties( clambcpreserveabis PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcpreserveabis PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcpreserveabis PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcpreserveabis DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp b/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp index d735be0636..4b6fcdc0c2 100644 --- a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp +++ b/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp @@ -1,17 +1,18 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include "llvm/IR/Module.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" +#include +#include +#include #include #include @@ -32,7 +33,7 @@ namespace * to fake functions. If it does find it (the second time), it removes those * calls. 
*/ -class ClamBCPreserveABIs : public ModulePass +class ClamBCPreserveABIs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; @@ -46,9 +47,9 @@ class ClamBCPreserveABIs : public ModulePass return; } FunctionType *pFunctionType = llvm::dyn_cast(pFunc->getType()); - std::string newname = pFunc->getName(); + std::string newname(pFunc->getName()); + pFunctionType = pFunc->getFunctionType(); newname += "_fake"; - pFunctionType = llvm::cast(llvm::cast(pFunc->getType())->getElementType()); Function *fakeFunction = Function::Create(pFunctionType, Function::ExternalLinkage, newname, pFunc->getParent()); fakeFunctions.push_back(fakeFunction); std::vector args; @@ -127,18 +128,16 @@ class ClamBCPreserveABIs : public ModulePass } public: - static char ID; - ClamBCPreserveABIs() - : ModulePass(ID) {} + ClamBCPreserveABIs() {} virtual ~ClamBCPreserveABIs() {} - bool runOnModule(Module &m) override + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; if (removeFakeFunctions()) { - return bChanged; + return PreservedAnalyses::none(); } for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -157,13 +156,30 @@ class ClamBCPreserveABIs : public ModulePass writeMetadata(); - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of struct ClamBCPreserveABIs } // end of anonymous namespace -char ClamBCPreserveABIs::ID = 0; -static RegisterPass X("clambc-preserve-abis", "Preserve ABIs", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPreserveABIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-preserve-abis") { + FPM.addPass(ClamBCPreserveABIs()); + return true; + 
} + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRebuild/CMakeLists.txt b/libclambcc/ClamBCRebuild/CMakeLists.txt new file mode 100644 index 0000000000..3020913e03 --- /dev/null +++ b/libclambcc/ClamBCRebuild/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcrebuild object library +# +add_library(clambcrebuild_obj OBJECT) +target_sources(clambcrebuild_obj + PRIVATE + ClamBCRebuild.cpp +) + +target_include_directories(clambcrebuild_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcrebuild_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcrebuild_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcrebuild shared library. +# +add_library( clambcrebuild SHARED ) +target_link_libraries( clambcrebuild + PUBLIC + clambcrebuild_obj ) +set_target_properties( clambcrebuild PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcrebuild PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcrebuild PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcrebuild DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp b/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp index c253bbded3..2b83c21c89 100644 --- a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp +++ b/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp @@ -19,13 +19,16 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ + +#include "ClamBCModule.h" +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include #include #include #include #include -#include #include #include #include @@ -33,23 +36,21 @@ #include #include #include +#include +#include #include #include #include #include #include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" - using namespace llvm; -class ClamBCRebuild : public ModulePass, public InstVisitor +class ClamBCRebuild : public PassInfoMixin, public InstVisitor { public: static char ID; - explicit ClamBCRebuild() - : ModulePass(ID) {} + explicit ClamBCRebuild() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Backend Rebuilder"; @@ -82,8 +83,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Builder = new IRBuilder(*Context, TF); - SE = nullptr; - Expander = nullptr; + SE = nullptr; visitFunction(F, &NF); for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { @@ -104,6 +104,13 @@ class ClamBCRebuild : public ModulePass, public InstVisitor for (unsigned i = 0; i < N->getNumIncomingValues(); i++) { Value *V = mapPHIValue(N->getIncomingValue(i)); BasicBlock *BB = mapBlock(N->getIncomingBlock(i)); + + if (V->getType() != N->getType()) { + if (V->getType()->isPointerTy() and N->getType()->isPointerTy()) { + V = CastInst::CreatePointerCast(V, N->getType(), + "ClamBCRebuild_fixCast_", BB->getTerminator()); + } + } PN->addIncoming(V, BB); } assert(PN->getNumIncomingValues() > 0); @@ -114,9 +121,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor fixupCalls(F, copy); F->setLinkage(GlobalValue::InternalLinkage); - if (Expander) { - delete Expander; - } delete Builder; return true; } @@ -146,7 +150,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void fixupCallInst(CallInst *pCallInst, Function *pFunc) { assert(pCallInst->arg_size() == pFunc->arg_size() && "Incorrect number of arguments"); - assert(pCallInst->getCalledValue() == pFunc && "This CallInst doesn't 
call this function"); auto argIter = pFunc->arg_begin(), argEnd = pFunc->arg_end(); auto callIter = pCallInst->arg_begin(), callEnd = pCallInst->arg_end(); @@ -174,13 +177,12 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } } - bool runOnModule(Module &M) + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) { pMod = &M; /* Taken from doInitialization. */ FMap.clear(); - //FMapRev.clear(); Context = &(pMod->getContext()); i8Ty = Type::getInt8Ty(*Context); @@ -188,7 +190,11 @@ class ClamBCRebuild : public ModulePass, public InstVisitor std::vector funcs; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function *pFunc = llvm::cast(i); + Function *pFunc = llvm::cast(i); + const FunctionType *FTy = pFunc->getFunctionType(); + if (FTy->isVarArg()) { + return PreservedAnalyses::all(); + } funcs.push_back(pFunc); } for (size_t i = 0; i < funcs.size(); i++) { @@ -196,7 +202,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor runOnFunction(*pFunc); } - return true; + return PreservedAnalyses::none(); } private: @@ -214,14 +220,12 @@ class ClamBCRebuild : public ModulePass, public InstVisitor ValueMapTy VMap; DenseMap, Value *> CastMap; - ScalarEvolution *SE = nullptr; - Type *i8Ty = nullptr; - Type *i8pTy = nullptr; - //FunctionPassManager *FPM = nullptr; + ScalarEvolution *SE = nullptr; + Type *i8Ty = nullptr; + Type *i8pTy = nullptr; LLVMContext *Context = nullptr; DenseSet visitedBB; IRBuilder *Builder = nullptr; - SCEVExpander *Expander = nullptr; void stop(const std::string &Msg, const llvm::Instruction *I) { @@ -229,19 +233,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } friend class InstVisitor; - const Type *getInnerElementType(const CompositeType *CTy) - { - const Type *ETy = nullptr; - // get pointer to first element - do { - assert(CTy->indexValid(0u)); - ETy = CTy->getTypeAtIndex(0u); - CTy = dyn_cast(ETy); - } while (CTy); - assert(ETy->isIntegerTy()); - return ETy; - } - Type 
*rebuildType(Type *Ty, bool i8only = false) { assert(Ty); @@ -347,7 +338,8 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Value *PV = mapValue(P); if (PV->getType() == Ty && !isa(PV)) { assert(!isa(PV) || - cast(Ty)->getElementType()->isIntegerTy()); + Ty->getPointerElementType()->isIntegerTy()); + return PV; } PV = PV->stripPointerCasts(); @@ -425,7 +417,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void visitLoadInst(LoadInst &I) { Value *P = I.getPointerOperand(); - VMap[&I] = Builder->CreateLoad(mapPointer(P, P->getType()), + VMap[&I] = Builder->CreateLoad(I.getType(), mapPointer(P, P->getType()), I.getName()); } @@ -451,12 +443,18 @@ class ClamBCRebuild : public ModulePass, public InstVisitor I != E; ++I) { idxs.push_back(mapValue(*I)); } + + Type *pt = P->getType(); + if (llvm::isa(pt)) { + pt = pt->getPointerElementType(); + } + if (II.isInBounds()) { //P = Builder->CreateInBoundsGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateInBoundsGEP(P, idxs, "clambcRebuildInboundsGEP"); + P = Builder->CreateInBoundsGEP(pt, P, idxs, "clambcRebuildInboundsGEP"); } else { //P = Builder->CreateGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateGEP(P, idxs, "clambcRebuildGEP"); + P = Builder->CreateGEP(pt, P, idxs, "clambcRebuildGEP"); } VMap[&II] = makeCast(P, rebuildType(II.getType())); ; @@ -599,13 +597,22 @@ class ClamBCRebuild : public ModulePass, public InstVisitor return ret; } }; -char ClamBCRebuild::ID = 0; - -static RegisterPass X("clambc-rebuild", "ClamBCRebuild Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -llvm::ModulePass *createClamBCRebuild(void) +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCRebuild(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRebuild", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + 
ArrayRef) { + if (Name == "clambc-rebuild") { + FPM.addPass(ClamBCRebuild()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCRegAlloc/CMakeLists.txt b/libclambcc/ClamBCRegAlloc/CMakeLists.txt new file mode 100644 index 0000000000..2fc8955153 --- /dev/null +++ b/libclambcc/ClamBCRegAlloc/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcregalloc object library +# +add_library(clambcregalloc_obj OBJECT) +target_sources(clambcregalloc_obj + PRIVATE + ClamBCRegAlloc.cpp +) + +target_include_directories(clambcregalloc_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcregalloc_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcregalloc_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcregalloc shared library. +# +add_library( clambcregalloc SHARED ) +target_link_libraries( clambcregalloc + PUBLIC + clambcregalloc_obj ) +set_target_properties( clambcregalloc PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcregalloc PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcregalloc PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcregalloc DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/Common/ClamBCRegAlloc.cpp b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp similarity index 80% rename from libclambcc/Common/ClamBCRegAlloc.cpp rename to libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp index 7bd83c011b..38ae1d076e 100644 --- a/libclambcc/Common/ClamBCRegAlloc.cpp +++ b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.cpp @@ -19,13 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "ClamBCModule.h" +#include "ClamBCRegAlloc.h" #include "ClamBCUtilities.h" #include "clambc.h" #include -//#include "llvm/Analysis/LiveValues.h" -//#include "llvm/Config/config.h" #include #include #include @@ -34,6 +32,7 @@ #include #include #include +#include using namespace llvm; // We do have a virtually unlimited number of registers, but it is more cache @@ -45,8 +44,10 @@ using namespace llvm; // targets with fixed number of registers, and a much simpler allocator // suffices for us. +llvm::AnalysisKey ClamBCRegAllocAnalyzer::Key; + /*TODO: Should rework this so that we are not changing things with open iterators.*/ -void ClamBCRegAlloc::handlePHI(PHINode *PN) +void ClamBCRegAllocAnalysis::handlePHI(PHINode *PN) { BasicBlock *BB = PN->getIncomingBlock(0); for (unsigned i = 1; i < PN->getNumIncomingValues(); i++) { @@ -77,18 +78,18 @@ void ClamBCRegAlloc::handlePHI(PHINode *PN) ++It; } while (isa(It)); builder.SetInsertPoint(&*It); - LoadInst *LI = builder.CreateLoad(AI, ".phiload"); + LoadInst *LI = builder.CreateLoad(AI->getAllocatedType(), AI, ".phiload"); builder.SetInstDebugLocation(LI); PN->replaceAllUsesWith(LI); PN->eraseFromParent(); } -bool ClamBCRegAlloc::runOnFunction(Function &F) +bool ClamBCRegAllocAnalysis::runOnFunction(Function &F) { ValueMap.clear(); RevValueMap.clear(); - DT = &getAnalysis().getDomTree(); bool Changed = false; + std::vector pns; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock &BB = *I; BasicBlock::iterator J = BB.begin(); @@ -97,9 +98,13 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) if (!PN) break; ++J; - handlePHI(PN); + pns.push_back(PN); } } + for (size_t i = 0; i < pns.size(); i++) { + PHINode *PN = pns[i]; + handlePHI(PN); + } unsigned id = 0; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); @@ -108,8 +113,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[A] = id; if (RevValueMap.size() == id) { RevValueMap.push_back(A); - } else { - errs() << id 
<< " " << __FILE__ << ":" << __LINE__ << "\n"; } ++id; } @@ -127,22 +130,22 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[II] = ~0u; continue; } + + { + static int first = 1; + if (first) { + first = 0; + } + } if (CastInst *BC = dyn_cast(II)) { if (BitCastInst *BCI = dyn_cast(BC)) { if (!BCI->isLosslessCast()) { ClamBCStop("Non lossless bitcast is not supported", BCI); } - const Type *SrcTy = BC->getOperand(0)->getType(); - const Type *DstTy = BC->getType(); - const PointerType *SPTy, *DPTy; - while ((SPTy = dyn_cast(SrcTy))) { - DPTy = dyn_cast(DstTy); - if (!DPTy) { - ClamBCStop("Cast from pointer to non-pointer element", - BCI); - } - SrcTy = SPTy->getElementType(); - DstTy = DPTy->getElementType(); + + if (BCI->getSrcTy()->isPointerTy() and (not BCI->getDestTy()->isPointerTy())) { + ClamBCStop("Cast from pointer to non-pointer element", + BCI); } if (AllocaInst *AI = dyn_cast(BCI->getOperand(0))) { @@ -151,8 +154,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[II] = id; if (RevValueMap.size() == id) { RevValueMap.push_back(II); - } else { - errs() << id << " " << __FILE__ << ":" << __LINE__ << "\n"; } ++id; continue; @@ -161,6 +162,9 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) SkipMap.insert(II); ValueMap[II] = getValueID(II->getOperand(0)); continue; + } else if (llvm::isa(BC) or llvm::isa(BC)) { + ClamBCStop("Cast from pointer to non-pointer element", + BC); /* BCI is null in this branch; report the cast itself */ } } if (II->hasOneUse()) { @@ -181,14 +185,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) } } // single-use of load from alloca -> use directly value id of alloca - //TODO: we must check for intervening stores here, better use memdep!
- /* if (LoadInst *LI = dyn_cast(II)) { - if (AllocaInst *AI = dyn_cast(LI->getPointerOperand())) { - ValueMap[LI] = getValueID(AI); - SkipMap.insert(LI); - continue; - } - }*/ } ValueMap[II] = id; if (RevValueMap.size() == id) { @@ -204,7 +200,7 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) return Changed; } -void ClamBCRegAlloc::dump() const +void ClamBCRegAllocAnalysis::dump() const { for (ValueIDMap::const_iterator I = ValueMap.begin(), E = ValueMap.end(); I != E; ++I) { @@ -212,7 +208,7 @@ void ClamBCRegAlloc::dump() const } } -void ClamBCRegAlloc::revdump() const +void ClamBCRegAllocAnalysis::revdump() const { for (unsigned i = 0; i < RevValueMap.size(); ++i) { errs() << i << ": "; @@ -221,7 +217,7 @@ void ClamBCRegAlloc::revdump() const } } -unsigned ClamBCRegAlloc::buildReverseMap(std::vector &reverseMap) +unsigned ClamBCRegAllocAnalysis::buildReverseMap(std::vector &reverseMap) { // Check using the older building code to determine changes due to building difference // Note: this code can be removed if necessary @@ -253,7 +249,7 @@ unsigned ClamBCRegAlloc::buildReverseMap(std::vector &reverseMap) return RevValueMap.size(); } -void ClamBCRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const +void ClamBCRegAllocAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -261,8 +257,17 @@ void ClamBCRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const // loads/stores. 
AU.setPreservesCFG(); } -char ClamBCRegAlloc::ID = 0; -static RegisterPass X("clambc-ra", - "ClamAV bytecode register allocator"); -const PassInfo *const ClamBCRegAllocID = &X; +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRegAlloc", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](FunctionAnalysisManager &mam) { + mam.registerPass([]() { return ClamBCRegAllocAnalyzer(); }); + }); + }}; +} diff --git a/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h new file mode 100644 index 0000000000..7777ce79bc --- /dev/null +++ b/libclambcc/ClamBCRegAlloc/ClamBCRegAlloc.h @@ -0,0 +1,112 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +#ifndef CLAMBC_REGALLOC_H +#define CLAMBC_REGALLOC_H + +#include "clambc.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +class ClamBCRegAllocAnalysis +{ + public: + static char ID; + explicit ClamBCRegAllocAnalysis() {} + + unsigned buildReverseMap(std::vector &); + bool skipInstruction(const llvm::Instruction *I) const + { + return SkipMap.count(I); + } + + unsigned getValueID(const llvm::Value *V) const + { + ValueIDMap::const_iterator I = ValueMap.find(V); + if (I == ValueMap.end()) { + DEBUGERR << "Error Value ID requested for unknown value (Printing below).\n"; + DEBUGERR << *V << "\n"; + assert(0 && "Value ID requested for unknown value"); + } + assert(I->second != ~0u && + "Value ID requested for unused/void instruction!"); + return I->second; + } + virtual bool runOnFunction(llvm::Function &F); + virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const; + void dump() const; + void revdump() const; + + virtual void setDominatorTree(llvm::DominatorTree *dt) + { + DT = dt; + } + + private: + void handlePHI(llvm::PHINode *PN); + typedef llvm::DenseMap ValueIDMap; + ValueIDMap ValueMap; + std::vector RevValueMap; + llvm::DenseSet SkipMap; + llvm::DominatorTree *DT = nullptr; /* only valid while the analyzer's run() is executing */ +}; + +class ClamBCRegAllocAnalyzer : public llvm::AnalysisInfoMixin +{ + + protected: + ClamBCRegAllocAnalysis clamBCRegAllocAnalysis; + + public: + ClamBCRegAllocAnalyzer() {} + virtual ~ClamBCRegAllocAnalyzer() {} + + friend AnalysisInfoMixin; + static llvm::AnalysisKey Key; + typedef ClamBCRegAllocAnalysis Result; + + ClamBCRegAllocAnalysis &run(llvm::Function &F, llvm::FunctionAnalysisManager &fam) + { + + llvm::DominatorTree &dt = fam.getResult(F); + clamBCRegAllocAnalysis.setDominatorTree(&dt); + clamBCRegAllocAnalysis.runOnFunction(F); + clamBCRegAllocAnalysis.setDominatorTree(nullptr); + + return clamBCRegAllocAnalysis; + } +}; + +#endif //CLAMBC_REGALLOC_H diff --git
a/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt b/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt new file mode 100644 index 0000000000..80739edcf0 --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL/CMakeLists.txt @@ -0,0 +1,42 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovefshl object library +# +add_library(clambcremovefshl_obj OBJECT) +target_sources(clambcremovefshl_obj + PRIVATE + ClamBCRemoveFSHL.cpp +) + +target_include_directories(clambcremovefshl_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovefshl_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovefshl_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovefshl shared library. +# +add_library( clambcremovefshl SHARED ) +target_link_libraries( clambcremovefshl + PUBLIC + clambcremovefshl_obj ) +set_target_properties( clambcremovefshl PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovefshl PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovefshl PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremovefshl DESTINATION ${CMAKE_INSTALL_LIBDIR}) + diff --git a/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp b/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp new file mode 100644 index 0000000000..a6698fd34c --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL/ClamBCRemoveFSHL.cpp @@ -0,0 +1,178 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Remove fshl intrinsic because it's not supported by our runtime. + */ +struct ClamBCRemoveFSHL : public PassInfoMixin { + protected: + Module *pMod = nullptr; + + FunctionType *fshlType = nullptr; + + virtual llvm::FunctionType *getFSHLFunctionType(Type *functionArgType) + { + return FunctionType::get(functionArgType, {functionArgType, functionArgType, functionArgType}, false); + } + + virtual llvm::Function *addFunction64(IntegerType *functionArgType, const char *const functionName) + { + /*Will determine if this is necessary during the rc phase.*/ + /* + This is an example function, needs to be converted to IR + static uint8_t fshl8_noshifts(uint8_t left, uint8_t right, uint8_t shift){ + uint8_t ret = 0; + uint8_t bitwidth = 8; + uint8_t bitIdx = (2 * bitwidth) - (shift % bitwidth) - 1; + uint8_t bit; + + for (size_t i = 0; i < bitwidth; i++){ + if (bitIdx >= bitwidth) { + bit = (left & (1 << (bitIdx - bitwidth))) ? 1 : 0; + ret |= (bit << ((bitwidth - 1) - i)); + } else { + bit = right & (1 << bitIdx); + ret |= (bit << ((bitwidth - 1) - i)); + } + bitIdx-- ; + } + + return ret; + } + */ + assert(0 && "Unimplemented"); + } + + /* + * addFunction was based on this. 
+ * static uint8_t fshl8_shifts(uint8_t left, uint8_t right, uint8_t shift){ + * uint16_t tmp = (left << 8) | right; + * tmp <<= (shift % 8); + * tmp = (tmp & 0xff00) >> 8; + * return (uint8_t) (tmp & 0xff); + * } + + */ + virtual llvm::Function *addFunction(IntegerType *functionArgType, const char *const functionName) + { + + if (64 == functionArgType->getBitWidth()) { + return addFunction64(functionArgType, functionName); + } + + FunctionType *ft = getFSHLFunctionType(functionArgType); + IntegerType *i64 = IntegerType::get(pMod->getContext(), 64); + ConstantInt *pciBitWidth = ConstantInt::get(i64, functionArgType->getBitWidth()); + + llvm::Function *fshl = Function::Create(ft, GlobalValue::InternalLinkage, functionName, *pMod); + Value *pLeft = fshl->getArg(0); + Value *pRight = fshl->getArg(1); + Value *pShift = fshl->getArg(2); + BasicBlock *pEntry = BasicBlock::Create(pMod->getContext(), "entry", fshl); + + pLeft = CastInst::CreateZExtOrBitCast(pLeft, i64, "zext_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pciBitWidth, "shl_", pEntry); + pRight = CastInst::CreateZExtOrBitCast(pRight, i64, "zext_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Or, pLeft, pRight, "or", pEntry); + pShift = CastInst::CreateZExtOrBitCast(pShift, i64, "zext_", pEntry); + + pShift = BinaryOperator::Create(Instruction::URem, pShift, pciBitWidth, "urem_", pEntry); + pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pShift, "shl_", pEntry); + + pLeft = BinaryOperator::Create(Instruction::LShr, pLeft, pciBitWidth, "shr_", pEntry); + pLeft = CastInst::CreateTruncOrBitCast(pLeft, functionArgType, "trunc_", pEntry); + ReturnInst::Create(pMod->getContext(), pLeft, pEntry); + + return fshl; + } + + virtual bool replaceCalls(const char *const intrinsicName, const char *functionName, IntegerType *functionArgType) + { + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()) { + Function *fshl = 
addFunction(functionArgType, functionName);
+            replaceAllCalls(getFSHLFunctionType(functionArgType), fshl, calls, "ClamBCRemoveFSHL_");
+
+            return true;
+        }
+        return false;
+    }
+
+  public:
+    virtual ~ClamBCRemoveFSHL() {}
+    /* Runs replaceCalls() for the i32, i16 and i8 fshl intrinsics; reports none() preserved if any of them fired. */
+    /*TODO: Add this to validator.*/
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+        /* The integer type passed here becomes the replacement's argument/return type, so it must match the intrinsic's width. */
+        bool bRet = replaceCalls("llvm.fshl.i32", ".fshl.i32", Type::getInt32Ty(pMod->getContext()));
+        bRet |= replaceCalls("llvm.fshl.i16", ".fshl.i16", Type::getInt16Ty(pMod->getContext()));
+        bRet |= replaceCalls("llvm.fshl.i8", ".fshl.i8", Type::getInt8Ty(pMod->getContext()));
+
+        if (bRet) {
+            return PreservedAnalyses::none();
+        }
+
+        return PreservedAnalyses::all();
+    }
+
+}; // end of struct ClamBCRemoveFSHL + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFSHL", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-fshl") { + FPM.addPass(ClamBCRemoveFSHL()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt new file mode 100644 index 0000000000..2148e721c5 --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovefreezeinsts object library +# +add_library(clambcremovefreezeinsts_obj OBJECT) +target_sources(clambcremovefreezeinsts_obj + PRIVATE + ClamBCRemoveFreezeInsts.cpp +) + +target_include_directories(clambcremovefreezeinsts_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + ..
+ ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovefreezeinsts_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovefreezeinsts_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovefreezeinsts shared library. +# +add_library( clambcremovefreezeinsts SHARED ) +target_link_libraries( clambcremovefreezeinsts + PUBLIC + clambcremovefreezeinsts_obj ) +set_target_properties( clambcremovefreezeinsts PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovefreezeinsts PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovefreezeinsts PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremovefreezeinsts DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp new file mode 100644 index 0000000000..1d3607570a --- /dev/null +++ b/libclambcc/ClamBCRemoveFreezeInsts/ClamBCRemoveFreezeInsts.cpp @@ -0,0 +1,119 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{
+/*
+ * Freeze Instructions are to guarantee sane behaviour in the case of undefs or poison values. The interpreter
+ * has no notion of freeze instructions, so we are removing them. The verifier will fail if there are undef or
+ * poison values in the IR, so this is safe to do.
+ */
+struct ClamBCRemoveFreezeInsts : public PassInfoMixin {
+  protected:
+    Module *pMod  = nullptr;
+    bool bChanged = false;
+    /* Appends every FreezeInst found in pFunc to freezeInsts; pFunc itself is not modified here. */
+    virtual void gatherFreezeInsts(Function *pFunc, std::vector &freezeInsts)
+    {
+        for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) {
+            BasicBlock *pBB = llvm::cast(fi);
+            for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) {
+                if (FreezeInst *pfi = llvm::dyn_cast(bi)) {
+                    freezeInsts.push_back(pfi);
+                }
+            }
+        }
+    }
+    /* Redirects the users of each freeze in pFunc to the freeze's operand, erases it, and sets bChanged. */
+    virtual void processFunction(Function *pFunc)
+    {
+        vector freezeInsts;
+        gatherFreezeInsts(pFunc, freezeInsts);
+        /* Erasure works from the gathered list, after the walk over the basic blocks has finished. */
+        for (size_t i = 0; i < freezeInsts.size(); i++) {
+            bChanged = true;
+
+            FreezeInst *pfi = freezeInsts[i];
+            pfi->replaceAllUsesWith(pfi->getOperand(0));
+            pfi->eraseFromParent();
+        }
+    }
+
+  public:
+    virtual ~ClamBCRemoveFreezeInsts() {}
+    /* Processes every function definition in m (declarations are skipped); none() is reported once anything was erased. */
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+
+        for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
+            Function *pFunc = llvm::cast(i);
+            if (pFunc->isDeclaration()) {
+                continue;
+            }
+
+            processFunction(pFunc);
+        }
+
+        if (bChanged) {
+            return PreservedAnalyses::none();
+        } else {
+            return PreservedAnalyses::all();
+        }
+    }
+}; // end of struct ClamBCRemoveFreezeInsts + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFreezeInsts", "v0.1", + [](PassBuilder &PB) {
PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-freeze-insts") { + FPM.addPass(ClamBCRemoveFreezeInsts()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt b/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt new file mode 100644 index 0000000000..27a20ce288 --- /dev/null +++ b/libclambcc/ClamBCRemoveICMPSLE/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveicmpsle object library +# +add_library(clambcremoveicmpsle_obj OBJECT) +target_sources(clambcremoveicmpsle_obj + PRIVATE + ClamBCRemoveICMPSLE.cpp +) + +target_include_directories(clambcremoveicmpsle_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveicmpsle_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveicmpsle_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveicmpsle shared library. +# +add_library( clambcremoveicmpsle SHARED ) +target_link_libraries( clambcremoveicmpsle + PUBLIC + clambcremoveicmpsle_obj ) +set_target_properties( clambcremoveicmpsle PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveicmpsle PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveicmpsle PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremoveicmpsle DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp b/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp new file mode 100644 index 0000000000..c6db611925 --- /dev/null +++ b/libclambcc/ClamBCRemoveICMPSLE/ClamBCRemoveICMPSLE.cpp @@ -0,0 +1,115 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. 
+ * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" + +#include +#include +#include + +#include +#include +#include + +#include + +using namespace llvm; +using namespace std; + +/* Modeled after CallGraphAnalysis */ + +namespace +{
+struct ClamBCRemoveICMPSLE : public PassInfoMixin {
+  protected:
+    Module *pMod  = nullptr;
+    bool bChanged = false;
+    /* Appends every icmp in pFunc whose predicate is ICMP_SLE to insts; pFunc is not modified here. */
+    virtual void gatherInstructions(Function *pFunc, std::vector &insts)
+    {
+        for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) {
+            BasicBlock *pBB = llvm::cast(i);
+            for (auto bbi = pBB->begin(), bbe = pBB->end(); bbi != bbe; bbi++) {
+                ICmpInst *inst = llvm::dyn_cast(bbi);
+                if (inst) {
+                    if (CmpInst::ICMP_SLE == inst->getPredicate()) {
+                        insts.push_back(inst);
+                    }
+                }
+            }
+        }
+    }
+    /* Rewrites each "a sle b" in pFunc as "b sge a": ICmpInst::swapOperands() swaps the predicate with the operands. */
+    virtual void processFunction(Function *pFunc)
+    {
+        std::vector insts;
+        gatherInstructions(pFunc, insts);
+        bChanged = bChanged || !insts.empty(); /* every gathered compare is rewritten below, so run() must not report all() */
+        for (size_t i = 0; i < insts.size(); i++) {
+            insts[i]->swapOperands();
+        }
+    }
+
+  public:
+    virtual ~ClamBCRemoveICMPSLE() {}
+
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+        for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
+            Function *pFunc = llvm::dyn_cast(i);
+            if (pFunc) {
+                if (pFunc->isDeclaration()) {
+                    continue;
+                }
+
+                processFunction(pFunc);
+            }
+        }
+ + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); + } +}; // end of struct ClamBCRemoveICMPSLE + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveICMPSLE", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-icmp-sle") { + FPM.addPass(ClamBCRemoveICMPSLE()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt b/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt new file mode 100644 index 0000000000..74d9a9efd0 --- /dev/null +++ b/libclambcc/ClamBCRemovePointerPHIs/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremovepointerphis object library +# +add_library(clambcremovepointerphis_obj OBJECT) +target_sources(clambcremovepointerphis_obj + PRIVATE + ClamBCRemovePointerPHIs.cpp +) + +target_include_directories(clambcremovepointerphis_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremovepointerphis_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremovepointerphis_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremovepointerphis shared library. 
+# +add_library( clambcremovepointerphis SHARED ) +target_link_libraries( clambcremovepointerphis + PUBLIC + clambcremovepointerphis_obj ) +set_target_properties( clambcremovepointerphis PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremovepointerphis PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremovepointerphis PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremovepointerphis DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp index 4c8409641c..26af45e0cf 100644 --- a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp +++ b/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp @@ -1,3 +1,6 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" +#include "ClamBCModule.h" #include #include @@ -8,24 +11,22 @@ #include -#include #include +#include +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" -#include "Common/ClamBCModule.h" using namespace llvm; #include namespace { -class ClambcRemovePointerPHIs : public FunctionPass +class ClamBCRemovePointerPHIs : public PassInfoMixin { protected: - Function *pFunc = nullptr; + llvm::Module *pMod = nullptr; - std::vector gatherPHIs() + std::vector gatherPHIs(llvm::Function *pFunc) { std::vector ret; @@ -178,14 +179,13 @@ class ClambcRemovePointerPHIs : public FunctionPass if (not pn->getType()->isPointerTy()) { return false; } - //std::vector delLst; Value *pBasePtr = findBasePointer(pn); if (nullptr == pBasePtr) { /*No unique base pointer.*/ return false; } - IntegerType *pType = Type::getInt64Ty(pFunc->getParent()->getContext()); + IntegerType *pType = Type::getInt64Ty(pMod->getContext()); Constant *zero = ConstantInt::get(pType, 0); Value *initValue = zero; PHINode *idxNode = PHINode::Create(pType, pn->getNumIncomingValues(), "ClamBCRemovePointerPHIs_idx_", pn); @@ -226,7 
+226,13 @@ class ClambcRemovePointerPHIs : public FunctionPass std::vector newInsts; Instruction *insPt = findFirstNonPHI(pn->getParent()); - Instruction *gepiNew = GetElementPtrInst::Create(nullptr, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); + PointerType *pt = llvm::dyn_cast(pBasePtr->getType()); + if (nullptr == pt) { + assert(0 && "This pass is only for pointer phis, how did we get here???"); + } + Type *elementType = pt->getPointerElementType(); + + Instruction *gepiNew = GetElementPtrInst::Create(elementType, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); if (pn->getType() != gepiNew->getType()) { gepiNew = CastInst::CreatePointerCast(gepiNew, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); } @@ -283,33 +289,59 @@ class ClambcRemovePointerPHIs : public FunctionPass } public: - static char ID; - ClambcRemovePointerPHIs() - : FunctionPass(ID) {} + ClamBCRemovePointerPHIs() {} - bool runOnFunction(Function &F) override + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &mam) { - pFunc = &F; + /*Currently unused. 
Will remove after the RC phase.*/ + DEBUGERR << "TODO: EVALUATE WHETHER OR NOT I NEED THIS" + << "\n"; + return PreservedAnalyses::all(); + + pMod = &m; bool ret = false; - std::vector phis = gatherPHIs(); - for (size_t i = 0; i < phis.size(); i++) { - PHINode *pn = phis[i]; + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + llvm::Function *pFunc = llvm::dyn_cast(i); + if (nullptr == pFunc) { + continue; + } + std::vector phis = gatherPHIs(pFunc); + for (size_t i = 0; i < phis.size(); i++) { + PHINode *pn = phis[i]; - if (handlePHI(pn)) { - ret = true; + if (handlePHI(pn)) { + ret = true; + } } } - return ret; + if (ret) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of class ClambcRemovePointerPHIs } // end of anonymous namespace -char ClambcRemovePointerPHIs::ID = 0; -static RegisterPass X("clambc-remove-pointer-phis", "Remove PHI Nodes with pointers", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemovePointerPHIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-pointer-phis") { + FPM.addPass(ClamBCRemovePointerPHIs()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp b/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp deleted file mode 100644 index 012c91773b..0000000000 --- a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp +++ /dev/null @@ -1,116 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" 
- -#include "Common/clambc.h" - -using namespace llvm; - -namespace -{ -class RemoveSelectInsts : public ModulePass -{ - protected: - bool bChanged = false; - Module* pMod = nullptr; - - void processBasicBlock(BasicBlock* pBB, std::vector& selects) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - SelectInst* pSelect = llvm::dyn_cast(i); - if (pSelect) { - selects.push_back(pSelect); - } - } - } - - void processFunction(Function* pFunc, std::vector& selects) - { - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - BasicBlock* pBB = llvm::cast(i); - processBasicBlock(pBB, selects); - } - } - - std::vector gatherSelects() - { - std::vector selects; - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function* pFunc = llvm::cast(i); - - processFunction(pFunc, selects); - } - - return selects; - } - - Instruction* getAllocaInsertPoint(SelectInst* pSelect) - { - BasicBlock* entryBlock = llvm::cast(pSelect->getParent()->getParent()->begin()); - for (auto i = entryBlock->begin(), e = entryBlock->end(); i != e; i++) { - Instruction* pInst = llvm::cast(i); - if (not llvm::isa(pInst)) { - return pInst; - } - } - - assert(0 && "MALFORMED BASIC BLOCK"); - return nullptr; - } - - void replaceSelectInst(SelectInst* pSelect) - { - - Instruction* insertBefore = getAllocaInsertPoint(pSelect); - AllocaInst* pAlloca = new AllocaInst(pSelect->getType(), - pMod->getDataLayout().getProgramAddressSpace(), - "ClamBCRemoveSelectInst", insertBefore); - - BasicBlock* pBB = llvm::cast(pSelect->getParent()); - - BasicBlock* pSplit = pBB->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - new StoreInst(pSelect->getFalseValue(), pAlloca, pBB->getTerminator()); - - new StoreInst(pSelect->getTrueValue(), pAlloca, pSelect); - - BasicBlock* pSplit2 = pSplit->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - BranchInst::Create(pSplit, pSplit2, pSelect->getCondition(), pBB->getTerminator()); - - LoadInst* pLoad = new 
LoadInst(pAlloca->getType()->getPointerElementType(), pAlloca, "ClamBCRemoveSelectInst", pSelect); - pSelect->replaceAllUsesWith(pLoad); - - pBB->getTerminator()->eraseFromParent(); - pSelect->eraseFromParent(); - } - - public: - static char ID; - RemoveSelectInsts() - : ModulePass(ID) {} - - virtual bool runOnModule(Module& m) override - { - pMod = &m; - - std::vector selects = gatherSelects(); - for (size_t i = 0; i < selects.size(); i++) { - SelectInst* pSelect = selects[i]; - - replaceSelectInst(pSelect); - } - - return bChanged; - } -}; // end of struct RemoveSelectInsts -} // end of anonymous namespace - -char RemoveSelectInsts::ID = 0; -static RegisterPass X("remove-selects", "RemoveSelectInsts Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt b/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt new file mode 100644 index 0000000000..f024437198 --- /dev/null +++ b/libclambcc/ClamBCRemoveUSUB/CMakeLists.txt @@ -0,0 +1,44 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveusub object library +# +add_library(clambcremoveusub_obj OBJECT) +target_sources(clambcremoveusub_obj + PRIVATE + ClamBCRemoveUSUB.cpp +) + +target_include_directories(clambcremoveusub_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveusub_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveusub_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveusub shared library. 
+# +add_library( clambcremoveusub SHARED ) +target_link_libraries( clambcremoveusub + PUBLIC + clambcremoveusub_obj ) +set_target_properties( clambcremoveusub PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveusub PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveusub PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremoveusub DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + + diff --git a/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp b/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp new file mode 100644 index 0000000000..2888d5c2a4 --- /dev/null +++ b/libclambcc/ClamBCRemoveUSUB/ClamBCRemoveUSUB.cpp @@ -0,0 +1,139 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Remove usub intrinsic because it's not supported by our runtime. 
+ */
+struct ClamBCRemoveUSUB : public PassInfoMixin {
+  protected:
+    Module *pMod                = nullptr;
+    const char *const USUB_NAME = ".usub";
+
+    FunctionType *usubType = nullptr;
+    /* Type of the generated replacement: T(T, T), where T is functionArgType. */
+    virtual llvm::FunctionType *getUSUBFunctionType(Type *functionArgType)
+    {
+        return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false);
+    }
+    /* Creates internal function USUB_NAME in pMod: yields left - right when left > right (unsigned), otherwise 0. */
+    virtual llvm::Function *addUSUB(Type *functionArgType)
+    {
+        uint32_t addressSpace = pMod->getDataLayout().getAllocaAddrSpace();
+        /* The result slot is a stack object, so it uses the alloca address space rather than the program (code) one. */
+        FunctionType *ft = getUSUBFunctionType(functionArgType);
+
+        llvm::Function *usub  = Function::Create(ft, GlobalValue::InternalLinkage, USUB_NAME, *pMod);
+        Value *pLeft          = usub->getArg(0);
+        Value *pRight         = usub->getArg(1);
+        BasicBlock *pEntry    = BasicBlock::Create(pMod->getContext(), "entry", usub);
+        BasicBlock *pLHS      = BasicBlock::Create(pMod->getContext(), "left", usub);
+        BasicBlock *pRHS      = BasicBlock::Create(pMod->getContext(), "right", usub);
+        BasicBlock *pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", usub);
+
+        //entry block
+        AllocaInst *retVar = new AllocaInst(functionArgType, addressSpace, "ret", pEntry);
+        ICmpInst *cmp      = new ICmpInst(*pEntry, CmpInst::ICMP_UGT, pLeft, pRight, "icmp");
+        BranchInst::Create(pLHS, pRHS, cmp, pEntry);
+
+        //left > right
+        new StoreInst(BinaryOperator::Create(Instruction::Sub, pLeft, pRight, "ClamBCRemoveUSUB_", pLHS), retVar, pLHS);
+        BranchInst::Create(pRetBlock, pLHS);
+
+        //right >= left
+        new StoreInst(ConstantInt::get(functionArgType, 0), retVar, pRHS);
+        BranchInst::Create(pRetBlock, pRHS);
+        //ret block: return whichever value the taken branch stored
+        LoadInst *pli = new LoadInst(functionArgType, retVar, "load", pRetBlock);
+        ReturnInst::Create(pMod->getContext(), pli, pRetBlock);
+        return usub;
+    }
+
+    virtual bool replaceCalls(const char *const intrinsicName, Type *functionArgType)
+    {
+        std::vector calls;
+        gatherCallsToIntrinsic(pMod, intrinsicName, calls);
+        if (calls.size()) {
+            Function *usub = addUSUB(functionArgType);
+            replaceAllCalls(getUSUBFunctionType(functionArgType), usub,
calls, "ClamBCRemoveUSUB_"); + + return true; + } + return false; + } + + public: + virtual ~ClamBCRemoveUSUB() {} + + /*TODO: Add detection of these instructions to the validator.*/ + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.usub.sat.i32", Type::getInt32Ty(pMod->getContext())); + // bRet |= replaceCalls("llvm.usub.i16", Type::getInt16Ty(pMod->getContext())); + + if (bRet) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + +}; // end of struct ClamBCRemoveUSUB + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUSUB", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-usub") { + FPM.addPass(ClamBCRemoveUSUB()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt b/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt new file mode 100644 index 0000000000..950a74e576 --- /dev/null +++ b/libclambcc/ClamBCRemoveUndefs/CMakeLists.txt @@ -0,0 +1,42 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveundefs object library +# +add_library(clambcremoveundefs_obj OBJECT) +target_sources(clambcremoveundefs_obj + PRIVATE + ClamBCRemoveUndefs.cpp +) + +target_include_directories(clambcremoveundefs_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. 
+ ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveundefs_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveundefs_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveundefs shared library. +# +add_library( clambcremoveundefs SHARED ) +target_link_libraries( clambcremoveundefs + PUBLIC + clambcremoveundefs_obj ) +set_target_properties( clambcremoveundefs PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveundefs PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveundefs PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremoveundefs DESTINATION ${CMAKE_INSTALL_LIBDIR}) + diff --git a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp b/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp index 2151b1142f..5f5ed29502 100644 --- a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp +++ b/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp @@ -1,19 +1,23 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include "llvm/IR/Module.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" using namespace llvm; +/* THIS APPEARS TO NO LONGER BE NEEDED. LEAVING IN PLACE DURING THE RC PHASE, JUST IN CASE. 
*/ + namespace { /* @@ -32,8 +36,7 @@ namespace store %struct._state* %state, %struct._state** %state.addr, align 8 store i32 %sizeof_state, i32* %sizeof_state.addr, align 4 */ -class ClamBCRemoveUndefs : public ModulePass -{ +struct ClamBCRemoveUndefs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; std::map aborts; @@ -56,9 +59,8 @@ class ClamBCRemoveUndefs : public ModulePass FunctionType *rterrTy = FunctionType::get( Type::getInt32Ty(BB->getContext()), {Type::getInt32Ty(BB->getContext())}, false); - Constant *func_abort = - BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); - Constant *func_rterr = + FunctionCallee func_abort = BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); + FunctionCallee func_rterr = BB->getParent()->getParent()->getOrInsertFunction("bytecode_rt_error", rterrTy); BasicBlock *abort = BasicBlock::Create(BB->getContext(), "rterr.trig", BB->getParent()); Constant *PN = ConstantInt::get(Type::getInt32Ty(BB->getContext()), 99); @@ -217,14 +219,15 @@ class ClamBCRemoveUndefs : public ModulePass } public: - static char ID; - ClamBCRemoveUndefs() - : ModulePass(ID) {} + ClamBCRemoveUndefs() {} virtual ~ClamBCRemoveUndefs() {} - bool runOnModule(Module &m) override + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { + /*This no longer appears to be needed. 
Will keep it during the -rc phase and then remove.*/ + return PreservedAnalyses::all(); + pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -240,13 +243,30 @@ class ClamBCRemoveUndefs : public ModulePass delLst[i]->eraseFromParent(); } - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of struct ClamBCRemoveUndefs } // end of anonymous namespace -char ClamBCRemoveUndefs::ID = 0; -static RegisterPass X("clambc-remove-undefs", "Remove Undefs", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUndefs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-undefs") { + FPM.addPass(ClamBCRemoveUndefs()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt new file mode 100644 index 0000000000..7a03f1b74e --- /dev/null +++ b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/CMakeLists.txt @@ -0,0 +1,42 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcremoveunsupportedicmpintrinsics object library +# +add_library(clambcremoveunsupportedicmpintrinsics_obj OBJECT) +target_sources(clambcremoveunsupportedicmpintrinsics_obj + PRIVATE + ClamBCRemoveUnsupportedICMPIntrinsics.cpp +) + +target_include_directories(clambcremoveunsupportedicmpintrinsics_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. 
+ ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcremoveunsupportedicmpintrinsics_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcremoveunsupportedicmpintrinsics_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcremoveunsupportedicmpintrinsics shared library. +# +add_library( clambcremoveunsupportedicmpintrinsics SHARED ) +target_link_libraries( clambcremoveunsupportedicmpintrinsics + PUBLIC + clambcremoveunsupportedicmpintrinsics_obj ) +set_target_properties( clambcremoveunsupportedicmpintrinsics PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcremoveunsupportedicmpintrinsics PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcremoveunsupportedicmpintrinsics PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcremoveunsupportedicmpintrinsics DESTINATION ${CMAKE_INSTALL_LIBDIR}) + diff --git a/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp new file mode 100644 index 0000000000..9a7ae73374 --- /dev/null +++ b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics/ClamBCRemoveUnsupportedICMPIntrinsics.cpp @@ -0,0 +1,151 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Replace the smin/smax/umin/umax intrinsics with internal helper functions, because the intrinsics are not supported by our runtime. + */ +struct ClamBCRemoveUnsupportedICMPIntrinsics : public PassInfoMixin { + protected: + Module *pMod = nullptr; + //const char * const UnsupportedICMPIntrinsics_NAME = ".smin"; + + FunctionType *sminType = nullptr; + + virtual llvm::FunctionType *getUnsupportedICMPIntrinsicsFunctionType(Type *functionArgType) + { + return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false); + } + + virtual llvm::Function *addFunction(Type *functionArgType, + const char *const newName, + llvm::CmpInst::Predicate predicate) + { + + uint32_t addressSpace = pMod->getDataLayout().getAllocaAddrSpace(); /* stack slots belong in the alloca address space, not the program (function) address space */ + + FunctionType *ft = getUnsupportedICMPIntrinsicsFunctionType(functionArgType); + + llvm::Function *smin = Function::Create(ft, GlobalValue::InternalLinkage, newName, *pMod); + Value *pLeft = smin->getArg(0); + Value *pRight = smin->getArg(1); + BasicBlock *pEntry = BasicBlock::Create(pMod->getContext(), "entry", smin); + BasicBlock *pLHS = BasicBlock::Create(pMod->getContext(), "left", smin); + BasicBlock *pRHS = BasicBlock::Create(pMod->getContext(), "right", smin); + BasicBlock *pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", smin); + + //entry block + AllocaInst *retVar = new AllocaInst(functionArgType, addressSpace, "ret", pEntry); + ICmpInst *cmp = new ICmpInst(*pEntry, predicate, pLeft, pRight, "icmp"); + BranchInst::Create(pLHS, pRHS, cmp, pEntry); + + //predicate(left, right) holds: the result is left + new StoreInst(pLeft, retVar, pLHS); + BranchInst::Create(pRetBlock, pLHS); + + //predicate(left, right) does not hold: the result is right
+ new StoreInst(pRight, retVar, pRHS); + BranchInst::Create(pRetBlock, pRHS); + + LoadInst *pli = new LoadInst(functionArgType, retVar, "load", pRetBlock); + ReturnInst::Create(pMod->getContext(), pli, pRetBlock); + return smin; + } + + virtual bool replaceCalls(const char *const intrinsicName, + const char *newName, + llvm::CmpInst::Predicate predicate, + Type *functionArgType) + { + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()) { + Function *smin = addFunction(functionArgType, newName, predicate); + replaceAllCalls(getUnsupportedICMPIntrinsicsFunctionType(functionArgType), smin, calls, "ClamBCRemoveUnsupportedICMPIntrinsics_"); + + return true; + } + return false; + } + + public: + virtual ~ClamBCRemoveUnsupportedICMPIntrinsics() {} + + /*TODO: Add detection of these instructions to the validator.*/ + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.smin.i32", ".smin.32", CmpInst::ICMP_SLT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smin.i16", ".smin.16", CmpInst::ICMP_SLT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i16", ".umin.16", CmpInst::ICMP_ULT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i32", ".umin.32", CmpInst::ICMP_ULT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i32", ".umax.32", CmpInst::ICMP_UGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i16", ".umax.16", CmpInst::ICMP_UGT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i32", ".smax.32", CmpInst::ICMP_SGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i16", ".smax.16", CmpInst::ICMP_SGT, Type::getInt16Ty(pMod->getContext())); + + if (bRet) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + +}; // end of struct ClamBCRemoveUnsupportedICMPIntrinsics + +} // end of 
anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUnsupportedICMPIntrinsics", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-unsupported-icmp-intrinsics") { + FPM.addPass(ClamBCRemoveUnsupportedICMPIntrinsics()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCTrace/CMakeLists.txt b/libclambcc/ClamBCTrace/CMakeLists.txt new file mode 100644 index 0000000000..d209e73666 --- /dev/null +++ b/libclambcc/ClamBCTrace/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambctrace object library +# +add_library(clambctrace_obj OBJECT) +target_sources(clambctrace_obj + PRIVATE + ClamBCTrace.cpp +) + +target_include_directories(clambctrace_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambctrace_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambctrace_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambctrace shared library. 
+# +add_library( clambctrace SHARED ) +target_link_libraries( clambctrace + PUBLIC + clambctrace_obj ) +set_target_properties( clambctrace PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambctrace PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambctrace PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambctrace DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCTrace/ClamBCTrace.cpp b/libclambcc/ClamBCTrace/ClamBCTrace.cpp index 2943147ad2..0b55884c68 100644 --- a/libclambcc/ClamBCTrace/ClamBCTrace.cpp +++ b/libclambcc/ClamBCTrace/ClamBCTrace.cpp @@ -20,7 +20,6 @@ * MA 02110-1301, USA. */ #include "clambc.h" -#include "ClamBCModule.h" #include "ClamBCCommon.h" #include "ClamBCUtilities.h" @@ -34,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -55,22 +56,19 @@ static cl::opt InsertTracing("clambc-trace", cl::Hidden, cl::init(false), cl::desc("Enable tracing of bytecode execution")); -namespace +namespace ClamBCTrace { -class ClamBCTrace : public ModulePass + +class ClamBCTrace : public PassInfoMixin { public: - static char ID; - ClamBCTrace() - : ModulePass(ID) {} + ClamBCTrace() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Execution Tracing"; } - virtual bool runOnModule(Module &M); + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); }; -char ClamBCTrace::ID; -} // namespace /* declare i32 @trace_directory(i8*, i32) @@ -87,10 +85,11 @@ declare i32 @trace_ptr(i8*, i32) */ -bool ClamBCTrace::runOnModule(Module &M) +PreservedAnalyses ClamBCTrace::run(Module &M, ModuleAnalysisManager &MAM) { - if (!InsertTracing) - return false; + if (!InsertTracing) { + return PreservedAnalyses::all(); + } unsigned MDDbgKind = M.getContext().getMDKindID("dbg"); DenseMap scopeIDs; unsigned scopeid = 0; @@ -102,16 +101,16 @@ bool ClamBCTrace::runOnModule(Module &M) args.push_back(I32Ty); FunctionType *FTy = 
FunctionType::get(I32Ty, args, false); /* llvm 10 replaces this with FunctionCallee. */ - Constant *trace_directory = M.getOrInsertFunction("trace_directory", FTy); - Constant *trace_scope = M.getOrInsertFunction("trace_scope", FTy); - Constant *trace_source = M.getOrInsertFunction("trace_source", FTy); - Constant *trace_op = M.getOrInsertFunction("trace_op", FTy); - Constant *trace_value = M.getOrInsertFunction("trace_value", FTy); - Constant *trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); + FunctionCallee trace_directory = M.getOrInsertFunction("trace_directory", FTy); + FunctionCallee trace_scope = M.getOrInsertFunction("trace_scope", FTy); + FunctionCallee trace_source = M.getOrInsertFunction("trace_source", FTy); + FunctionCallee trace_op = M.getOrInsertFunction("trace_op", FTy); + FunctionCallee trace_value = M.getOrInsertFunction("trace_value", FTy); + FunctionCallee trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); assert(trace_scope && trace_source && trace_op && trace_value && trace_directory && trace_ptr); - if (!trace_directory->use_empty() || !trace_scope->use_empty() || !trace_source->use_empty() || !trace_op->use_empty() || - !trace_value->use_empty() || !trace_ptr->use_empty()) { + if (!trace_directory.getCallee()->use_empty() || !trace_scope.getCallee()->use_empty() || !trace_source.getCallee()->use_empty() || !trace_op.getCallee()->use_empty() || + !trace_value.getCallee()->use_empty() || !trace_ptr.getCallee()->use_empty()) { ClamBCStop("Tracing API can only be used by compiler!\n", &M); } @@ -156,7 +155,6 @@ bool ClamBCTrace::runOnModule(Module &M) while (llvm::isa(scope)) { DILexicalBlock *lex = llvm::cast(scope); //scope = lex->getContext(); - /*aragusa: I have no idea if this is the right thing to do here.*/ scope = lex->getScope(); } @@ -197,11 +195,6 @@ bool ClamBCTrace::runOnModule(Module &M) for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); AI != AE; ++AI) { if (isa(AI->getType())) { -#if 0 - Value *V = 
builder.CreateIntCast(AI, Type::getInt32Ty(M.getContext()), false); - Value *ValueName = builder.CreateGlobalStringPtr(AI->getName().data()); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(AI->getType())) { Value *V = builder.CreatePointerCast(AI, PointerType::getUnqual(Type::getInt8Ty(M.getContext()))); @@ -218,13 +211,7 @@ bool ClamBCTrace::runOnModule(Module &M) std::vector args = { Op, ConstantInt::get(Type::getInt32Ty(M.getContext()), Loc->getColumn())}; builder.CreateCall(trace_op, args, "ClamBCTrace_trace_op"); - //Value *ValueName = builder.CreateGlobalStringPtr(II->getName().data()); if (isa(II->getType())) { -#if 0 - builder.SetInsertPoint(&*J, BBIt); - Value *V = builder.CreateIntCast(II, Type::getInt32Ty(M.getContext()), false); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(II->getType())) { builder.SetInsertPoint(&*J, BBIt); Value *V = builder.CreatePointerCast(II, @@ -237,10 +224,26 @@ bool ClamBCTrace::runOnModule(Module &M) } } } - return true; + return PreservedAnalyses::none(); } -llvm::ModulePass *createClamBCTrace() +} // namespace ClamBCTrace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCTrace(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCTrace", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-trace") { + FPM.addPass(ClamBCTrace::ClamBCTrace()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCVerifier/CMakeLists.txt b/libclambcc/ClamBCVerifier/CMakeLists.txt new file mode 100644 index 0000000000..ca3bf30ec8 --- /dev/null +++ b/libclambcc/ClamBCVerifier/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. 
+ +# +# The clambcverifier object library +# +add_library(clambcverifier_obj OBJECT) +target_sources(clambcverifier_obj + PRIVATE + ClamBCVerifier.cpp +) + +target_include_directories(clambcverifier_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcverifier_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcverifier_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcverifier shared library. +# +add_library( clambcverifier SHARED ) +target_link_libraries( clambcverifier + PUBLIC + clambcverifier_obj ) +set_target_properties( clambcverifier PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcverifier PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcverifier PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcverifier DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp index c6f1f290fc..3102c1b8e6 100644 --- a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp +++ b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp @@ -41,55 +41,57 @@ * } */ -#include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" +#include "ClamBCDiagnostics.h" +#include "clambc.h" +#include "ClamBCUtilities.h" -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include +#include +#include -using namespace llvm; +#include -#include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" #include #include -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" +#include +#include +#include +#include #include #include #include #include #include #include -#include 
"llvm/Support/CommandLine.h" +#include #include #include #include -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include #include -#include "llvm/ADT/SmallSet.h" +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" +#include +#include +#include +#include -static cl::opt - StopOnFirstError("clambc-stopfirst", cl::init(false), - cl::desc("Stop on first error in the verifier")); -namespace +#include + +#include + +using namespace llvm; + +namespace ClamBCVerifier { -class ClamBCVerifier : public FunctionPass, +class ClamBCVerifier : public PassInfoMixin, public InstVisitor { - ScalarEvolution *SE; - DominatorTree *DT; - BasicBlock *AbrtBB; bool Final; llvm::Module *pMod = nullptr; @@ -110,6 +112,8 @@ class ClamBCVerifier : public FunctionPass, } bool visitSelectInst(SelectInst &I) { + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" + << "Selects need tobe removed, so this should be a false\n"; return true; } bool visitBranchInst(BranchInst &BI) @@ -134,8 +138,24 @@ class ClamBCVerifier : public FunctionPass, return true; } + /* + * FreezeInst's are used to guarantee a value being set to something fixed + * if it is undef or a poison value. They are a noop otherwise, so we will allow + * them in the verifier, and remove them in a pass to be run after the verifier. + * (a 'verifier' shouldn't be changing the IR). 
+ */ + bool visitFreezeInst(FreezeInst &I) + { + return true; + } + bool visitInstruction(Instruction &I) { + + DEBUG_VALUE(&I); +#define DEBUG_NODEREF(val) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << val << "\n"; + DEBUG_NODEREF(llvm::isa(&I)); + printDiagnostic("Unhandled instruction in verifier", &I); return false; } @@ -144,7 +164,12 @@ class ClamBCVerifier : public FunctionPass, { Function *ret = pci->getCalledFunction(); if (nullptr == ret) { - Value *v = pci->getCalledValue(); + Value *v = pci->getCalledOperand(); /*The callee is the last operand of a call, so getOperand(0) would return the first argument.*/ + if (nullptr == v) { + llvm::errs() << "<" << __LINE__ << ">" << *pci << "\n"; + llvm::errs() << "<" << __LINE__ << ">" << "called operand is null\n"; + assert(0 && "How do I handle function pointers?"); + } if (BitCastOperator *bco = llvm::dyn_cast(v)) { ret = llvm::dyn_cast(bco->getOperand(0)); } @@ -152,29 +177,21 @@ class ClamBCVerifier : public FunctionPass, return ret; } - bool visitCallInst(CallInst &CI) + + bool validateFunction(const llvm::Function *pFunc) { - Function *F = getCalledFunctionFromCallInst(&CI); - if (!F) { - printDiagnostic("Indirect call checking not implemented yet!", &CI); - return false; - } - if (F->getCallingConv() != CI.getCallingConv()) { - printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); - return false; - } - if (F->isVarArg()) { - if (!F->getFunctionType()->getNumParams()) { + if (pFunc->isVarArg()) { + if (!pFunc->getFunctionType()->getNumParams()) { printDiagnostic(("Calling implicitly declared function '" + - F->getName() + "' is not supported (did you forget to" - "implement it, or typoed the function name?)") + pFunc->getName() + "' is not supported (did you forget to" + "implement it, or typoed the function name?)") .str(), - &CI); + pFunc); } else { printDiagnostic("Checking calls to vararg functions/functions without" "a prototype is not supported!", - &CI); + pFunc); } return false; } @@ -182,6 +199,23 @@ class
ClamBCVerifier : public FunctionPass, return true; } + bool visitCallInst(CallInst &CI) + { + Function *F = getCalledFunctionFromCallInst(&CI); + if (!F) { + /*Determine if we want to allow indirect calls*/ + printDiagnostic("Indirect call checking not implemented!", &CI); + return false; + } + + if (F->getCallingConv() != CI.getCallingConv()) { + printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); + return false; + } + + return validateFunction(F); + } + bool visitPHINode(PHINode &PN) { for (unsigned i = 0; i < PN.getNumIncomingValues(); i++) { @@ -209,41 +243,122 @@ class ClamBCVerifier : public FunctionPass, return true; } + virtual bool isHandled(Instruction *pInst) + { + bool bRet = llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst); + + return bRet; + } + + virtual bool isUndefOrPoisonValue(Value *pv) + { + return llvm::isa(pv); + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce, std::set &visited) + { + if (visited.end() != std::find(visited.begin(), visited.end(), pce)) { + return false; + } + visited.insert(pce); + + for (size_t i = 0; i < pce->getNumOperands(); i++) { + Value *pv = pce->getOperand(i); + if (isUndefOrPoisonValue(pv)) { + return true; + } + if (ConstantExpr *ce = llvm::dyn_cast(pv)) { + if (hasUndefsOrPoisonValues(ce, visited)) { + return true; + } + } + } + + return false; + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce) + { + std::set visited; + return hasUndefsOrPoisonValues(pce, visited); + } + + /*PoisonValue is derived from UndefValue, so we only have to check for that one.*/ + virtual bool hasUndefsOrPoisonValues(Instruction *pInst) + { + for (size_t i = 0; i < pInst->getNumOperands(); i++) { + Value *pVal = pInst->getOperand(i); + if (llvm::isa(pVal)) { + continue; + } + + if 
(isUndefOrPoisonValue(pVal)) { + return true; + } + + if (ConstantExpr *pce = llvm::dyn_cast(pVal)) { + if (hasUndefsOrPoisonValues(pce)) { + return true; + } + } + } + return false; + } + + virtual bool walk(Function *pFunc) + { + bool bRet = true; /* walk() returns false as soon as any instruction in pFunc is rejected */ + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) { + BasicBlock *pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) { + Instruction *pInst = llvm::cast(bi); + if (hasUndefsOrPoisonValues(pInst)) { + printDiagnostic("Poison value or Undef value found in instruction.", pInst); + return false; + } + + if (PHINode *pn = llvm::dyn_cast(pInst)) { + bRet = visitPHINode(*pn); + } else if (CallInst *pci = llvm::dyn_cast(pInst)) { + bRet = visitCallInst(*pci); + } else if (SwitchInst *psi = llvm::dyn_cast(pInst)) { + bRet = visitSwitchInst(*psi); + } else { + bRet = isHandled(pInst); + } + + if (!bRet) { + return false; /* a plain break only left the inner loop, so a later basic block could overwrite bRet and hide the rejection */ + } + } + } + + return bRet; + } + public: - static char ID; explicit ClamBCVerifier() - : FunctionPass(ID), Final(false) {} + : Final(false) {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Verifier"; } - virtual bool runOnFunction(Function &F) + PreservedAnalyses run(Function &F, FunctionAnalysisManager &fam) { - pMod = F.getParent(); - AbrtBB = 0; - SE = &getAnalysis().getSE(); - ; - DT = &getAnalysis().getDomTree(); - - bool OK = true; - std::vector insns; - // verifying can insert runtime checks, so be safe and create an initial - // list of instructions to process so we are not affected by transforms.
- for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - insns.push_back(&*I); - } - for (std::vector::iterator I = insns.begin(), E = insns.end(); - I != E; ++I) { - OK &= visit(*I); - if (!OK && StopOnFirstError) - break; + pMod = F.getParent(); + bool OK = validateFunction(&F); + if (OK) { + OK = walk(&F); } - if (!OK) + + if (!OK) { ClamBCStop("Verifier rejected bytecode function due to errors", &F); - return false; + } + + return PreservedAnalyses::all(); } virtual void getAnalysisUsage(AnalysisUsage &AU) const { @@ -252,10 +367,25 @@ class ClamBCVerifier : public FunctionPass, AU.setPreservesAll(); } }; -char ClamBCVerifier::ID = 0; +//char ClamBCVerifier::ID = 0; -} // namespace +} // namespace ClamBCVerifier -static RegisterPass X("clambc-verifier", "ClamBCVerifier Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCVerifier", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &FPM, + ArrayRef) { + if (Name == "clambc-verifier") { + FPM.addPass(ClamBCVerifier::ClamBCVerifier()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCWriter/CMakeLists.txt b/libclambcc/ClamBCWriter/CMakeLists.txt new file mode 100644 index 0000000000..a3d1fc3b65 --- /dev/null +++ b/libclambcc/ClamBCWriter/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambcwriter object library +# +add_library(clambcwriter_obj OBJECT) +target_sources(clambcwriter_obj + PRIVATE + ClamBCWriter.cpp +) + +target_include_directories(clambcwriter_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. 
+ ${CLAMBC_LIB_DIR}/Common # For anything in 'Common' + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambcwriter_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambcwriter_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambcwriter shared library. +# +add_library( clambcwriter SHARED ) +target_link_libraries( clambcwriter + PUBLIC + clambcwriter_obj ) +set_target_properties( clambcwriter PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambcwriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambcwriter PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambcwriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) + + diff --git a/libclambcc/ClamBCWriter/ClamBCWriter.cpp b/libclambcc/ClamBCWriter/ClamBCWriter.cpp index e1f60a4fba..471a1d8133 100644 --- a/libclambcc/ClamBCWriter/ClamBCWriter.cpp +++ b/libclambcc/ClamBCWriter/ClamBCWriter.cpp @@ -19,11 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" #include "ClamBCModule.h" +#include "ClamBCUtilities.h" + #include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" +#include "ClamBCRegAlloc/ClamBCRegAlloc.h" #include #include @@ -45,6 +47,8 @@ #include #include #include +#include +#include #include #include #include @@ -105,7 +109,7 @@ class ClamBCOutputWriter public: static ClamBCOutputWriter *createClamBCOutputWriter(llvm::StringRef srFileName, llvm::Module *pMod, - ClamBCAnalyzer *pAnalyzer) + ClamBCAnalysis *pAnalyzer) { std::error_code ec; raw_fd_ostream *rfo = new raw_fd_ostream(srFileName, ec); @@ -123,7 +127,7 @@ class ClamBCOutputWriter return ret; } - ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) : Out(lineBuffer), OutReal(outStream), maxLineLength(0), lastLinePos(0), pMod(pMod), pAnalyzer(pAnalyzer) { printGlobals(pMod, pAnalyzer); @@ -162,7 +166,7 @@ class ClamBCOutputWriter printFixedNumber(Out, n, fixed); } - void printModuleHeader(Module &M, ClamBCAnalyzer *pAnalyzer, unsigned maxLine) + void printModuleHeader(Module &M, ClamBCAnalysis *pAnalyzer, unsigned maxLine) { NamedMDNode *MinFunc = M.getNamedMetadata("clambc.funcmin"); NamedMDNode *MaxFunc = M.getNamedMetadata("clambc.funcmax"); @@ -251,7 +255,7 @@ class ClamBCOutputWriter assert((OutReal.tell() < 8192) && "OutReal too big"); } - void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalyzer *pAnalyzer) + void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalysis *pAnalyzer) { if (const FunctionType *FTy = dyn_cast(Ty)) { printFixedNumber(Out, 1, 1); @@ -310,7 +314,7 @@ class ClamBCOutputWriter if (const PointerType *PTy = dyn_cast(Ty)) { printFixedNumber(Out, 5, 1); - const Type *ETy = PTy->getElementType(); + const Type 
*ETy = PTy->getPointerElementType(); // pointers to opaque types are treated as i8* int id = -1; if (llvm::isa(ETy)) { @@ -402,7 +406,7 @@ class ClamBCOutputWriter ClamBCStop("Unsupported constant type", &M); } - void printGlobals(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void printGlobals(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { const std::string &ls = pAnalyzer->getLogicalSignature(); if (ls.empty()) { @@ -441,7 +445,7 @@ class ClamBCOutputWriter // function prototype printNumber(Out, pAnalyzer->getTypeID(F->getFunctionType()), false); // function name - std::string Name = F->getName(); + std::string Name(F->getName()); printConstData(Out, (const unsigned char *)Name.c_str(), Name.size() + 1); } @@ -533,7 +537,7 @@ class ClamBCOutputWriter } } - void finished(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void finished(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { //maxline+1, 1 more for \0 @@ -617,7 +621,7 @@ class ClamBCOutputWriter int maxLineLength = 0; int lastLinePos = 0; llvm::Module *pMod = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + ClamBCAnalysis *pAnalyzer = nullptr; void printFixedNumber(raw_ostream &Out, unsigned n, unsigned fixed) { @@ -684,31 +688,31 @@ class ClamBCOutputWriter } }; -class ClamBCWriter : public ModulePass, public InstVisitor +class ClamBCWriter : public PassInfoMixin, public InstVisitor { typedef DenseMap BBIDMap; BBIDMap BBMap; const Module *TheModule = nullptr; unsigned opcodecvt[Instruction::OtherOpsEnd]; - raw_ostream *MapOut = nullptr; - FunctionPass *Dumper = nullptr; - ClamBCRegAlloc *RA = nullptr; + raw_ostream *MapOut = nullptr; + FunctionPass *Dumper = nullptr; + ClamBCRegAllocAnalysis *RA = nullptr; unsigned fid, minflvl; MetadataContext *TheMetadata = nullptr; unsigned MDDbgKind; std::vector dbgInfo; bool anyDbg; - llvm::Module *pMod = nullptr; - ClamBCOutputWriter *pOutputWriter = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + llvm::Module *pMod = nullptr; + ClamBCOutputWriter 
*pOutputWriter = nullptr; + ClamBCAnalysis *pAnalyzer = nullptr; + ModuleAnalysisManager *pModuleAnalysisManager = nullptr; public: static char ID; explicit ClamBCWriter() - : ModulePass(ID), - TheModule(0), MapOut(0), Dumper(0) + : TheModule(0), MapOut(0), Dumper(0) { if (!MapFile.empty()) { std::error_code ec; @@ -735,18 +739,20 @@ class ClamBCWriter : public ModulePass, public InstVisitor void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); AU.setPreservesAll(); } virtual bool doInitialization(Module &M); - bool runOnModule(Module &m) + PreservedAnalyses run(Module &m, ModuleAnalysisManager &mam) { + doInitialization(m); + pMod = &m; + pModuleAnalysisManager = &mam; - pMod = &m; - pAnalyzer = &getAnalysis(); - pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer); + ClamBCAnalysis &analysis = mam.getResult(m); + pAnalyzer = &analysis; + pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer); for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { if (llvm::isa(i)) { @@ -757,7 +763,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor } } - return false; + doFinalization(m); + return PreservedAnalyses::all(); } void gatherGEPs(BasicBlock *pBB, std::vector &geps) @@ -821,7 +828,11 @@ class ClamBCWriter : public ModulePass, public InstVisitor GetElementPtrInst *pNew = nullptr; if (pGep->isInBounds()) { - pNew = GetElementPtrInst::Create(nullptr, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); + Type *pt = ci->getType(); + if (llvm::isa(pt)) { + pt = pt->getPointerElementType(); + } + pNew = GetElementPtrInst::Create(pt, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); } else { assert(0 && "DON'T THINK THIS CAN HAPPEN"); } @@ -837,12 +848,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor bool runOnFunction(Function &F) { - //TODO: Move this to another pass once the Analyzer no longer - //makes changes to the code. 
fixGEPs(&F); - //Don't think I need this anymore. - //If anything, move it to a verifier. if ("" == F.getName()) { assert(0 && "Function created by ClamBCRebuild is not being deleted"); } @@ -857,10 +864,13 @@ class ClamBCWriter : public ModulePass, public InstVisitor return false; } fid++; + //Removed, see note about getFunctionID at the top of the file. - assert(pAnalyzer->getFunctionID(&F) == fid); + assert(pAnalyzer->getFunctionID(&F) == fid && "Function IDs don't match"); + + FunctionAnalysisManager &fam = pModuleAnalysisManager->getResult(*pMod).getManager(); - RA = &getAnalysis(F); + RA = &fam.getResult(F); printFunction(F); if (Dumper) { Dumper->runOnFunction(F); @@ -976,7 +986,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor if (ConstantInt *CI = dyn_cast(GEP.getOperand(1))) { if (!CI->isZero()) { const PointerType *Ty = cast(GEP.getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getPointerElementType()); if (ATy) { ClamBCStop("ATy", &GEP); } @@ -1387,10 +1397,6 @@ class ClamBCWriter : public ModulePass, public InstVisitor stop("ClamAV bytecode backend does not know about ", &I); } }; -char ClamBCWriter::ID = 0; -static RegisterPass X("clambc-writer", "ClamBCWriter Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); bool ClamBCWriter::doInitialization(Module &M) { @@ -1423,8 +1429,7 @@ bool ClamBCWriter::doInitialization(Module &M) //TODO: Get debug info working. 
//Dumper = createDbgInfoPrinterPass(); } - fid = 0; - //OModule->writeGlobalMap(MapOut); + fid = 0; MDDbgKind = M.getContext().getMDKindID("dbg"); return false; @@ -1608,7 +1613,21 @@ void ClamBCWriter::printBasicBlock(BasicBlock *BB) } } -llvm::ModulePass *createClamBCWriter() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCWriter(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-writer") { + FPM.addPass(ClamBCWriter()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/Common/CMakeLists.txt b/libclambcc/Common/CMakeLists.txt new file mode 100644 index 0000000000..f5938d51c0 --- /dev/null +++ b/libclambcc/Common/CMakeLists.txt @@ -0,0 +1,42 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The clambccommon object library +# +add_library(clambccommon_obj OBJECT) +target_sources(clambccommon_obj + PRIVATE + ClamBCDiagnostics.cpp + ClamBCUtilities.cpp + version.c +) + +target_include_directories(clambccommon_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + .. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(clambccommon_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambccommon_obj -DLOG_BEFORE_AFTER=1) + +# +# The clambccommon shared library. 
+# +add_library( clambccommon SHARED ) +target_link_libraries( clambccommon + PUBLIC + clambccommon_obj ) +set_target_properties( clambccommon PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(clambccommon PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(clambccommon PUBLIC ${LLVM_LIBS}) + +install(TARGETS clambccommon DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/libclambcc/Common/ClamBCDiagnostics.cpp b/libclambcc/Common/ClamBCDiagnostics.cpp index 79c634b61f..f7323dde9c 100644 --- a/libclambcc/Common/ClamBCDiagnostics.cpp +++ b/libclambcc/Common/ClamBCDiagnostics.cpp @@ -20,7 +20,10 @@ * MA 02110-1301, USA. */ #define DEBUGTYPE "clambcdiags" + +#include "clambc.h" #include "ClamBCDiagnostics.h" + #include #include #include @@ -29,43 +32,16 @@ #include #include -#include "clambc.h" using namespace llvm; -#if 0 -static inline void printSep(bool hasColors) -{ - if (hasColors) { - errs().resetColor(); - } - errs() << ":"; - if (hasColors) { - errs().changeColor(raw_ostream::SAVEDCOLOR, true); - } -} -#endif - // Print the main compile unit's source filename, // falls back to printing the module identifier. 
static void printLocation(const llvm::Module *M) { NamedMDNode *ND = M->getNamedMetadata("llvm.dbg.gv"); if (ND) { -#if 0 - unsigned N = ND->getNumOperands(); - // Try to find main compile unit - for (unsigned i = 0; i < N; i++) { - DIGlobalVariable G(ND->getOperand(i)); - DICompileUnit CU(G.getCompileUnit()); - if (!CU.isMain()) - continue; - errs() << /*CU.getDirectory() << "/" <<*/ CU.getFilename() << ": "; - return; - } -#else DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } errs() << M->getModuleIdentifier() << ": "; } @@ -81,27 +57,10 @@ static void printLocation(const llvm::Function *F) I != E; ++I) { if (const Instruction *T = I->getTerminator()) { if (MDNode *N = T->getMetadata(MDDebugKind)) { -#if 0 - DILocation Loc(N); - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); - errs() << /*Loc.getDirectory() << "/" << */ Loc.getFilename() - << ": in function '" - << SP.getDisplayName() - << "': "; - return; - } -#else DEBUGERR << N << "\n"; DEBUGERR << *N << "\n"; DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } } } @@ -126,31 +85,9 @@ void printLocation(const llvm::Instruction *I, bool fallback) BasicBlock::const_iterator ItB = BB->begin(); while (It != ItB) { if (MDNode *N = It->getMetadata("dbg")) { -#if 0 - DILocation Loc(N); - errs() << /*Loc.getDirectory() << "/" <<*/ Loc.getFilename() - << ":" << Loc.getLineNumber(); - if (unsigned Col = Loc.getColumnNumber()) { - errs() << ":" << Col; - } - if (approx) - errs() << "(?)"; - errs() << ": "; - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); 
- errs() << "in function '" << SP.getDisplayName() << "': "; - } - return; -#else DEBUGERR << *N << "\n"; DEBUGERR << approx << "\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } approx = true; --It; @@ -175,18 +112,8 @@ void printValue(const llvm::Value *V, bool printLocation, bool fallback) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - if (fallback) - errs() << *V << "\n: "; - else - errs() << V->getName() << ": "; - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << "'" << DisplayName << "' "; if (printLocation) errs() << " (" << File << ":" << Line << ")"; @@ -201,15 +128,8 @@ void printLocation(const llvm::Module *M, const llvm::Value *V) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - printLocation(M); - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << /*Dir << "/" <<*/ File << ":" << Line << ": "; } diff --git a/libclambcc/Common/ClamBCModule.h b/libclambcc/Common/ClamBCModule.h index a4a80c236d..ea7b74a7b5 100644 --- a/libclambcc/Common/ClamBCModule.h +++ b/libclambcc/Common/ClamBCModule.h @@ -21,19 +21,22 @@ */ #ifndef CLAMBC_MODULE_H #define CLAMBC_MODULE_H -#include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" + +#include "clambc.h" + +#include +#include +#include +#include +#include #include #include -#include "llvm/Support/raw_ostream.h" +#include + #include #include +#include -#include "clambc.h" class ClamBCWriter; class ClamBCRegAlloc; @@ -154,12 +157,6 @@ class ClamBCModule : public llvm::ModulePass virtual bool runOnModule(llvm::Module &M); virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const; -#if 
0 - static void stop(const llvm::Twine &Msg, const llvm::Module *M); - static void stop(const llvm::Twine &Msg, const llvm::Function *F); - static void stop(const llvm::Twine &Msg, const llvm::Instruction *I); -#endif - void printNumber(uint64_t n, bool constant = false) { printNumber(Out, n, constant); diff --git a/libclambcc/Common/ClamBCUtilities.cpp b/libclambcc/Common/ClamBCUtilities.cpp index 04bfaadfcc..f94c5c5810 100644 --- a/libclambcc/Common/ClamBCUtilities.cpp +++ b/libclambcc/Common/ClamBCUtilities.cpp @@ -1,12 +1,12 @@ -#include -#include -#include - #include "ClamBCUtilities.h" #include "ClamBCDiagnostics.h" #include "clambc.h" +#include +#include +#include + using namespace llvm; void ClamBCStop(const Twine &Msg, const Module *M) @@ -40,7 +40,10 @@ bool functionRecurses(Function *pFunc, Function *orig, std::vector & for (auto blockIter = bb->begin(), blockEnd = bb->end(); blockIter != blockEnd; blockIter++) { Instruction *inst = llvm::cast(blockIter); if (CallInst *ci = llvm::dyn_cast(inst)) { - Value *calledValue = ci->getCalledValue(); + Value *calledValue = ci->getCalledFunction(); + if (nullptr == calledValue) { + ClamBCStop("Calls to function pointers not allowed", ci); + } if (calledValue == orig) { return true; } else if (Function *callee = dyn_cast(calledValue)) { @@ -139,3 +142,188 @@ llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock) { return llvm::cast(pBlock->getParent()->begin()); } + +int64_t getTypeSize(llvm::Module *pMod, llvm::Type *pt) +{ + + int64_t size = pt->getScalarSizeInBits(); + if (size) { + return size; + } + + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + size = pat->getNumElements() * (getTypeSize(pMod, pat->getElementType())); + if (size) { + return size; + } + } + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + const StructLayout *psl = pMod->getDataLayout().getStructLayout(pst); + return psl->getSizeInBits(); + } + + assert(0 && "Size has not been computed"); + return -1; +} + 
+int64_t getTypeSizeInBytes(llvm::Module *pMod, Type *pt) +{ + return getTypeSize(pMod, pt) / 8; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pt, uint64_t idx) +{ + + int64_t cnt = 0; + + assert((llvm::isa(pt) || llvm::isa(pt)) && "pt must be a complex type"); + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + assert((idx <= pst->getNumElements()) && "Idx too high"); + + const StructLayout *psl = pMod->getDataLayout().getStructLayout(pst); + assert(psl && "Could not get layout"); + + cnt = psl->getElementOffsetInBits(idx) / 8; + + } else { + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + assert((idx <= pat->getNumElements()) && "Idx too high"); + cnt = idx * getTypeSizeInBytes(pMod, pat->getElementType()); + } + } + + return cnt; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst, ConstantInt *pIdx) +{ + int64_t idx = pIdx->getLimitedValue(); + return computeOffsetInBytes(pMod, pst, idx); +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst) +{ + if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getStructNumElements()); + } else if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getArrayNumElements()); + } else { + assert(0 && "pt must be a complex type"); + } + + return 0; +} + +Type *findTypeAtIndex(Type *pst, ConstantInt *ciIdx) +{ + Type *ret = nullptr; + StructType *st = llvm::dyn_cast(pst); + if (nullptr != st) { + uint64_t idx = ciIdx->getLimitedValue(); + + assert(idx < st->getNumElements() && "Something went wrong"); + return st->getTypeAtIndex(idx); + } + + ArrayType *at = llvm::dyn_cast(pst); + if (nullptr != at) { + return at->getArrayElementType(); + } + return ret; +} + +/*Only pass in either ConstantExpr or Instruction */ +Type *getResultType(Value *pVal) +{ + + Type *type = nullptr; + + if (llvm::isa(pVal)) { + ConstantExpr *pce = llvm::cast(pVal); + type = pce->getOperand(0)->getType(); + } else if (llvm::isa(pVal)) { + Instruction *pInst = 
llvm::cast(pVal); + type = pInst->getOperand(0)->getType(); + } else { + assert(0 && "This function must be called with either Instruction or a ConstantExpr"); + return nullptr; + } + + if (llvm::isa(type)) { + if (llvm::isa(pVal)) { + GEPOperator *pgep = llvm::cast(pVal); + type = pgep->getSourceElementType(); + + } else if (llvm::isa(pVal)) { + GetElementPtrInst *pInst = llvm::cast(pVal); + type = pInst->getSourceElementType(); + } else if (llvm::isa(pVal)) { + BitCastOperator *pbco = llvm::cast(pVal); + type = pbco->getDestTy(); + } else if (llvm::isa(pVal)) { + BitCastInst *pInst = llvm::cast(pVal); + type = pInst->getDestTy(); + } else { + llvm::errs() << "<" << __LINE__ << ">" + << "https://llvm.org/docs/OpaquePointers.html" + << "\n"; + llvm::errs() << "<" << __LINE__ << ">" << *pVal << "\n"; + assert(0 && "FIGURE OUT WHAT TO DO HERE"); + } + } + + return type; +} + +void gatherCallsToIntrinsic(Function *pFunc, const char *const functionName, std::vector &calls) +{ + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) { + BasicBlock *pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) { + if (CallInst *pci = llvm::dyn_cast(bi)) { + Function *pCalled = pci->getCalledFunction(); + if (pCalled->isIntrinsic()) { + if (functionName == pCalled->getName()) { + calls.push_back(pci); + } + } + } + } + } +} + +void gatherCallsToIntrinsic(Module *pMod, const char *const functionName, std::vector &calls) +{ + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function *pFunc = llvm::cast(i); + if (pFunc->isDeclaration()) { + continue; + } + + gatherCallsToIntrinsic(pFunc, functionName, calls); + } +} + +void replaceAllCalls(FunctionType *pFuncType, Function *pFunc, + const std::vector &calls, const char *const namePrefix) +{ + + for (size_t i = 0; i < calls.size(); i++) { + CallInst *pci = calls[i]; + + std::vector args; + for (size_t i = 0; i < pci->arg_size(); i++) { + args.push_back(pci->getArgOperand(i)); + 
} + CallInst *pNew = CallInst::Create(pFuncType, pFunc, args, + namePrefix, pci); + pci->replaceAllUsesWith(pNew); + pci->eraseFromParent(); + } +} diff --git a/libclambcc/Common/ClamBCUtilities.h b/libclambcc/Common/ClamBCUtilities.h index a010840aa0..ec3d882a47 100644 --- a/libclambcc/Common/ClamBCUtilities.h +++ b/libclambcc/Common/ClamBCUtilities.h @@ -1,13 +1,19 @@ #ifndef CLAMBC_UTILITIES_H_ #define CLAMBC_UTILITIES_H_ +#include "ClamBCDiagnostics.h" + +#include +#include +#include + #include #include +#include #include #include - -#include "ClamBCDiagnostics.h" +#include /*These are a temporary replacement for ClamBCModule::stop. */ void ClamBCStop(const llvm::Twine &Msg, const llvm::Module *M); @@ -26,4 +32,27 @@ bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo); llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock); +int64_t getTypeSize(llvm::Module *pMod, llvm::Type *pt); + +int64_t getTypeSizeInBytes(llvm::Module *pMod, llvm::Type *pt); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pt, uint64_t idx); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pst, llvm::ConstantInt *pIdx); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pst); + +llvm::Type *findTypeAtIndex(llvm::Type *pst, llvm::ConstantInt *ciIdx); + +llvm::Type *getResultType(llvm::Value *pVal); + +void gatherCallsToIntrinsic(llvm::Function *pFunc, const char *const functionName, + std::vector &calls); + +void gatherCallsToIntrinsic(llvm::Module *pMod, const char *const functionName, + std::vector &calls); + +void replaceAllCalls(llvm::FunctionType *pFuncType, llvm::Function *pFunc, + const std::vector &calls, const char *const namePrefix); + #endif // CLAMBC_UTILITIES_H_ diff --git a/libclambcc/Common/clambc.h b/libclambcc/Common/clambc.h index 3d790a1f9e..3e828bd6f6 100644 --- a/libclambcc/Common/clambc.h +++ b/libclambcc/Common/clambc.h @@ -136,5 +136,17 @@ enum bc_global { #define DEBUGERR llvm::errs() << "<" << __FILE__ 
<< "::" << __FUNCTION__ << "::" << __LINE__ << ">" #endif //DEBUGERR +#ifndef DEBUG_WHERE +#define DEBUG_WHERE llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">\n" +#endif + +#ifndef DEBUG_VALUE +#define DEBUG_VALUE(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << *__value__ << "\n"; +#endif + +#ifndef DEBUG_NONPOINTER +#define DEBUG_NONPOINTER(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << __value__ << "\n"; +#endif + #define BC_START_TID 69 #endif diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9729b791a1..ee03ea2c10 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,7 +14,7 @@ if(WIN32) file(TO_NATIVE_PATH ${sigtool_EXECUTABLE} SIGTOOL) file(TO_NATIVE_PATH ${clambc_headers_DIRECTORY} HEADERS) else() - set(LD_LIBRARY_PATH $:$ENV{LD_LIBRARY_PATH}) + set(LD_LIBRARY_PATH $:$ENV{LD_LIBRARY_PATH}) set(SOURCE ${CMAKE_SOURCE_DIR}) set(BUILD ${CMAKE_BINARY_DIR})