diff --git a/.clang-format b/.clang-format index da3e28f87e6..5ae36afdd86 100644 --- a/.clang-format +++ b/.clang-format @@ -2,135 +2,223 @@ Language: Cpp AccessModifierOffset: -2 AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: true -AlignConsecutiveDeclarations: false +AlignArrayOfStructures: None +AlignConsecutiveAssignments: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + PadOperators: false AlignEscapedNewlines: Left -AlignOperands: true -AlignTrailingComments: true +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true AllowShortBlocksOnASingleLine: Never AllowShortCaseLabelsOnASingleLine: false +AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: Empty -AllowShortLambdasOnASingleLine: All AllowShortIfStatementsOnASingleLine: WithoutElse +AllowShortLambdasOnASingleLine: All AllowShortLoopsOnASingleLine: true AlwaysBreakAfterDefinitionReturnType: None AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false AlwaysBreakTemplateDeclarations: MultiLine +AttributeMacros: + - __capability BinPackArguments: true BinPackParameters: true +BitFieldColonSpacing: Both BraceWrapping: AfterCaseLabel: false AfterClass: true - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false + AfterExternBlock: false AfterFunction: true AfterNamespace: true AfterObjCDeclaration: 
false AfterStruct: false AfterUnion: false - AfterExternBlock: false BeforeCatch: false BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false IndentBraces: false SplitEmptyFunction: true SplitEmptyRecord: true SplitEmptyNamespace: true +BreakAfterAttributes: Never +BreakAfterJavaFieldAnnotations: false +BreakArrays: true BreakBeforeBinaryOperators: None +BreakBeforeConceptDeclarations: Always BreakBeforeBraces: Linux -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon +BreakBeforeInlineASMColon: OnlyMultiline BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false BreakConstructorInitializers: BeforeColon -BreakAfterJavaFieldAnnotations: false +BreakInheritanceList: BeforeColon BreakStringLiterals: true ColumnLimit: 0 CommentPragmas: '^ IWYU pragma:' CompactNamespaces: false -ConstructorInitializerAllOnOneLineOrOnePerLine: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true -DeriveLineEnding: true DerivePointerAlignment: true DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH +IfMacros: + - KJ_IF_MAYBE IncludeBlocks: Preserve IncludeCategories: - Regex: '^"(llvm|llvm-c|clang|clang-c)/' Priority: 2 SortPriority: 0 + CaseSensitive: false - Regex: '^(<|"(gtest|gmock|isl|json)/)' Priority: 3 SortPriority: 0 + CaseSensitive: false - Regex: '.*' Priority: 1 SortPriority: 0 + CaseSensitive: false IncludeIsMainRegex: '(Test)?$' IncludeIsMainSourceRegex: '' +IndentAccessModifiers: false +IndentCaseBlocks: false IndentCaseLabels: true +IndentExternBlock: AfterExternBlock IndentGotoLabels: true IndentPPDirectives: None +IndentRequiresClause: true IndentWidth: 4 IndentWrappedFunctionNames: false +InsertBraces: false +InsertNewlineAtEOF: false +InsertTrailingCommas: None +IntegerLiteralSeparator: + Binary: 0 + 
BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 JavaScriptQuotes: Leave JavaScriptWrapImports: true KeepEmptyLinesAtTheStartOfBlocks: true +LambdaBodyIndentation: Signature +LineEnding: DeriveLF MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None ObjCBinPackProtocolList: Auto ObjCBlockIndentWidth: 2 +ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true +PackConstructorInitializers: BinPack PenaltyBreakAssignment: 2 PenaltyBreakBeforeFirstCallParameter: 19 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 +PenaltyBreakOpenParenthesis: 0 PenaltyBreakString: 1000 PenaltyBreakTemplateDeclaration: 10 PenaltyExcessCharacter: 1000000 +PenaltyIndentedWhitespace: 0 PenaltyReturnTypeOnItsOwnLine: 60 PointerAlignment: Right +PPIndentWidth: -1 +QualifierAlignment: Leave +ReferenceAlignment: Pointer ReflowComments: true -SortIncludes: false -SortUsingDeclarations: true +RemoveBracesLLVM: false +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Leave +ShortNamespaceLines: 1 +SortIncludes: Never +SortJavaStaticImport: Before +SortUsingDeclarations: LexicographicNumeric SpaceAfterCStyleCast: false SpaceAfterLogicalNot: false SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true SpaceBeforeParens: ControlStatements +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + AfterOverloadedOperator: false + AfterRequiresInClause: false + AfterRequiresInExpression: false + BeforeNonEmptyParentheses: false SpaceBeforeRangeBasedForLoopColon: true +SpaceBeforeSquareBrackets: false 
SpaceInEmptyBlock: false SpaceInEmptyParentheses: false SpacesBeforeTrailingComments: 1 -SpacesInAngles: false +SpacesInAngles: Never SpacesInConditionalStatement: false SpacesInContainerLiterals: true SpacesInCStyleCastParentheses: false +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 SpacesInParentheses: false SpacesInSquareBrackets: false -SpaceBeforeSquareBrackets: false Standard: Latest +StatementAttributeLikeMacros: + - Q_EMIT StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION TabWidth: 8 -UseCRLF: false UseTab: Never +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE ... diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml index 756f1aa23af..eb9d0f9339b 100644 --- a/.github/workflows/clang-format.yml +++ b/.github/workflows/clang-format.yml @@ -1,4 +1,4 @@ -name: clang-format +name: clang-format-16 # Controls when the action will run. Triggers the workflow on push or pull request # events but only for the master branch @@ -28,6 +28,6 @@ jobs: - name: Run clang-format style check for C/C++ programs. uses: jidicula/clang-format-action@v4.4.1 with: - clang-format-version: "11" + clang-format-version: "16" check-path: ${{ matrix.path['check'] }} exclude-regex: ${{ matrix.path['exclude'] }} diff --git a/.github/workflows/cmake.yml b/.github/workflows/cmake.yml index 5285c28e233..aed0fdd0dad 100644 --- a/.github/workflows/cmake.yml +++ b/.github/workflows/cmake.yml @@ -84,7 +84,6 @@ jobs: # working-directory: ${{runner.workspace}}/build # run: cpack -C ${{ env.BUILD_TYPE }} - # TODO: Some day add support for building on macOS with any modern LLVM version, not just LLVM-8. 
# # build-macos: # runs-on: macos-latest @@ -131,16 +130,21 @@ jobs: # run: ctest -C ${{ env.BUILD_TYPE }} -V build-ubuntu: - runs-on: ubuntu-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v1 - name: Update package listings - run: sudo apt-get update + run: + sudo apt-get update - name: Install Dependencies - run: sudo apt-get install -y llvm-8-dev clang-8 clamav # TODO: use just 'llvm-dev' when we can support any recent LLVM version. + run: | + wget https://apt.llvm.org/llvm.sh + chmod +x llvm.sh + sudo ./llvm.sh 16 + sudo apt-get install -y clamav - name: Install pytest for easier to read test results run: python3 -m pip install pytest @@ -161,8 +165,7 @@ jobs: # and build directories, but this is only available with CMake 3.13 and higher. # The CMake binaries on the Github Actions machines are (as of this writing) 3.12 run: - cmake ${{runner.workspace}}/clamav-bytecode-compiler -DCMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} - -DENABLE_EXAMPLES=ON + cmake ${{runner.workspace}}/clamav-bytecode-compiler -D CMAKE_BUILD_TYPE=${{ env.BUILD_TYPE }} -D ENABLE_EXAMPLES=ON - name: Build shell: bash diff --git a/CMakeLists.txt b/CMakeLists.txt index 5f489ac29f2..4821aa878ef 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,7 +20,7 @@ string(TIMESTAMP TODAY "%Y%m%d") set(VERSION_SUFFIX "") project( ClamBCC - VERSION "0.105.0" + VERSION "1.3.0" DESCRIPTION "ClamAV Bytecode Compiler." ) set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) @@ -106,7 +106,7 @@ if(ENABLE_TESTS) find_package(ClamAV REQUIRED) endif() -find_package(LLVM 8 REQUIRED) +find_package(LLVM 16 REQUIRED) # Do not disable assertions based on CMAKE_BUILD_TYPE. foreach(_build_type "Release" "MinSizeRel" "RelWithDebInfo") diff --git a/Dockerfile b/Dockerfile deleted file mode 100644 index 8cc8664addc..00000000000 --- a/Dockerfile +++ /dev/null @@ -1,55 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-or-later -# -# Copyright (C) 2021 Cisco Systems, Inc. 
and/or its affiliates. All rights reserved. -FROM registry.hub.docker.com/library/ubuntu:20.04 AS builder - -LABEL Maintainer="ClamAV bugs " - -WORKDIR /src -COPY . /src - -ENV DEBIAN_FRONTEND=noninteractive -ENV CC=clang-8 -ENV CXX=clang++-8 - -RUN apt-get update -y && \ - apt-get install -y \ - wget \ - libncurses5 \ - binutils \ - git \ - python3 \ - python3-distutils \ - python3-pip \ - cmake \ - make \ - clang-8 \ - clamav \ - && \ - rm -rf /var/lib/apt/lists/* && \ - python3 -m pip install pytest && \ - mkdir build && \ - cd build && \ - cmake .. -G "Unix Makefiles" \ - -D CMAKE_INSTALL_PREFIX=/usr \ - -D CMAKE_BUILD_TYPE=Release \ - -D ENABLE_EXAMPLES=OFF \ - && \ - make DESTDIR="/clambc" -j$(($(nproc) - 1)) && \ - make DESTDIR="/clambc" install && \ - ctest -V - -FROM registry.hub.docker.com/library/ubuntu:20.04 - -ENV DEBIAN_FRONTEND=noninteractive -ENV CC=clang-8 -ENV CXX=clang++-8 - -COPY --from=builder "/clambc" "/" - -RUN apt-get -y update && \ - apt install -y \ - python3 \ - clang-8 \ - && \ - rm -rf /var/lib/apt/lists/* diff --git a/NEWS.md b/NEWS.md index 7fa86a0a163..387f611562f 100644 --- a/NEWS.md +++ b/NEWS.md @@ -20,6 +20,22 @@ For example: > - [bytecode_api.h](headers/bytecode_api.h) > - [bytecode_local.h](headers/bytecode_local.h) +## `1.3.0-rc` + +➕ Upgrade bytecode compiler project to LLVM 16. + - The bytecode compiler project now builds multiple shared object files, + instead of just one with all of the passes. This is due to running with + the "new" pass manager, instead of running with the legacy pass manager, + as before. See https://llvm.org/docs/NewPassManager.html and + https://blog.llvm.org/posts/2021-03-26-the-new-pass-manager/ for more details. + - The bytecode compiler currently uses (deprecated) non-opaque pointers. + Updating to all opaque pointers will be required for the next release. + See https://llvm.org/docs/OpaquePointers.html for more information. 
+ +🌌 New Requirements: + - LLVM 16 + - Clang 16 + ## `0.105.0` ➕ Complete overhaul of the bytecode compiler project. diff --git a/clam-format b/clam-format index 303e077d83c..f063eaa0e7b 100755 --- a/clam-format +++ b/clam-format @@ -1,6 +1,11 @@ #!/bin/bash -clang-format -style='{ Language: Cpp, UseTab: Never, IndentWidth: 4, AlignTrailingComments: true, AlignConsecutiveAssignments: true, AlignAfterOpenBracket: true, AlignEscapedNewlines: Left, AlignOperands: true, AllowShortFunctionsOnASingleLine: Empty, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true, BreakBeforeBraces: Linux, BreakBeforeTernaryOperators: true, ColumnLimit: 0, FixNamespaceComments: true, SortIncludes: false, MaxEmptyLinesToKeep: 1, SpaceBeforeParens: ControlStatements, IndentCaseLabels: true, DerivePointerAlignment: true }' -dump-config > .clang-format +clang-format-16 -style='{ Language: Cpp, UseTab: Never, IndentWidth: 4, AlignTrailingComments: true, AlignConsecutiveAssignments: true, AlignAfterOpenBracket: true, AlignEscapedNewlines: Left, AlignOperands: true, AllowShortFunctionsOnASingleLine: Empty, AllowShortIfStatementsOnASingleLine: true, AllowShortLoopsOnASingleLine: true, BreakBeforeBraces: Linux, BreakBeforeTernaryOperators: true, ColumnLimit: 0, FixNamespaceComments: true, SortIncludes: false, MaxEmptyLinesToKeep: 1, SpaceBeforeParens: ControlStatements, IndentCaseLabels: true, DerivePointerAlignment: true }' -dump-config > .clang-format -clang-format -i -verbose libclambcc/*/*.cpp -clang-format -i -verbose libclambcc/*/*.h +clang-format-16 -i -verbose `find libclambcc -name "*.cpp"` +clang-format-16 -i -verbose `find libclambcc -name "*.h"` +clang-format-16 -i -verbose `find libclambcc -name "*.c"` + +clang-format-16 -i -verbose `find examples -name "*.cpp"` +clang-format-16 -i -verbose `find examples -name "*.h"` +clang-format-16 -i -verbose `find examples -name "*.c"` diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 
7bf72ab6a8d..cb8b304b554 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -11,13 +11,11 @@ #These are the list of supported versions -#consider changing this to start at 8 and go up to 99. That will cover us -#from having to update this when new versions come out. -CLANG_LLVM_KNOWN_VERSIONS = [8, 9, 10, 11, 12] +CLANG_LLVM_KNOWN_VERSIONS = [16] #This is the min clang/llvm version this has been tested with. -MIN_CLANG_LLVM_VERSION = 8 -PREFERRED_CLANG_LLVM_VERSION = 8 +MIN_CLANG_LLVM_VERSION = 16 +PREFERRED_CLANG_LLVM_VERSION = 16 CLANG_NAME = "clang" LLVM_NAME = "opt" @@ -31,37 +29,36 @@ OPEN_SOURCE_GENERATED_EXTENSION = "generated.c" OPTIMIZED_TMP_BYTECODE_FILE_EXTENSION = "optimized.tmp.ll" - -COMMON_WARNING_OPTIONS = "-Wno-backslash-newline-escape \ - -Wno-pointer-sign \ - -Wno-return-type \ - -Wno-incompatible-pointer-types \ - -Wno-unused-value \ - -Wno-shift-negative-value \ - -Wno-implicit-function-declaration \ - -Wno-incompatible-library-redeclaration \ - -Wno-implicit-int \ - -Wno-constant-conversion \ -" +COMMON_WARNING_OPTIONS = [ + "-Wno-backslash-newline-escape" + , "-Wno-pointer-sign" + , "-Wno-return-type" + , "-Wno-incompatible-pointer-types" + , "-Wno-unused-value" + , "-Wno-shift-negative-value" + , "-Wno-implicit-function-declaration" + , "-Wno-incompatible-library-redeclaration" + , "-Wno-implicit-int" + , "-Wno-constant-conversion" + ] TMPDIR=".__clambc_tmp" -INCDIR = Path(__file__).parent / '..' / 'include' +INCDIR = str(Path(__file__).parent / '..' / 'include') # Check for libclambcc.so at a location relative to this script first. FOUND_SHARED_OBJ = False SHARED_OBJ_DIR = Path(__file__).parent / '..' / 'lib' -if (SHARED_OBJ_DIR / 'libclambcc.so').exists(): - SHARED_OBJ_FILE = SHARED_OBJ_DIR / 'libclambcc.so' +if (SHARED_OBJ_DIR / 'libClamBCCommon.so').exists(): FOUND_SHARED_OBJ = True elif 'LD_LIBRARY_PATH' in os.environ: # Use LD_LIBRARY_PATH to try to find it. 
ld_library_paths = os.environ['LD_LIBRARY_PATH'].strip(' :').split(':') for lib_path in ld_library_paths: - if (Path(lib_path) / 'libclambcc.so').exists(): - SHARED_OBJ_FILE = Path(lib_path) / 'libclambcc.so' + if (Path(lib_path) / 'libClamBCCommon.so').exists(): + SHARED_OBJ_DIR = Path(lib_path) FOUND_SHARED_OBJ = True break @@ -109,10 +106,18 @@ def validate(self) -> bool: return True -def run(cmd: str) -> int: +def run(cmd: list) -> int: + cmd = ' '.join(cmd) if VERBOSE: print(cmd) - return os.system(cmd) + + ret = os.system(cmd) + if ret: + print (cmd) + print (ret) + sys.exit(1) + + return ret def die(msg: str, exitStatus: int) -> None: @@ -170,42 +175,40 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC outFile = getIrFile(fileName, debugBuild) - includePaths = "" + cmd = [] + cmd.append(clangLLVM.getClang()) + #cmd.append("-m32") #TODO: Put this back and resolve issues with it. + cmd.append("-S") + cmd.append("-fno-discard-value-names") + cmd.append("-Wno-implicit-function-declaration") + cmd.append("-fno-vectorize") + cmd.append("--language=c") + cmd.append("-emit-llvm") + cmd.append("-Werror=unused-command-line-argument") + cmd.append("-Xclang") + cmd.append("-disable-O0-optnone") + cmd.append("-Xclang -no-opaque-pointers") + cmd.append(fileName) + cmd.append("-o") + cmd.append(outFile) + cmd.append("-I") + cmd.append(INCDIR) + cmd.append("-include") + cmd.append("bytecode.h") + cmd.append("-D__CLAMBC__") + if options.includes: for i in options.includes: - includePaths += f"-I{i} " + cmd.append("-I") + cmd.append(i) - defines = "" if options.defines: for d in options.defines: - defines += f"-D{d} " - - cmd = f"{clangLLVM.getClang()} \ - -S \ - -fno-discard-value-names \ - --language=c \ - -emit-llvm \ - -Werror=unused-command-line-argument \ - -Xclang \ - -disable-O0-optnone \ - -o {outFile} \ - {fileName} \ - " - - cmd += f" \ - {includePaths} \ - {defines} \ - " + cmd.append('-D') + cmd.append(d) if debugBuild: - cmd += 
" -g \ - " - - if (not standardCompiler): - cmd += f" -I {INCDIR} \ - -include bytecode.h \ - -D__CLAMBC__ \ - " + cmd.append('-g') if options.disableCommonWarnings: cmd += COMMON_WARNING_OPTIONS @@ -251,7 +254,7 @@ def getInputSourceFileName(outputFileName: str) -> str: def getOptimizedTmpFileName(linkedFile: str) -> str: idx = linkedFile.find(LINKED_BYTECODE_FILE_EXTENSION) if -1 == idx: - die("getLinkedFileName called with invalid input", 2) + die("getOptimizedTmpFileName called with invalid input", 2) return f"{linkedFile[0:idx]}{OPTIMIZED_TMP_BYTECODE_FILE_EXTENSION}" @@ -260,8 +263,17 @@ def linkIRFiles(clangLLVM: ClangLLVM, linkedFile: str, irFiles: list) -> int: Given an output file name and list of IR files, link the IR files. Returns the exit status code for the call to `llvm-link`. ''' - inFiles = " ".join(irFiles) - cmd = f"{clangLLVM.getLLVMLink()} -S -o {linkedFile} {inFiles}" + cmd = [] + cmd.append(clangLLVM.getLLVMLink()) + cmd.append("-S") + cmd.append("-o") + cmd.append(linkedFile) + cmd += irFiles + + #TODO: Remove this in a future version, since it is a deprecated option + # that will no longer be supported. 
For a detailed explanation, see + # https://llvm.org/docs/OpaquePointers.html + cmd.append("-opaque-pointers=0") return run(cmd) @@ -444,13 +456,14 @@ def getOutputString(linked: IRFile, ignore: IRFile) -> str: def createOptimizedTmpFile(clangLLVM: ClangLLVM, linkedFile: str) -> str: name = getOptimizedTmpFileName(linkedFile) - cmd = f"{clangLLVM.getOpt()} \ - -S \ - {linkedFile} \ - -o {name} \ - -internalize -internalize-public-api-list=entrypoint \ - -globalopt \ - " + cmd = [] + cmd.append(clangLLVM.getOpt()) + cmd.append("-S") + cmd.append(linkedFile) + cmd.append("-o") + cmd.append(name) + cmd.append("-internalize-public-api-list=entrypoint") + cmd.append('--passes="internalize,globalopt"') ret = run(cmd) if None == ret: @@ -491,109 +504,158 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: return res +INTERNALIZE_API_LIST=[ "_Z10entrypointv" + , "entrypoint" + , "__clambc_kind" + , "__clambc_virusname_prefix" + , "__clambc_virusnames" + , "__clambc_filesize" + , "__clambc_match_counts" + , "__clambc_match_offsets" + , "__clambc_pedata" + , "__Copyright" + ] + +OPTIMIZE_OPTIONS = ["-S" + , "--disable-loop-unrolling" + , " --disable-i2p-p2i-opt" + , " --disable-loop-unrolling" + , " --disable-promote-alloca-to-lds" + , " --disable-promote-alloca-to-vector" + , " --disable-simplify-libcalls" + , " --disable-tail-calls" + , " --vectorize-slp=false" + , " --vectorize-loops=false" + , " -internalize-public-api-list=\"%s\"" % ','.join(INTERNALIZE_API_LIST) + ] + +#TODO: Remove this when we properly handle opaque pointers. +OPTIMIZE_OPTIONS.append("-opaque-pointers=0") + +OPTIMIZE_PASSES = ["function(mem2reg)" + , 'verify' +# , 'clambc-remove-undefs' #TODO: This was added because the optimizer in llvm-8 was replacing unused + # parameters with 'undef' values in the IR. This was causing issues in + # the writer, not knowing what value to put in the signature. 
The llvm-16 + # optimizer no longer does this, so this pass does not appear to still be + # needed. I have already done work upgrading the pass to the new + # pass manager, so I want to leave it in place throughout the -rc phase + # in case someone comes up with a testcase that re-introduces this bug. +# , 'verify' + , 'clambc-preserve-abis' + , 'verify' + , 'default<O3>' + , 'globalopt' + , 'clambc-preserve-abis' #remove fake function calls because O3 has already run + , 'verify' +# , 'clambc-remove-pointer-phis' +# , 'verify' + , 'clambc-remove-unsupported-icmp-intrinsics' + , 'verify' + , 'clambc-remove-usub' + , 'verify' + , 'clambc-remove-fshl' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'lowerswitch' + , 'verify' + , 'clambc-remove-icmp-sle' + , 'verify' + , 'function(clambc-verifier)' + , 'verify' + , 'clambc-remove-freeze-insts' + , 'verify' + , 'clambc-lowering-notfinal' # perform lowering pass + , 'verify' + , 'clambc-lcompiler-helper' #helper pass that runs before clambc-lcompiler + , 'verify' + , 'clambc-lcompiler' #compile the logical_trigger function to a logical signature. + , 'verify' + , 'internalize' + , 'verify' + , 'clambc-rebuild' + , 'verify' + , 'clambc-remove-pointer-phis' + , 'verify' + , 'clambc-trace' + , 'verify' + , 'clambc-outline-endianness-calls' + , 'verify' +# , 'clambc-change-malloc-arg-size' #TODO: This was added because the legacy llvm runtime + # had issues with 32-bit phi nodes being used in + # calls to malloc. I already did the work to + # update it to the new pass manager, but it appears + # to no longer be necessary. I will remove it + # after the -rc phase if nobody has a testcase + # that requires it. 
+# , 'verify' + , 'clambc-extend-phis-to-64-bit' + , 'verify' + , 'clambc-convert-intrinsics-to-32Bit' + , 'verify' + , 'globalopt' + , 'clambc-prepare-geps-for-writer' + , 'verify' + , 'clambc-writer' + , 'verify' +] + +OPTIMIZE_LOADS=[ f"--load {SHARED_OBJ_DIR}/libClamBCCommon.so" +# , f"--load-pass-plugin {SHARED_OBJ_DIR}/libclambcremoveundefs.so" #Not needed, since clambc-remove-undefs is not being used. + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCPreserveABIs.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUnsupportedICMPIntrinsics.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUSUB.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveFSHL.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemovePointerPHIs.so" #Still needed: clambc-remove-pointer-phis runs after clambc-rebuild in OPTIMIZE_PASSES. + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCLoweringNF.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveICMPSLE.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCVerifier.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveFreezeInsts.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCLoweringF.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCLogicalCompilerHelper.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCLogicalCompiler.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRebuild.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCTrace.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCOutlineEndiannessCalls.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCChangeMallocArgSize.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCExtendPHIsTo64Bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCConvertIntrinsicsTo32Bit.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCPrepareGEPsForWriter.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCAnalyzer.so" + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRegAlloc.so" + , f"--load-pass-plugin 
{SHARED_OBJ_DIR}/libClamBCWriter.so" +] + def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inputSourceFile: str, standardCompiler: bool) -> int: - internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright" - if standardCompiler: - internalizeAPIList += ",main" - - #TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it. - cmd = (f'{clangLLVM.getOpt()} ' - f' -S' - f' -verify-each' - f' -load "{SHARED_OBJ_FILE}"' - f' {inFile}' - f' -o {outFile}' - f' -mem2reg' - f' -clambc-remove-undefs' #add pointer bounds checking. - f' -clambc-preserve-abis' #add fake function calls that use all of - #the arguments so that O3 doesn't change - #the argument lists - f' -O3' - f' -clambc-preserve-abis' #remove fake function calls because O3 has already run - f' -clambc-remove-pointer-phis' - f' -dce' - f' -disable-loop-unrolling' - f' -disable-loop-vectorization' - f' -disable-slp-vectorization' - f' -globaldce' - f' -strip-dead-prototypes' - f' -constmerge' - f' -mem2reg' - f' -always-inline' - f' -globalopt' - f' -lowerswitch' - f' -lowerinvoke' - f' -globalopt' - f' -simplifycfg' - f' -indvars' - f' -constprop' - f' -clambc-lowering-notfinal' # perform lowering pass - f' -lowerswitch' - f' -clambc-verifier' - f' -clambc-lowering-notfinal' # perform lowering pass - f' -dce' - f' -simplifycfg' - f' -mem2reg' - f' -clambc-lcompiler' #compile the logical_trigger function to a - #logical signature. 
- f' -internalize -internalize-public-api-list="{internalizeAPIList}"' - f' -globaldce' - f' -instcombine' - f' -clambc-rebuild' - f' -verify' - f' -simplifycfg' - f' -dce' - f' -lowerswitch' - f' -clambc-verifier' - f' -verify' - f' -strip-debug-declare' - f' -clambc-lowering-final' - f' -clambc-trace' - f' -dce' - f' -clambc-module' - f' -verify' - f' -globalopt' - f' -remove-selects' - f' -clambc-outline-endianness-calls' #outline the endianness calls - #because otherwise the call - #is replaced with a constant - #that is based on where the - #signature was compiled, and - #won't always be accurate. - f' -clambc-change-malloc-arg-size' #make sure we always use the - #64-bit malloc. - f' -globalopt' - f' -clambc-extend-phis-to-64bit' #make all integer phi nodes 64-bit - #because the llvm runtime inserts a - #cast after phi nodes without - #verifying that there is not - #another phi node after it. - f' -clambc-prepare-geps-for-writer' #format gep indexes to not not - #have more than 2, because - #otherwise the writer gets - #unhappy. 
- f' -globalopt' - f' -clambc-convert-intrinsics' #convert all memset intrinsics to - #the 32-bit instead of the 64-bit - #intrinsic - f' -clambc-writer' #write the bytecode - f' -clambc-writer-input-source={inputSourceFile}' - f' -clambc-sigfile={sigFile}' - ) - - if standardCompiler: - cmd += f" -clambc-standard-compiler" + cmd = [] + cmd.append(clangLLVM.getOpt()) + cmd.append(inFile) + cmd.append('-o') + cmd.append(outFile) + cmd += OPTIMIZE_OPTIONS + cmd += OPTIMIZE_LOADS + + s = '--passes="' + first = True + for v in OPTIMIZE_PASSES: + if first: + first = False + else: + s += ',' + s += v + s += '"' + cmd.append(s) - return run(cmd) + cmd.append(f'-clambc-writer-input-source={inputSourceFile}') + cmd.append(f'-clambc-sigfile={sigFile}') -def genExe(clangLLVM: ClangLLVM, optimizedFile: str, outputFile: str) -> int: - cmd = f"{clangLLVM.getClang} {optimizedFile} -o {outputFile}" return run(cmd) -#This is definitely hacky, but I *think* it's the only change I need to make for +#This is definitely hacky, but it's the only change I need to make for #this to work def fixFileSize(optimizedFile: str) -> None: f = open(optimizedFile) @@ -779,16 +841,11 @@ def main(): parser.add_option(CLANG_BINARY_ARG, dest="clangBinary", help="Path to clang binary") parser.add_option(OPT_BINARY_ARG, dest="optBinary", help="Path to opt binary") -# parser.add_option("--generate-exe", dest="genexe", action="store_true", -# default=False, help="This is if you want to build a correctly formatted bytecode \ -# signature as an executable for debugging (NOT IMPLEMENTED)") parser.add_option("-I", action="append", dest="includes", default=None) parser.add_option("-D", action="append", dest="defines", default=None) parser.add_option("--disable-common-warnings", dest="disableCommonWarnings", - action="store_true", default=False, - help=f"{COMMON_WARNING_OPTIONS} (Found in some bytecode signatures).") -# parser.add_option("--standard-compiler", dest="standardCompiler", action="store_true", 
default=False, -# help="This is if you want to build a normal c program as an executable to test the compiler.") + action="store_true", default=True, + help="{%s} (Found in some bytecode signatures)." % (' '.join(COMMON_WARNING_OPTIONS))) (options, args) = parser.parse_args() if options.version: @@ -800,13 +857,10 @@ def main(): if None == clangLLVM: sys.exit(1) - options.genexe = False - options.standardCompiler = False - - options.passthroughOptions = " ".join(parser.getPassthrough()) + options.passthroughOptions = parser.getPassthrough() if not FOUND_SHARED_OBJ: - die(f"libclambcc.so not found. See instructions for building", 2) + die(f"Shared objects not found. See instructions for building", 2) if 0 == len(args): dieNoInputFile() @@ -817,26 +871,14 @@ def main(): outFile = getOutfile(options, args) outFile = os.path.basename(outFile) saveFiles = options.save - bCompiler = options.standardCompiler - buildExecutable = bCompiler or options.genexe createdDir = False - #Add the compiled bytecode file extension, so that all the getName functions can find it - if bCompiler: - idx = outFile.find(COMPILED_BYTECODE_FILE_EXTENSION) - if -1 == idx: - outFile += f".{COMPILED_BYTECODE_FILE_EXTENSION}" - if not os.path.isdir(TMPDIR): os.makedirs(TMPDIR) createdDir = True -# if options.genexe: -# inFile = os.path.join(os.path.dirname(__file__), 'clambc-compiler-main.c') -# args.append(inFile) -# - res = compileFiles(clangLLVM, args, False, bCompiler, options) + res = compileFiles(clangLLVM, args, False, False, options) if not res: linkedFile = getLinkedFileName(outFile) @@ -844,40 +886,12 @@ def main(): if not res: inputSourceFile = getInputSourceFileName(outFile) - if bCompiler: - f = open(inputSourceFile, "w") - f.close() - else: - res = createInputSourceFile(clangLLVM, inputSourceFile, args, options) + res = createInputSourceFile(clangLLVM, inputSourceFile, args, options) if not res: optimizedFile = getOptimizedFileName(outFile) outFile = getOutfile(options, args) - res = 
optimize(clangLLVM, linkedFile, optimizedFile, outFile, inputSourceFile, bCompiler) - - if not res: - if options.genexe: - - #Add the 'main' and all the stuff that clam provides (TODO: make this configurable by the user) - mainFile = os.path.join(os.path.dirname(__file__), 'clambc-compiler-main.c') - res = compileFile(clangLLVM, mainFile, False, False, options) - if res: - print("Build FAILED") - import pdb ; pdb.set_trace() - - if not res: - mainIRFile = getIrFile(mainFile, False) - - fixFileSize(optimizedFile) - fixFileSize(mainIRFile) - - res = linkIRFiles(clangLLVM, optimizedFile, [optimizedFile, mainIRFile]) - - bCompiler = True - - if not res: - if bCompiler: - res = genExe(clangLLVM, optimizedFile, outFile) + res = optimize(clangLLVM, linkedFile, optimizedFile, outFile, inputSourceFile, False) if ((not saveFiles) and createdDir): shutil.rmtree(TMPDIR) @@ -891,3 +905,6 @@ def main(): if '__main__' == __name__: main() + + + diff --git a/cmake/FindClamAV.cmake b/cmake/FindClamAV.cmake index 0a23a1bd9d4..1a5f634716c 100644 --- a/cmake/FindClamAV.cmake +++ b/cmake/FindClamAV.cmake @@ -26,7 +26,7 @@ find_program(clambc_EXECUTABLE HINTS "${ClamAV_HOME}" PATH_SUFFIXES "bin" ) -if(NOT clambc_EXECUTABLE_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) +if(NOT clambc_EXECUTABLE AND NOT ClamAV_FIND_QUIETLY) message("Unable to find clambc") endif() diff --git a/cmake/FindClang.cmake b/cmake/FindClang.cmake index 4db126c9edc..9ba32f096f5 100644 --- a/cmake/FindClang.cmake +++ b/cmake/FindClang.cmake @@ -30,7 +30,7 @@ #============================================================================= -set(KNOWN_VERSIONS 11 10 9 8 7 6.0 5.0 4.0 3.9 3.8) +set(KNOWN_VERSIONS 16) foreach(version ${KNOWN_VERSIONS}) if(DEFINED Clang_FIND_VERSION AND Clang_FIND_VERSION VERSION_EQUAL version) diff --git a/cmake/FindLLVM.cmake b/cmake/FindLLVM.cmake index 9e94b2d5096..fdfecb26b14 100644 --- a/cmake/FindLLVM.cmake +++ b/cmake/FindLLVM.cmake @@ -44,7 +44,6 @@ elseif(NOT LLVM_CONFIG_EXECUTABLE) 
foreach(i RANGE 0 9) list(APPEND LLVM_FIND_VERSION_CONCAT llvm-config${LLVM_FIND_VERSION_CONCAT_PREFIX}${i}) endforeach() - message("llvm-config list: ${LLVM_FIND_VERSION_CONCAT}") find_program(LLVM_CONFIG_EXECUTABLE NAMES llvm-config-${LLVM_FIND_VERSION} ${LLVM_FIND_VERSION_CONCAT} llvm-config DOC "llvm-config executable") @@ -133,7 +132,7 @@ if(LLVM_FOUND) OUTPUT_STRIP_TRAILING_WHITESPACE ) - if(NOT ${LLVM_VERSION} VERSION_LESS "3.8.0") + if(NOT ${LLVM_VERSION} VERSION_LESS "16") execute_process( COMMAND ${LLVM_CONFIG_EXECUTABLE} --shared-mode OUTPUT_VARIABLE _LLVM_SHARED_MODE @@ -148,16 +147,6 @@ if(LLVM_FOUND) set(LLVM_SHARED_MODE OFF) endif() - # potentially add include dir from binary dir for non-installed LLVM - execute_process( - COMMAND ${LLVM_CONFIG_EXECUTABLE} --src-root - OUTPUT_VARIABLE _llvmSourceRoot - OUTPUT_STRIP_TRAILING_WHITESPACE - ) - string(FIND "${LLVM_INCLUDE_DIRS}" "${_llvmSourceRoot}" _llvmIsInstalled) - if(NOT _llvmIsInstalled) - list(APPEND LLVM_INCLUDE_DIRS "${LLVM_INSTALL_PREFIX}/include") - endif() endif() if(LLVM_FIND_REQUIRED AND NOT LLVM_FOUND) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index dea625e7cb8..369f3cc1bdd 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -1,67 +1,9 @@ # Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. -# -# The hello object library -# -add_library(hello_obj OBJECT) -target_sources(hello_obj - PRIVATE - HelloWorld/HelloWorld.cpp -) -target_include_directories(hello_obj - PRIVATE - ../libclambcc # HACK: For Common/clambc.h - ${LLVM_INCLUDE_DIRS} -) +#'PassManager' is using the 'new' passmanager. This was added +#for the upgrade to llvm 16, although the 'new' pass manager +#has been around a while. -set_target_properties(hello_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") - -# -# For testing -# -#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) - -# -# The hello shared library. 
-# -add_library( hello SHARED ) -target_link_libraries( hello - PUBLIC - hello_obj ) -set_target_properties( hello PROPERTIES - VERSION ${LIBCLAMBC_VERSION} - SOVERSION ${LIBCLAMBC_SOVERSION} ) - -target_link_directories(hello_obj PRIVATE ${LLVM_LIBRARY_DIRS}) -target_link_libraries(hello_obj PUBLIC ${LLVM_LIBS}) - -if(WIN32) - install(TARGETS hello DESTINATION .) - - # Also install shared library (DLL) dependencies - install(CODE [[ - file(GET_RUNTIME_DEPENDENCIES - LIBRARIES - $ - RESOLVED_DEPENDENCIES_VAR _r_deps - UNRESOLVED_DEPENDENCIES_VAR _u_deps - DIRECTORIES - ${LLVM_LIBRARY_DIRS} - ) - foreach(_file ${_r_deps}) - string(TOLOWER ${_file} _file_lower) - if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") - file(INSTALL - DESTINATION "${CMAKE_INSTALL_PREFIX}" - TYPE SHARED_LIBRARY - FOLLOW_SYMLINK_CHAIN - FILES "${_file}" - ) - endif() - endforeach() - #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") - ]]) -else() - install(TARGETS hello DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +add_subdirectory(LegacyPassManager) +add_subdirectory(PassManager) diff --git a/examples/LegacyPassManager/CMakeLists.txt b/examples/LegacyPassManager/CMakeLists.txt new file mode 100644 index 00000000000..42b98f73f6d --- /dev/null +++ b/examples/LegacyPassManager/CMakeLists.txt @@ -0,0 +1,43 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The hello object library +# +add_library(hello_obj OBJECT) +target_sources(hello_obj + PRIVATE + HelloWorld/HelloWorld.cpp +) + +target_include_directories(hello_obj + PRIVATE + ../libclambcc # HACK: For Common/clambc.h + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(hello_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) + +# +# The hello shared library. 
+# +add_library( hello SHARED ) +target_link_libraries( hello + PUBLIC + hello_obj ) +set_target_properties( hello PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(hello_obj PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(hello_obj PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS hello DESTINATION .) +else() + install(TARGETS hello DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() diff --git a/examples/HelloWorld/HelloWorld.cpp b/examples/LegacyPassManager/HelloWorld/HelloWorld.cpp similarity index 68% rename from examples/HelloWorld/HelloWorld.cpp rename to examples/LegacyPassManager/HelloWorld/HelloWorld.cpp index cdd120b5e7a..65e0620ac55 100644 --- a/examples/HelloWorld/HelloWorld.cpp +++ b/examples/LegacyPassManager/HelloWorld/HelloWorld.cpp @@ -10,27 +10,26 @@ #include "llvm/IR/LegacyPassManager.h" #include "llvm/Transforms/IPO/PassManagerBuilder.h" -#include "Common/clambc.h" using namespace llvm; -namespace { +namespace +{ struct Hello : public FunctionPass { - static char ID; - Hello() : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override { - errs() << "Hello: "; - errs().write_escaped(F.getName()) << '\n'; - return false; - } + static char ID; + Hello() + : FunctionPass(ID) {} + + bool runOnFunction(Function &F) override + { + errs() << "Hello: "; + errs().write_escaped(F.getName()) << '\n'; + return false; + } }; // end of struct Hello -} // end of anonymous namespace +} // end of anonymous namespace char Hello::ID = 0; static RegisterPass X("hello", "Hello World Pass", false /* Only looks at CFG */, false /* Analysis Pass */); - - - diff --git a/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp b/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp new file mode 100644 index 00000000000..1ecde20d59b --- /dev/null +++ b/examples/PassManager/AnalysisPlugin/AnalysisPlugin.cpp @@ -0,0 +1,125 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. 
+ * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include +#include +#include + +#include +#include +#include + +#include + +using namespace llvm; +using namespace std; + +/* Modeled after CallGraphAnalysis */ + +namespace +{ + +class AnalysisResult +{ + public: + AnalysisResult() + { + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" + << "\n"; + } +}; + +class ExampleAnalysis : public AnalysisInfoMixin +{ + + public: + friend AnalysisInfoMixin; + static AnalysisKey Key; + + ExampleAnalysis() + { + } + + typedef AnalysisResult Result; + + AnalysisResult run(llvm::Module &F, llvm::ModuleAnalysisManager &fam) + { + + llvm::errs() << "<" + << "Analysis::" << __LINE__ << ">" + << "\n"; + return AnalysisResult(); + } +}; + +AnalysisKey ExampleAnalysis::Key; + +struct ExamplePass : public PassInfoMixin { + protected: + Module *pMod = nullptr; + bool bChanged = false; + + public: + virtual ~ExamplePass() {} + + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) + { + pMod = &m; + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" + << "Transform Pass" + << "\n"; + + MAM.getResult(m); + + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" + << "Transform Pass (leaving)" + << "\n"; + + return PreservedAnalyses::all(); + } +}; // 
end of struct ExamplePass + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ExamplePass", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "example-pass-with-analysis") { + FPM.addPass(ExamplePass()); + return true; + } + return false; + }); + + PB.registerAnalysisRegistrationCallback( + [](ModuleAnalysisManager &mam) { + mam.registerPass([]() { return ExampleAnalysis(); }); + }); + }}; +} diff --git a/examples/PassManager/AnalysisPlugin/CMakeLists.txt b/examples/PassManager/AnalysisPlugin/CMakeLists.txt new file mode 100644 index 00000000000..e42c9f45e71 --- /dev/null +++ b/examples/PassManager/AnalysisPlugin/CMakeLists.txt @@ -0,0 +1,48 @@ +# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The analysisplugin object library +# +add_library(analysisplugin_obj OBJECT) +target_sources(analysisplugin_obj + PRIVATE + AnalysisPlugin.cpp +) + +target_include_directories(analysisplugin_obj + PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + . # For Common/clambc.h + .. # For clambc.h #TODO: change all passes to use "Common" and then delete this line. + ${LLVM_INCLUDE_DIRS} +) + +set_target_properties(analysisplugin_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") + +# +# For testing +# +#target_compile_definitions(analysisplugin_obj -DLOG_BEFORE_AFTER=1) + +# +# The analysisplugin shared library. 
+# +add_library( analysisplugin SHARED ) +target_link_libraries( analysisplugin + PUBLIC + analysisplugin_obj ) +set_target_properties( analysisplugin PROPERTIES + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION} ) + +target_link_directories(analysisplugin PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(analysisplugin PUBLIC ${LLVM_LIBS}) + +if(WIN32) + install(TARGETS analysisplugin DESTINATION .) +else() + install(TARGETS analysisplugin DESTINATION ${CMAKE_INSTALL_LIBDIR}) +endif() + + + diff --git a/examples/PassManager/CMakeLists.txt b/examples/PassManager/CMakeLists.txt new file mode 100644 index 00000000000..3d8b83e6667 --- /dev/null +++ b/examples/PassManager/CMakeLists.txt @@ -0,0 +1,3 @@ +# Copyright (C) 2021-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +add_subdirectory(AnalysisPlugin) diff --git a/examples/PassManager/input/compile.sh b/examples/PassManager/input/compile.sh new file mode 100755 index 00000000000..ac8d245db5f --- /dev/null +++ b/examples/PassManager/input/compile.sh @@ -0,0 +1,47 @@ +#!/bin/bash + + +SOURCE_FILE=analysis_test.c + +echo "#include " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo "int func2(int i){ " >> $SOURCE_FILE +echo " return i/2; " >> $SOURCE_FILE +echo "} " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo "int func(int idx){ " >> $SOURCE_FILE +echo " int tmp; " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo " if (idx > 1){ " >> $SOURCE_FILE +echo " tmp = func2(11); " >> $SOURCE_FILE +echo " } else { " >> $SOURCE_FILE +echo " tmp = func(idx-1); " >> $SOURCE_FILE +echo " } " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo " if (0 == tmp){ " >> $SOURCE_FILE +echo " return 0; " >> $SOURCE_FILE +echo " } " >> $SOURCE_FILE +echo " return idx-1; " >> $SOURCE_FILE +echo "} " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo "int main(int argc, char ** argv){ " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo " if (argc){ " >> $SOURCE_FILE +echo " func(argc); " >> $SOURCE_FILE +echo 
" } " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE +echo " return 0; " >> $SOURCE_FILE +echo "} " >> $SOURCE_FILE +echo " " >> $SOURCE_FILE + + +clang-16 \ + -S \ + -fno-discard-value-names \ + --language=c \ + -emit-llvm \ + -Werror=unused-command-line-argument \ + -Xclang \ + -disable-O0-optnone \ + $SOURCE_FILE diff --git a/examples/PassManager/input/run_opt.sh b/examples/PassManager/input/run_opt.sh new file mode 100755 index 00000000000..77544e4eb59 --- /dev/null +++ b/examples/PassManager/input/run_opt.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +opt-16 -load-pass-plugin examples/NewPassManager/AnalysisPlugin/libanalysisplugin.so -passes=example-pass-with-analysis analysis_test.ll -o analysis_test.t.ll + diff --git a/headers/bcfeatures.h b/headers/bcfeatures.h index 96883abcde9..cf556b38917 100644 --- a/headers/bcfeatures.h +++ b/headers/bcfeatures.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * Authors: Török Edvin diff --git a/headers/bytecode_api.h b/headers/bytecode_api.h index ea0e3544733..04cdf7da2dd 100644 --- a/headers/bytecode_api.h +++ b/headers/bytecode_api.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. 
* Authors: Török Edvin, Kevin Lin @@ -152,13 +152,27 @@ enum FunctionalityLevels { FUNC_LEVEL_0103_4 = 125, /**< LibClamAV release 0.103.4 */ FUNC_LEVEL_0103_5 = 126, /**< LibClamAV release 0.103.5 */ FUNC_LEVEL_0103_6 = 127, /**< LibClamAV release 0.103.6 */ + FUNC_LEVEL_0103_7 = 128, /**< LibClamAV release 0.103.7 */ + FUNC_LEVEL_0103_8 = 129, /**< LibClamAV release 0.103.8 */ FUNC_LEVEL_0104 = 140, /**< LibClamAV release 0.104.0 */ FUNC_LEVEL_0104_1 = 141, /**< LibClamAV release 0.104.1 */ FUNC_LEVEL_0104_2 = 142, /**< LibClamAV release 0.104.2 */ FUNC_LEVEL_0104_3 = 143, /**< LibClamAV release 0.104.3 */ + FUNC_LEVEL_0104_4 = 144, /**< LibClamAV release 0.104.4 */ - FUNC_LEVEL_0105 = 150, /**< LibClamAV release 0.105.0 */ + FUNC_LEVEL_0105 = 150, /**< LibClamAV release 0.105.0 */ + FUNC_LEVEL_0105_1 = 151, /**< LibClamAV release 0.105.1 */ + FUNC_LEVEL_0105_2 = 152, /**< LibClamAV release 0.105.2 */ + + FUNC_LEVEL_1_0 = 160, /**< LibClamAV release 1.0.0 */ + FUNC_LEVEL_1_0_1 = 161, /**< LibClamAV release 1.0.1 */ + + FUNC_LEVEL_1_1 = 180, /**< LibClamAV release 1.1.0 */ + + FUNC_LEVEL_1_2 = 190, /**< LibClamAV release 1.2.0 */ + + FUNC_LEVEL_1_3 = 200, /**< LibClamAV release 1.3.0 */ }; /** diff --git a/headers/bytecode_api_decl.c.h b/headers/bytecode_api_decl.c.h index 4328796bfbc..c00684e0859 100644 --- a/headers/bytecode_api_decl.c.h +++ b/headers/bytecode_api_decl.c.h @@ -2,7 +2,7 @@ * ClamAV bytecode internal API * This is an automatically generated file! * - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. 
* * Redistribution and use in source and binary forms, with or without diff --git a/headers/bytecode_detect.h b/headers/bytecode_detect.h index 71cc195f15a..039446b61f6 100644 --- a/headers/bytecode_detect.h +++ b/headers/bytecode_detect.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2022 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * * Redistribution and use in source and binary forms, with or without diff --git a/libclambcc/CMakeLists.txt b/libclambcc/CMakeLists.txt index 4812a8f87ca..340caeda6cd 100644 --- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -1,89 +1,391 @@ -# Copyright (C) 2021 Cisco Systems, Inc. and/or its affiliates. All rights reserved. - -# -# The clambcc object library -# -add_library(clambcc_obj OBJECT) -target_sources(clambcc_obj - PRIVATE - ClamBCLowering/ClamBCLowering.cpp - ClamBCVerifier/ClamBCVerifier.cpp - ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp - ClamBCRebuild/ClamBCRebuild.cpp - ClamBCTrace/ClamBCTrace.cpp - ClamBCModule/ClamBCModule.cpp - ClamBCWriter/ClamBCWriter.cpp - ClamBCAnalyzer/ClamBCAnalyzer.cpp - Common/ClamBCDiagnostics.cpp - Common/ClamBCUtilities.cpp - Common/ClamBCRegAlloc.cpp - Common/version.c - ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp - ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp - ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp - ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp - ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp - ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp - ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp - ClamBCPreserveABIs/ClamBCPreserveABIs.cpp - ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp -) - -target_include_directories(clambcc_obj - PRIVATE - ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) - . 
# For Common/clambc.h - Common # For clambc.h #TODO: change all passes to use "Common" and then delete this line. - ${LLVM_INCLUDE_DIRS} -) - -set_target_properties(clambcc_obj PROPERTIES COMPILE_FLAGS "${WARNCXXFLAGS}") - -# -# For testing -# -#target_compile_definitions(clambc_obj -DLOG_BEFORE_AFTER=1) - -# -# The clambcc shared library. -# -add_library( clambcc SHARED ) -target_link_libraries( clambcc - PUBLIC - clambcc_obj ) -set_target_properties( clambcc PROPERTIES - VERSION ${LIBCLAMBC_VERSION} - SOVERSION ${LIBCLAMBC_SOVERSION} ) - -target_link_directories(clambcc PRIVATE ${LLVM_LIBRARY_DIRS}) -target_link_libraries(clambcc PUBLIC ${LLVM_LIBS}) - -if(WIN32) - install(TARGETS clambcc DESTINATION .) - - # Also install shared library (DLL) dependencies - install(CODE [[ - file(GET_RUNTIME_DEPENDENCIES - LIBRARIES - $ - RESOLVED_DEPENDENCIES_VAR _r_deps - UNRESOLVED_DEPENDENCIES_VAR _u_deps - DIRECTORIES - ${LLVM_LIBRARY_DIRS} - ) - foreach(_file ${_r_deps}) - string(TOLOWER ${_file} _file_lower) - if(NOT ${_file_lower} MATCHES "c:[\\/]windows[\\/]system32.*") - file(INSTALL - DESTINATION "${CMAKE_INSTALL_PREFIX}" - TYPE SHARED_LIBRARY - FOLLOW_SYMLINK_CHAIN - FILES "${_file}" - ) - endif() - endforeach() - #message("UNRESOLVED_DEPENDENCIES_VAR: ${_u_deps}") - ]]) -else() - install(TARGETS clambcc DESTINATION ${CMAKE_INSTALL_LIBDIR}) -endif() +# Copyright (C) 2021-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + +# +# The common shared library. 
+# +add_library(ClamBCCommon SHARED + ClamBCDiagnostics.cpp + ClamBCUtilities.cpp + version.c) +target_include_directories(ClamBCCommon PRIVATE + ${CMAKE_BINARY_DIR} # For clambc-version.h (generated file) + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCCommon PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCCommon -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCCommon PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCCommon PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCCommon DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCAnalyzer shared library. +# +add_library(ClamBCAnalyzer SHARED + ClamBCAnalyzer.cpp) +target_include_directories(ClamBCAnalyzer PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCAnalyzer PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCAnalyzer -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCAnalyzer PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCAnalyzer PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCAnalyzer DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCChangeMallocArgSize shared library. 
+# +add_library(ClamBCChangeMallocArgSize SHARED + ClamBCChangeMallocArgSize.cpp) +target_include_directories(ClamBCChangeMallocArgSize PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCChangeMallocArgSize PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCChangeMallocArgSize -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCChangeMallocArgSize PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCChangeMallocArgSize PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCChangeMallocArgSize DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCConvertIntrinsicsTo32Bit shared library. +# +add_library(ClamBCConvertIntrinsicsTo32Bit SHARED + ClamBCConvertIntrinsicsTo32Bit.cpp) +target_include_directories(ClamBCConvertIntrinsicsTo32Bit PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCConvertIntrinsicsTo32Bit PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCConvertIntrinsicsTo32Bit -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCConvertIntrinsicsTo32Bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCConvertIntrinsicsTo32Bit PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCConvertIntrinsicsTo32Bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCExtendPHIsTo64Bit shared library. 
+# +add_library(ClamBCExtendPHIsTo64Bit SHARED + ClamBCExtendPHIsTo64Bit.cpp) +target_include_directories(ClamBCExtendPHIsTo64Bit PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCExtendPHIsTo64Bit PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCExtendPHIsTo64Bit -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCExtendPHIsTo64Bit PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCExtendPHIsTo64Bit PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCExtendPHIsTo64Bit DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCLogicalCompiler shared library. +# +add_library(ClamBCLogicalCompiler SHARED + ClamBCLogicalCompiler.cpp) +target_include_directories(ClamBCLogicalCompiler PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCLogicalCompiler PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCLogicalCompiler -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCLogicalCompiler PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCLogicalCompiler PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCLogicalCompiler DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCLogicalCompilerHelper shared library. 
+# +add_library(ClamBCLogicalCompilerHelper SHARED + ClamBCLogicalCompilerHelper.cpp) +target_include_directories(ClamBCLogicalCompilerHelper PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCLogicalCompilerHelper PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCLogicalCompilerHelper -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCLogicalCompilerHelper PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCLogicalCompilerHelper PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCLogicalCompilerHelper DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCLoweringF shared library. +# +add_library(ClamBCLoweringF SHARED + ClamBCLowering.cpp + ClamBCLoweringF.cpp) +target_include_directories(ClamBCLoweringF PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCLoweringF PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCLoweringF -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCLoweringF PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCLoweringF PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCLoweringF DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCLoweringNF shared library. 
+# +add_library(ClamBCLoweringNF SHARED + ClamBCLowering.cpp + ClamBCLoweringNF.cpp) +target_include_directories(ClamBCLoweringNF PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCLoweringNF PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCLoweringNF -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCLoweringNF PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCLoweringNF PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCLoweringNF DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCOutlineEndiannessCalls shared library. +# +add_library(ClamBCOutlineEndiannessCalls SHARED + ClamBCOutlineEndiannessCalls.cpp) +target_include_directories(ClamBCOutlineEndiannessCalls PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCOutlineEndiannessCalls PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCOutlineEndiannessCalls -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCOutlineEndiannessCalls PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCOutlineEndiannessCalls PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCOutlineEndiannessCalls DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCPrepareGEPsForWriter shared library. 
+# +add_library(ClamBCPrepareGEPsForWriter SHARED + ClamBCPrepareGEPsForWriter.cpp) +target_include_directories(ClamBCPrepareGEPsForWriter PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCPrepareGEPsForWriter PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCPrepareGEPsForWriter -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCPrepareGEPsForWriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCPrepareGEPsForWriter PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCPrepareGEPsForWriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCPreserveABIs shared library. +# +add_library(ClamBCPreserveABIs SHARED + ClamBCPreserveABIs.cpp) +target_include_directories(ClamBCPreserveABIs PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCPreserveABIs PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCPreserveABIs -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCPreserveABIs PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCPreserveABIs PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCPreserveABIs DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRebuild shared library. +# +add_library(ClamBCRebuild SHARED + ClamBCRebuild.cpp) +target_include_directories(ClamBCRebuild PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRebuild PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRebuild -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRebuild PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRebuild PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRebuild DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRegAlloc shared library. 
+# +add_library(ClamBCRegAlloc SHARED + ClamBCRegAlloc.cpp) +target_include_directories(ClamBCRegAlloc PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRegAlloc PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRegAlloc -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRegAlloc PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRegAlloc PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRegAlloc DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemoveFreezeInsts shared library. +# +add_library(ClamBCRemoveFreezeInsts SHARED + ClamBCRemoveFreezeInsts.cpp) +target_include_directories(ClamBCRemoveFreezeInsts PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemoveFreezeInsts PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemoveFreezeInsts -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemoveFreezeInsts PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemoveFreezeInsts PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemoveFreezeInsts DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemoveFSHL shared library. +# +add_library(ClamBCRemoveFSHL SHARED + ClamBCRemoveFSHL.cpp) +target_include_directories(ClamBCRemoveFSHL PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemoveFSHL PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemoveFSHL -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemoveFSHL PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemoveFSHL PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemoveFSHL DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemoveICMPSLE shared library. 
+# +add_library(ClamBCRemoveICMPSLE SHARED + ClamBCRemoveICMPSLE.cpp) +target_include_directories(ClamBCRemoveICMPSLE PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemoveICMPSLE PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemoveICMPSLE -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemoveICMPSLE PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemoveICMPSLE PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemoveICMPSLE DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemovePointerPHIs shared library. +# +add_library(ClamBCRemovePointerPHIs SHARED +ClamBCRemovePointerPHIs.cpp) +target_include_directories(ClamBCRemovePointerPHIs PRIVATE +${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemovePointerPHIs PROPERTIES +COMPILE_FLAGS "${WARNCXXFLAGS}" +VERSION ${LIBCLAMBC_VERSION} +SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemovePointerPHIs -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemovePointerPHIs PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemovePointerPHIs PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemovePointerPHIs DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# # +# # The ClamBCRemoveUndefs shared library. 
+# # +# add_library(ClamBCRemoveUndefs SHARED +# ClamBCRemoveUndefs.cpp) +# target_include_directories(ClamBCRemoveUndefs PRIVATE +# ${LLVM_INCLUDE_DIRS}) +# set_target_properties(ClamBCRemoveUndefs PROPERTIES +# COMPILE_FLAGS "${WARNCXXFLAGS}" +# VERSION ${LIBCLAMBC_VERSION} +# SOVERSION ${LIBCLAMBC_SOVERSION}) +# #target_compile_definitions(ClamBCRemoveUndefs -DLOG_BEFORE_AFTER=1) # For testing +# target_link_directories(ClamBCRemoveUndefs PRIVATE ${LLVM_LIBRARY_DIRS}) +# target_link_libraries(ClamBCRemoveUndefs PUBLIC ${LLVM_LIBS}) +# install(TARGETS ClamBCRemoveUndefs DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemoveUnsupportedICMPIntrinsics shared library. +# +add_library(ClamBCRemoveUnsupportedICMPIntrinsics SHARED + ClamBCRemoveUnsupportedICMPIntrinsics.cpp) +target_include_directories(ClamBCRemoveUnsupportedICMPIntrinsics PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemoveUnsupportedICMPIntrinsics PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemoveUnsupportedICMPIntrinsics -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemoveUnsupportedICMPIntrinsics PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemoveUnsupportedICMPIntrinsics PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemoveUnsupportedICMPIntrinsics DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCRemoveUSUB shared library. 
+# +add_library(ClamBCRemoveUSUB SHARED + ClamBCRemoveUSUB.cpp) +target_include_directories(ClamBCRemoveUSUB PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemoveUSUB PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemoveUSUB -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemoveUSUB PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemoveUSUB PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemoveUSUB DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCTrace shared library. +# +add_library(ClamBCTrace SHARED + ClamBCTrace.cpp) +target_include_directories(ClamBCTrace PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCTrace PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCTrace -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCTrace PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCTrace PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCTrace DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCVerifier shared library. +# +add_library(ClamBCVerifier SHARED + ClamBCVerifier.cpp) +target_include_directories(ClamBCVerifier PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCVerifier PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCVerifier -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCVerifier PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCVerifier PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCVerifier DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +# +# The ClamBCWriter shared library. 
+# +add_library(ClamBCWriter SHARED + ClamBCWriter.cpp) +target_include_directories(ClamBCWriter PRIVATE + ${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCWriter PROPERTIES + COMPILE_FLAGS "${WARNCXXFLAGS}" + VERSION ${LIBCLAMBC_VERSION} + SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCWriter -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCWriter PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCWriter PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCWriter DESTINATION ${CMAKE_INSTALL_LIBDIR}) + diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp b/libclambcc/ClamBCAnalyzer.cpp similarity index 86% rename from libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp rename to libclambcc/ClamBCAnalyzer.cpp index 0527edaa83c..b82197e9d24 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.cpp +++ b/libclambcc/ClamBCAnalyzer.cpp @@ -20,8 +20,9 @@ * MA 02110-1301, USA. */ #include "ClamBCAnalyzer.h" -#include "Common/ClamBCCommon.h" -#include "Common/ClamBCUtilities.h" + +#include "ClamBCCommon.h" +#include "ClamBCUtilities.h" #include #include @@ -37,7 +38,7 @@ using namespace llvm; -extern cl::opt WriteDI; +AnalysisKey ClamBCAnalyzer::Key; static unsigned getSpecialIndex(StringRef Name) { @@ -67,58 +68,55 @@ static bool compare_lt_functions(Function *A, Function *B) return NA.compare(NB) < 0; } -bool ClamBCAnalyzer::runOnModule(Module &M) +void ClamBCAnalysis::run(Module &m) { - pMod = &M; + pMod = &m; // Determine bytecode kind, default is 0 (generic). kind = 0; - GlobalVariable *GVKind = M.getGlobalVariable("__clambc_kind"); + GlobalVariable *GVKind = pMod->getGlobalVariable("__clambc_kind"); if (GVKind && GVKind->hasDefinitiveInitializer()) { kind = cast(GVKind->getInitializer())->getValue().getZExtValue(); // GVKind->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal, because the optimizer will remove it. 
if (kind >= 65536) { - ClamBCStop("Bytecode kind cannot be higher than 64k\n", &M); + ClamBCStop("Bytecode kind cannot be higher than 64k\n", pMod); } } - GlobalVariable *G = M.getGlobalVariable("__Copyright"); + GlobalVariable *G = pMod->getGlobalVariable("__Copyright"); if (G && G->hasDefinitiveInitializer()) { Constant *C = G->getInitializer(); // std::string c; StringRef c; if (!getConstantStringInfo(C, c)) { - ClamBCStop("Failed to extract copyright string\n", &M); + ClamBCStop("Failed to extract copyright string\n", pMod); } - // copyright = strdup(c.c_str()); copyright = c.str(); - // G->setLinkage(GlobalValue::InternalLinkage); // Do not set the linkage type to internal because the optimizer will remove it. } // Logical signature created by ClamBCLogicalCompiler. - NamedMDNode *Node = M.getNamedMetadata("clambc.logicalsignature"); + NamedMDNode *Node = pMod->getNamedMetadata("clambc.logicalsignature"); logicalSignature = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; - Node = M.getNamedMetadata("clambc.virusnames"); + Node = pMod->getNamedMetadata("clambc.virusnames"); virusnames = Node ? cast(Node->getOperand(0)->getOperand(0))->getString() : ""; unsigned tid, fid; // unsigned cid; - startTID = tid = clamav::initTypeIDs(typeIDs, M.getContext()); + startTID = tid = clamav::initTypeIDs(typeIDs, pMod->getContext()); // arrays of [2 x i8] .. 
[7 x i8] used for struct padding for (unsigned i = 1; i < 8; i++) { - const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(M.getContext()), + const Type *Ty = llvm::ArrayType::get(llvm::Type::getInt8Ty(pMod->getContext()), i); typeIDs[Ty] = tid++; extraTypes.push_back(Ty); } std::vector types; - // cid=1; fid = 1; - for (Module::global_iterator I = M.global_begin(); I != M.global_end(); ++I) { + for (Module::global_iterator I = pMod->global_begin(); I != pMod->global_end(); ++I) { GlobalVariable *gv = llvm::cast(I); std::set insts; std::set globs; @@ -136,14 +134,14 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // globals, so introduce helper globals for nested constant expressions. if (CE->getOpcode() != Instruction::GetElementPtr) { if (CE->getOpcode() == Instruction::BitCast) { - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_bc"); CEMap[CE] = GV; continue; } errs() << "UNSUPPORTED: " << *CE << "\n"; - ClamBCStop("Unsupported constant expression", &M); + ClamBCStop("Unsupported constant expression", pMod); } ConstantInt *C0 = dyn_cast(CE->getOperand(1)); ConstantInt *C1 = dyn_cast(CE->getOperand(2)); @@ -152,7 +150,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) errs() << "UNSUPPORTED: " << *CE << "\n"; ClamBCStop("Unsupported constant expression, nonzero first" " index", - &M); + pMod); } const DataLayout &dataLayout = pMod->getDataLayout(); @@ -161,18 +159,15 @@ bool ClamBCAnalyzer::runOnModule(Module &M) indices.push_back(CE->getOperand(i)); } Type *IP8Ty = PointerType::getUnqual(Type::getInt8Ty(CE->getContext())); + Type *type = getResultType(CE); - Type *type = CE->getOperand(0)->getType(); - if (llvm::isa(type)) { - type = llvm::cast(type)->getElementType(); - } uint64_t idx = dataLayout.getIndexedOffsetInType(type, indices); Value *Idxs[1]; Idxs[0] = ConstantInt::get(Type::getInt64Ty(CE->getContext()), idx); 
Constant *C = ConstantExpr::getPointerCast(CE->getOperand(0), IP8Ty); ConstantExpr *NewCE = - cast(ConstantExpr::getGetElementPtr(nullptr, C, + cast(ConstantExpr::getGetElementPtr(C->getType(), C, Idxs)); NewCE = cast(ConstantExpr::getPointerCast(NewCE, CE->getType())); @@ -180,7 +175,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) CE->replaceAllUsesWith(NewCE); } CE = NewCE; - GlobalVariable *GV = new GlobalVariable(M, CE->getType(), true, + GlobalVariable *GV = new GlobalVariable(*pMod, CE->getType(), true, GlobalValue::InternalLinkage, CE, I->getName() + "_" + Twine(v)); @@ -190,6 +185,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Collect types of all globals. const Type *Ty = I->getType(); + Ty = I->getValueType(); if (!typeIDs.count(Ty)) { extraTypes.push_back(Ty); typeIDs[Ty] = tid++; @@ -199,30 +195,39 @@ bool ClamBCAnalyzer::runOnModule(Module &M) // Sort functions. std::vector functions; - for (Module::iterator I = M.begin(), E = M.end(); I != E;) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E;) { Function *F = &*I; ++I; functions.push_back(F); - F->removeFromParent(); } + + /* + * Remove all functions and re-insert them sorted by number of arguments. + * This is a requirement of the writer, but I have not verified that it is + * still necessary. 
+ */ + for (size_t i = 0; i < functions.size(); i++) { + functions[i]->removeFromParent(); + } + std::sort(functions.begin(), functions.end(), compare_lt_functions); for (std::vector::iterator I = functions.begin(), E = functions.end(); I != E; ++I) { - M.getFunctionList().push_back(*I); + pMod->getFunctionList().push_back(*I); } - Function *ep = M.getFunction("entrypoint"); + Function *ep = pMod->getFunction("entrypoint"); if (!ep) { - ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", &M); + ClamBCStop("Bytecode must define an entrypoint (with 0 parameters)!\n", pMod); } if (ep->getFunctionType()->getNumParams() != 0) { - ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", &M); + ClamBCStop("Bytecode must define an entrypoint with 0 parameters!\n", pMod); } unsigned dbgid = 0; - unsigned MDDbgKind = M.getContext().getMDKindID("dbg"); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + unsigned MDDbgKind = pMod->getContext().getMDKindID("dbg"); + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { Function &F = *I; if (F.isDeclaration()) { // Don't add prototypes of debug intrinsics @@ -261,20 +266,13 @@ bool ClamBCAnalyzer::runOnModule(Module &M) extraTypes.push_back(Ty); typeIDs[Ty] = tid++; } + for (inst_iterator II = inst_begin(F), IE = inst_end(F); II != IE; ++II) { const Type *Ty; // Skip debug intrinsics, so we don't add llvm.dbg.* types if (isa(&*II)) { continue; } - if (WriteDI) { - if (MDNode *Dbg = II->getMetadata(MDDbgKind)) { - if (!dbgMap.count(Dbg)) { - dbgMap[Dbg] = dbgid++; - } - anyDbgIds = true; - } - } // Collect types of all instructions. 
if (const AllocaInst *AI = dyn_cast(&*II)) { @@ -282,6 +280,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } else { Ty = II->getType(); } + if (const GetElementPtrInst *GEPI = dyn_cast(&*II)) { const Type *GTy = GEPI->getPointerOperand()->getType(); if (!typeIDs.count(GTy)) { @@ -290,6 +289,24 @@ bool ClamBCAnalyzer::runOnModule(Module &M) typeIDs[GTy] = tid++; } } + + /* + * Collect types of all operands to each instruction. Basic Blocks + * don't have types, so they can be skipped. + */ + for (size_t i = 0; i < II->getNumOperands(); i++) { + Value *operand = II->getOperand(i); + if (llvm::isa(operand)) { + continue; + } + Type *pt = operand->getType(); + if (0 == typeIDs.count(pt)) { + types.push_back(pt); + extraTypes.push_back(pt); + typeIDs[pt] = tid++; + } + } + if (typeIDs.count(Ty)) { continue; } @@ -316,7 +333,7 @@ bool ClamBCAnalyzer::runOnModule(Module &M) continue; } DEBUGERR << *STy << "\n"; - ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", &M); + ClamBCStop("Bytecode cannot use abstract types (only pointers to them)!", pMod); } } if (!typeIDs.count(STy)) { @@ -328,21 +345,18 @@ bool ClamBCAnalyzer::runOnModule(Module &M) } if (tid >= 65536) { - ClamBCStop("Attempted to use more than 64k types", &M); + ClamBCStop("Attempted to use more than 64k types", pMod); } printGlobals(startTID); - - return false; } -void ClamBCAnalyzer::printGlobals(uint16_t stid) +void ClamBCAnalysis::printGlobals(uint16_t stid) { llvm::Module &M = *pMod; // Describe types maxApi = 0; - // std::vector apis; - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) { + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { llvm::Function *pFunc = llvm::cast(I); // Skip dead declarations if (I->use_empty()) { @@ -392,20 +406,19 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) for (StringMap::iterator I = globalsMap.begin(), E = globalsMap.end(); I != E; ++I) { - if (GlobalVariable *GV = M.getGlobalVariable(I->getKey())) { + 
if (GlobalVariable *GV = pMod->getGlobalVariable(I->getKey())) { specialGlobals.insert(GV); globals[GV] = I->getValue(); if (I->getValue() > maxGlobal) maxGlobal = I->getValue(); } } - if (GlobalVariable *GV = M.getGlobalVariable("__clambc_kind")) { + if (GlobalVariable *GV = pMod->getGlobalVariable("__clambc_kind")) { specialGlobals.insert(GV); } - // std::vector globalInits; globalInits.push_back(0); // ConstantPointerNul placeholder - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { + for (Module::global_iterator I = pMod->global_begin(), E = pMod->global_end(); I != E; ++I) { GlobalVariable *pgv = llvm::cast(I); if (specialGlobals.count(pgv)) { continue; @@ -433,8 +446,9 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) &M); } Constant *C = pgv->getInitializer(); - if (C->use_empty()) + if (C->use_empty()) { continue; + } globalInits.push_back(C); globals[pgv] = i++; if (i >= 32768) { @@ -473,7 +487,7 @@ void ClamBCAnalyzer::printGlobals(uint16_t stid) } // need to use bytecode_api_decl.c.h -void ClamBCAnalyzer::populateAPIMap() +void ClamBCAnalysis::populateAPIMap() { unsigned id = 1; apiMap["test1"] = id++; @@ -585,14 +599,16 @@ void ClamBCAnalyzer::populateAPIMap() apiMap["bzip2_done"] = id++; } -void ClamBCAnalyzer::getAnalysisUsage(AnalysisUsage &AU) const +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - // Preserve the CFG, we only eliminate PHIs, and introduce some - // loads/stores. 
- AU.setPreservesAll(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCAnalysis", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](ModuleAnalysisManager &mam) { + mam.registerPass([]() { return ClamBCAnalyzer(); }); + }); + }}; } -char ClamBCAnalyzer::ID = 0; -static RegisterPass X("clambc-analyzer", - "ClamAV bytecode register allocator"); - -const PassInfo *const ClamBCAnalyzerID = &X; diff --git a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h b/libclambcc/ClamBCAnalyzer.h similarity index 73% rename from libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h rename to libclambcc/ClamBCAnalyzer.h index aec37d78ccb..325d61f25fb 100644 --- a/libclambcc/ClamBCAnalyzer/ClamBCAnalyzer.h +++ b/libclambcc/ClamBCAnalyzer.h @@ -22,30 +22,26 @@ #ifndef CLAMBC_ANALYZER_H_ #define CLAMBC_ANALYZER_H_ -#include "Common/clambc.h" +#include "clambc.h" -#include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" +#include +#include +#include +#include +#include #include #include -#include "llvm/Support/raw_ostream.h" +#include + +#include +#include +#include + +#include #include #include -//TODO list -//1. Add checks for either source code or copyright clause. -//2. Take a look at the way CEMap is used. It is checking for uses of some types of ConstantExpr's, -// and creating globals to go with them. I don't fully understand why that is being done. -//3. Move validation of entrypoint somewhere. This is an analyzer pass, and should not fail the build. -//4. Migrate all the printing from 'printGlobals' to the module. -//5. Cannot see where banMap has any functions inserted. Do we need it? -//6. Evaluate the TODO in runOnModule. 
- -class ClamBCAnalyzer : public llvm::ModulePass +class ClamBCAnalysis { protected: typedef llvm::DenseMap TypeMapTy; @@ -80,30 +76,29 @@ class ClamBCAnalyzer : public llvm::ModulePass virtual void printGlobals(uint16_t stid); /* TODO - * - * bytecode_api_decl.c.h - * - * Temporarily did this to populate the api map of the clamav functions that are allowed. Previously, - * there was c++ code that would parse the header file and read in the api's. I am planning on having them - * compiled into the module. - * - * bytecode_api_decl.c.h includes 5 potentially problematic files. - * clamav-types.h => stored in the clamav build directory - * type_desc.h => stored in clamav_checkout/libclamav - * bytecode_api.h => stored in clamav install directory somewhere - * bytecode_api_impl.h => stored in clamav_checkout/libclamav - * bytecode_priv.h => stored in clamav_checkout/libclamav - * - * For NOW, we are just going to hardcode the api map. - * - * Eventually we will have clamav install api headers as part of the build, and just read those. - */ + * + * bytecode_api_decl.c.h + * + * Temporarily did this to populate the api map of the clamav functions that are allowed. Previously, + * there was c++ code that would parse the header file and read in the api's. I am planning on having them + * compiled into the module. + * + * bytecode_api_decl.c.h includes 5 potentially problematic files. + * clamav-types.h => stored in the clamav build directory + * type_desc.h => stored in clamav_checkout/libclamav + * bytecode_api.h => stored in clamav install directory somewhere + * bytecode_api_impl.h => stored in clamav_checkout/libclamav + * bytecode_priv.h => stored in clamav_checkout/libclamav + * + * For NOW, we are just going to hardcode the api map. + * + * Eventually we will have clamav install api headers as part of the build, and just read those. 
+ */ virtual void populateAPIMap(); public: static char ID; - explicit ClamBCAnalyzer() - : ModulePass(ID) + explicit ClamBCAnalysis() { populateAPIMap(); @@ -117,10 +112,8 @@ class ClamBCAnalyzer : public llvm::ModulePass globalsMap["__clambc_match_offsets"] = GLOBAL_MATCH_OFFSETS; } - ~ClamBCAnalyzer() {} - virtual bool runOnModule(llvm::Module &m) override; - - virtual void getAnalysisUsage(llvm::AnalysisUsage &au) const override; + ~ClamBCAnalysis() {} + virtual void run(llvm::Module &m); virtual uint32_t getTypeID(const llvm::Type *const t) { @@ -245,4 +238,27 @@ class ClamBCAnalyzer : public llvm::ModulePass } }; -#endif //CLAMBC_ANALYZER_H_ +class ClamBCAnalyzer : public llvm::AnalysisInfoMixin +{ + protected: + ClamBCAnalysis clamBCAnalysis; + + public: + friend llvm::AnalysisInfoMixin; + static llvm::AnalysisKey Key; + + ClamBCAnalyzer() + : clamBCAnalysis() {} + virtual ~ClamBCAnalyzer() {} + + typedef ClamBCAnalysis Result; + + ClamBCAnalysis &run(llvm::Module &mod, llvm::ModuleAnalysisManager &mam) + { + clamBCAnalysis.run(mod); + + return clamBCAnalysis; + } +}; + +#endif // CLAMBC_ANALYZER_H_ diff --git a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp b/libclambcc/ClamBCChangeMallocArgSize.cpp similarity index 74% rename from libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp rename to libclambcc/ClamBCChangeMallocArgSize.cpp index 65111698bd8..c8e9da59727 100644 --- a/libclambcc/ClamBCChangeMallocArgSize/ClamBCChangeMallocArgSize.cpp +++ b/libclambcc/ClamBCChangeMallocArgSize.cpp @@ -1,20 +1,22 @@ +#include "clambc.h" #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include +#include -#include "Common/clambc.h" +#include +#include using namespace llvm; -namespace +namespace 
ChangeMallocArgSize { -class ChangeMallocArgSize : public ModulePass +class ChangeMallocArgSize : public PassInfoMixin { protected: std::vector changeValues; @@ -38,7 +40,8 @@ class ChangeMallocArgSize : public ModulePass for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { CallInst* pCall = llvm::dyn_cast(i); if (pCall) { - if ("malloc" == pCall->getCalledValue()->getName()) { + Function* pFunc = pCall->getCalledFunction(); + if (pFunc && ("malloc" == pFunc->getName())) { Value* pv = pCall->getOperand(0); if (PHINode* pn = llvm::dyn_cast(pv)) { addChangeValue(pn); @@ -62,7 +65,7 @@ class ChangeMallocArgSize : public ModulePass } } - /* Yes, I know there is a "getTerminator" function, but I have come across blocks + /* Yes, I know there is a "getTerminator" function, but I have come across blocks * that have more than one branch instruction (I think it is a bug in the runtime), but * until that is resolved, I want to use this function. */ @@ -136,13 +139,11 @@ class ChangeMallocArgSize : public ModulePass } public: - static char ID; ChangeMallocArgSize() - : ModulePass(ID) { } - virtual bool runOnModule(Module& m) override + virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) { pMod = &m; dstType = Type::getInt64Ty(pMod->getContext()); @@ -151,12 +152,26 @@ class ChangeMallocArgSize : public ModulePass fixBitWidths(); - return true; + return PreservedAnalyses::none(); } }; // end of struct ChangeMallocArgSize -} // end of anonymous namespace +} // namespace ChangeMallocArgSize -char ChangeMallocArgSize::ID = 0; -static RegisterPass X("clambc-change-malloc-arg-size", "ChangeMallocArgSize Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ChangeMallocArgSize", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef 
Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-change-malloc-arg-size") { + FPM.addPass(ChangeMallocArgSize::ChangeMallocArgSize()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/Common/ClamBCCommon.h b/libclambcc/ClamBCCommon.h similarity index 100% rename from libclambcc/Common/ClamBCCommon.h rename to libclambcc/ClamBCCommon.h diff --git a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp b/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp deleted file mode 100644 index 981d97029c5..00000000000 --- a/libclambcc/ClamBCConvertIntrinsics/ClamBCConvertIntrinsics.cpp +++ /dev/null @@ -1,135 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/DerivedTypes.h" - -#include - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" - -#include - -using namespace llvm; - -namespace -{ - -class ConvertIntrinsics : public ModulePass -{ - - public: - static char ID; - - ConvertIntrinsics() - : ModulePass(ID) {} - - virtual ~ConvertIntrinsics() {} - - virtual bool runOnModule(Module& mod) - { - bChanged = false; - pMod = &mod; - - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function* pFunc = llvm::cast(i); - processFunction(pFunc); - } - - for (size_t i = 0; i < delLst.size(); i++) { - delLst[i]->eraseFromParent(); - } - - return bChanged; - } - - protected: - Module* pMod = nullptr; - bool bChanged = false; - std::vector delLst; - - void processFunction(Function* pFunc) - { - - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - BasicBlock* pBB = llvm::cast(i); - processBasicBlock(pBB); - } - } - - void processBasicBlock(BasicBlock* pBB) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - if (CallInst* pci = llvm::dyn_cast(i)) { - if (Function* f = 
llvm::dyn_cast(pci->getCalledValue())) { - if ("llvm.memset.p0i8.i64" == f->getName()) { - convertMemset(pci); - } - } - } - } - } - - void convertMemset(CallInst* pci) - { - std::vector args; - Type* i32Ty = Type::getInt32Ty(pMod->getContext()); - - for (size_t i = 0; i < pci->getNumArgOperands(); i++) { - Value* pv = pci->getArgOperand(i); - if (2 == i) { - if (ConstantInt* ci = llvm::dyn_cast(pv)) { - pv = ConstantInt::get(i32Ty, ci->getValue().getLimitedValue()); - } else { - pv = CastInst::CreateTruncOrBitCast(pv, i32Ty, "ConvertIntrinsics_trunc_", pci); - } - } - - args.push_back(pv); - } - - Constant* f = getNewMemset(); - CallInst::Create(getMemsetType(), f, args, "", pci); - delLst.push_back(pci); - } - - llvm::Constant* getNewMemset() - { - static llvm::Constant* ret = nullptr; - - if (nullptr == ret) { - - FunctionType* retType = getMemsetType(); - ret = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", retType); - - assert(ret && "Could not get memset"); - } - - return ret; - } - - llvm::FunctionType* getMemsetType() - { - static FunctionType* retType = nullptr; - if (nullptr == retType) { - LLVMContext& c = pMod->getContext(); - retType = FunctionType::get(Type::getVoidTy(c), - {Type::getInt8PtrTy(c), Type::getInt8Ty(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, - false); - } - return retType; - } -}; - -} // end of anonymous namespace - -char ConvertIntrinsics::ID = 0; -static RegisterPass XX("clambc-convert-intrinsics", "Convert Intrinsics to 32-bit", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCConvertIntrinsicsTo32Bit.cpp b/libclambcc/ClamBCConvertIntrinsicsTo32Bit.cpp new file mode 100644 index 00000000000..ecb0c0032ca --- /dev/null +++ b/libclambcc/ClamBCConvertIntrinsicsTo32Bit.cpp @@ -0,0 +1,203 @@ +#include "clambc.h" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include + +using namespace llvm; + +namespace 
ClamBCConvertIntrinsicsTo32Bit +{ + +class ClamBCConvertIntrinsicsTo32Bit : public PassInfoMixin +{ + + public: + static char ID; + + ClamBCConvertIntrinsicsTo32Bit() {} + + virtual ~ClamBCConvertIntrinsicsTo32Bit() {} + + PreservedAnalyses run(Module& mod, ModuleAnalysisManager& MAM) + { + bChanged = false; + pMod = &mod; + + initializeReplacements(); + + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function* pFunc = llvm::cast(i); + processFunction(pFunc); + } + + for (size_t i = 0; i < delLst.size(); i++) { + delLst[i]->eraseFromParent(); + } + + if (bChanged) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + + protected: + Module* pMod = nullptr; + bool bChanged = false; + std::vector delLst; + + typedef struct { + llvm::Function* oldFunc; + llvm::FunctionCallee newFunc; + const size_t paramIdx; + } Replacement; + std::vector replacements; + + llvm::FunctionType* getMemset32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8Ty(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + llvm::FunctionType* getMemcpy32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8PtrTy(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + llvm::FunctionType* getMemmove32Type() + { + LLVMContext& c = pMod->getContext(); + return FunctionType::get(Type::getVoidTy(c), + {Type::getInt8PtrTy(c), Type::getInt8PtrTy(c), Type::getInt32Ty(c), Type::getInt1Ty(c)}, + false); + } + + void initializeReplacements() + { + /*There are different calls when you use the -no-opaque flags.*/ + + /*memsets*/ + FunctionType* ft = getMemset32Type(); + Function* pFunc = pMod->getFunction("llvm.memset.p0i8.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memset.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = 
pMod->getFunction("llvm.memset.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memset.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + + /*memcpys*/ + ft = getMemcpy32Type(); + pFunc = pMod->getFunction("llvm.memcpy.p0i8.p0i8.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memcpy.p0i8.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = pMod->getFunction("llvm.memcpy.p0.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memcpy.p0.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + + /*memmoves*/ + ft = getMemmove32Type(); + pFunc = pMod->getFunction("llvm.memmove.p0.p0.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memmove.p0.p0.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + pFunc = pMod->getFunction("llvm.memmove.p0i8.p0i8.i64"); + if (pFunc) { + FunctionCallee rep = pMod->getOrInsertFunction("llvm.memmove.p0i8.p0i8.i32", ft); + replacements.push_back({pFunc, rep, 2}); + } + } + + void processFunction(Function* pFunc) + { + for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { + BasicBlock* pBB = llvm::cast(i); + processBasicBlock(pBB); + } + } + + void processBasicBlock(BasicBlock* pBB) + { + for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { + if (CallInst* pci = llvm::dyn_cast(i)) { + Function* f = pci->getCalledFunction(); + if (nullptr != f) { + for (size_t i = 0; i < replacements.size(); i++) { + if (replacements[i].oldFunc == f) { + convertCall(pci, replacements[i]); + } + } + } + } + } + } + + void convertCall(CallInst* pci, const Replacement& r) + { + std::vector args; + Type* i32Ty = Type::getInt32Ty(pMod->getContext()); + + for (size_t i = 0; i < pci->arg_size(); i++) { + Value* pv = pci->getArgOperand(i); + if (r.paramIdx == i) { + if (ConstantInt* ci = llvm::dyn_cast(pv)) { + pv = ConstantInt::get(i32Ty, ci->getValue().getLimitedValue()); + } else { + pv = 
CastInst::CreateTruncOrBitCast(pv, i32Ty, "ClamBCConvertIntrinsicsTo32Bit_trunc_", pci); + } + + pci->setArgOperand(i, pv); + } + } + + pci->setCalledFunction(r.newFunc); + } +}; + +} // namespace ClamBCConvertIntrinsicsTo32Bit + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCConvertIntrinsicsTo32Bit", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-convert-intrinsics-to-32Bit") { + FPM.addPass(ClamBCConvertIntrinsicsTo32Bit::ClamBCConvertIntrinsicsTo32Bit()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/Common/ClamBCDiagnostics.cpp b/libclambcc/ClamBCDiagnostics.cpp similarity index 75% rename from libclambcc/Common/ClamBCDiagnostics.cpp rename to libclambcc/ClamBCDiagnostics.cpp index 79c634b61f9..4be023f8fcc 100644 --- a/libclambcc/Common/ClamBCDiagnostics.cpp +++ b/libclambcc/ClamBCDiagnostics.cpp @@ -20,7 +20,10 @@ * MA 02110-1301, USA. */ #define DEBUGTYPE "clambcdiags" + +#include "clambc.h" #include "ClamBCDiagnostics.h" + #include #include #include @@ -29,43 +32,16 @@ #include #include -#include "clambc.h" using namespace llvm; -#if 0 -static inline void printSep(bool hasColors) -{ - if (hasColors) { - errs().resetColor(); - } - errs() << ":"; - if (hasColors) { - errs().changeColor(raw_ostream::SAVEDCOLOR, true); - } -} -#endif - // Print the main compile unit's source filename, // falls back to printing the module identifier. 
static void printLocation(const llvm::Module *M) { NamedMDNode *ND = M->getNamedMetadata("llvm.dbg.gv"); if (ND) { -#if 0 - unsigned N = ND->getNumOperands(); - // Try to find main compile unit - for (unsigned i = 0; i < N; i++) { - DIGlobalVariable G(ND->getOperand(i)); - DICompileUnit CU(G.getCompileUnit()); - if (!CU.isMain()) - continue; - errs() << /*CU.getDirectory() << "/" <<*/ CU.getFilename() << ": "; - return; - } -#else DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } errs() << M->getModuleIdentifier() << ": "; } @@ -81,27 +57,10 @@ static void printLocation(const llvm::Function *F) I != E; ++I) { if (const Instruction *T = I->getTerminator()) { if (MDNode *N = T->getMetadata(MDDebugKind)) { -#if 0 - DILocation Loc(N); - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); - errs() << /*Loc.getDirectory() << "/" << */ Loc.getFilename() - << ": in function '" - << SP.getDisplayName() - << "': "; - return; - } -#else DEBUGERR << N << "\n"; DEBUGERR << *N << "\n"; DEBUGERR << "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } } } @@ -126,31 +85,9 @@ void printLocation(const llvm::Instruction *I, bool fallback) BasicBlock::const_iterator ItB = BB->begin(); while (It != ItB) { if (MDNode *N = It->getMetadata("dbg")) { -#if 0 - DILocation Loc(N); - errs() << /*Loc.getDirectory() << "/" <<*/ Loc.getFilename() - << ":" << Loc.getLineNumber(); - if (unsigned Col = Loc.getColumnNumber()) { - errs() << ":" << Col; - } - if (approx) - errs() << "(?)"; - errs() << ": "; - DIScope Scope = Loc.getScope(); - while (Scope.isLexicalBlock()) { - DILexicalBlock LB(Scope.getNode()); - Scope = LB.getContext(); - } - if (Scope.isSubprogram()) { - DISubprogram SP(Scope.getNode()); 
- errs() << "in function '" << SP.getDisplayName() << "': "; - } - return; -#else DEBUGERR << *N << "\n"; DEBUGERR << approx << "\n"; assert(0 && "FIGURE OUT WHAT TO DO IF I ACTUALLY GET HERE"); -#endif } approx = true; --It; @@ -175,18 +112,8 @@ void printValue(const llvm::Value *V, bool printLocation, bool fallback) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - if (fallback) - errs() << *V << "\n: "; - else - errs() << V->getName() << ": "; - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << "'" << DisplayName << "' "; if (printLocation) errs() << " (" << File << ":" << Line << ")"; @@ -201,15 +128,8 @@ void printLocation(const llvm::Module *M, const llvm::Value *V) unsigned Line = 0; std::string File; std::string Dir; -#if 0 - if (!getLocationInfo(V, DisplayName, Type, Line, File, Dir)) { - printLocation(M); - return; - } -#else DEBUGERR << "FIXME: FIGURE OUT WHAT 'getLocationInfo' has been replaced with" << "\n"; -#endif errs() << /*Dir << "/" <<*/ File << ":" << Line << ": "; } @@ -261,7 +181,7 @@ static void printMsg(const Twine &Msg, const llvm::Module *M, errs().resetColor(); if (I) { errs() << "\t at : " << *I << "\n"; - //DEBUG(I->getParent()->dump()); + // DEBUG(I->getParent()->dump()); DEBUGERR << *(I->getParent()) << "\n"; ; } diff --git a/libclambcc/Common/ClamBCDiagnostics.h b/libclambcc/ClamBCDiagnostics.h similarity index 100% rename from libclambcc/Common/ClamBCDiagnostics.h rename to libclambcc/ClamBCDiagnostics.h diff --git a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp b/libclambcc/ClamBCExtendPHIsTo64Bit.cpp similarity index 72% rename from libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp rename to libclambcc/ClamBCExtendPHIsTo64Bit.cpp index 96566027b84..0885e1598a4 100644 --- a/libclambcc/ClamBCExtendPHIsTo64Bit/ClamBCExtendPHIsTo64Bit.cpp +++ 
b/libclambcc/ClamBCExtendPHIsTo64Bit.cpp @@ -19,14 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" -#include "ClamBCModule.h" -#include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" +#include "ClamBCUtilities.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +34,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -50,12 +46,16 @@ #include +#include +#include + using namespace llvm; -class ClamBCExtendPHIsTo64Bit : public ModulePass +class ClamBCExtendPHIsTo64Bit : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; + bool bChanged = false; virtual void convertPHIs(Function *pFunc) { @@ -103,7 +103,7 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass continue; } - //Not allowed in bytecode sigs, but no reason not to support it. + // Not allowed in bytecode sigs, but no reason not to support it. if (llvm::isa(i)) { continue; } @@ -115,17 +115,17 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass Instruction *cast = CastInst::CreateIntegerCast(newNode, origType, true, "ClamBCConvertPHINodes_", insPt); pn->replaceAllUsesWith(cast); pn->eraseFromParent(); + bChanged = true; } public: static char ID; - explicit ClamBCExtendPHIsTo64Bit() - : ModulePass(ID) {} + explicit ClamBCExtendPHIsTo64Bit() {} virtual ~ClamBCExtendPHIsTo64Bit() {} - virtual bool runOnModule(Module &m) + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; @@ -135,16 +135,33 @@ class ClamBCExtendPHIsTo64Bit : public ModulePass convertPHIs(pFunc); } - return true; + if (bChanged) { + /* Since we changed the IR here invalidate all the previous analysis. + * We only want to invalidate the analysis when we change something, + * since it is expensive to compute. 
+ */ + return PreservedAnalyses::none(); + } + /*We didn't change anything, so keep the previous analysis.*/ + return PreservedAnalyses::all(); } }; -char ClamBCExtendPHIsTo64Bit::ID = 0; -static RegisterPass X("clambc-extend-phis-to-64bit", "ClamBCExtendPHIsTo64Bit Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCExtendPHIsTo64Bit() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCExtendPHIsTo64Bit(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCExtendPHIsTo64Bit", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-extend-phis-to-64-bit") { + FPM.addPass(ClamBCExtendPHIsTo64Bit()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp b/libclambcc/ClamBCLogicalCompiler.cpp similarity index 87% rename from libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp rename to libclambcc/ClamBCLogicalCompiler.cpp index 18adfb04ae9..f5b2a19ab2e 100644 --- a/libclambcc/ClamBCLogicalCompiler/ClamBCLogicalCompiler.cpp +++ b/libclambcc/ClamBCLogicalCompiler.cpp @@ -20,38 +20,36 @@ * MA 02110-1301, USA. 
*/ -#include "ClamBCModule.h" -#include -#include "../Common/bytecode_api.h" #include "clambc.h" +#include "bytecode_api.h" #include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" #include "ClamBCCommon.h" #include "ClamBCUtilities.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/StringSet.h" -#include "llvm/Analysis/ConstantFolding.h" + +#include +#include +#include +#include +#include #include -#include "llvm/Analysis/ValueTracking.h" +#include #include #include #include #include -//#include -#include -#include +#include +#include +#include #include -#include "llvm/Support/Debug.h" +#include #include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" +#include +#include #include -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/Local.h" -#include "llvm/Transforms/IPO.h" +#include +#include +#include #include -//#include #include #include @@ -59,17 +57,15 @@ using namespace llvm; -namespace +namespace ClamBCLogicalCompiler { -class ClamBCLogicalCompiler : public ModulePass +class ClamBCLogicalCompiler : public PassInfoMixin { public: - static char ID; - ClamBCLogicalCompiler() - : ModulePass(ID) {} + ClamBCLogicalCompiler() {} - virtual bool runOnModule(Module &M); + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); virtual void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); @@ -90,9 +86,6 @@ class ClamBCLogicalCompiler : public ModulePass bool compileVirusNames(Module &M, unsigned kind); }; -char ClamBCLogicalCompiler::ID = 0; -RegisterPass X("clambc-lcompiler", - "ClamAV Logical Compiler"); enum LogicalKind { LOG_SUBSIGNATURE, LOG_AND, @@ -213,8 +206,6 @@ class LogicalNode : public FoldingSetNode if (Node->kind == LOG_ADD) { ConstantRange Cmp(APInt(32, value)); // a + c < b -> a+c in [0, b) -> a in [0-c, b-c) - /*TODO: Determine if makeSatisfyingICmpRegin is better than makeAllowedICmpRegion, - * If this is changed, check the 
rest.*/ ConstantRange ltRange = ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_ULT, Cmp); ltRange = ltRange.subtract(APInt(32, Node->op0)); @@ -334,10 +325,7 @@ class LogicalNode : public FoldingSetNode return getNode(M); } - /* - * aragusa: All this is doing is checking for duplicates in whatever collection begin and end reference. - * Why are we putting them in another local? - * */ + /*Test for duplicates*/ bool checkUniq() { LogicalSet nodes; @@ -601,14 +589,23 @@ class LogicalCompiler { Value *V = LI.getOperand(0); ConstantExpr *CE = dyn_cast(V); - if (!CE || CE->getOpcode() != Instruction::GetElementPtr || - CE->getOperand(0) != GV || CE->getNumOperands() != 3 || - !cast(CE->getOperand(1))->isZero()) { - printDiagnostic("Logical signature: unsupported read", &LI); - return false; + ConstantInt *CI = nullptr; + if (CE) { + if (CE->getOpcode() != Instruction::GetElementPtr || + CE->getOperand(0) != GV || CE->getNumOperands() != 3 || + !cast(CE->getOperand(1))->isZero()) { + printDiagnostic("Logical signature: unsupported read", &LI); + return false; + } + CI = cast(CE->getOperand(2)); + } else { + /* In this case, we are directly loading the global, + * instead of using a getelementptr. + * It is likely that this would have been changed by O3. + */ + CI = ConstantInt::get(LI.getParent()->getParent()->getParent()->getContext(), APInt(64, 0)); } - ConstantInt *CI = cast(CE->getOperand(2)); - Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); + Map[&LI] = LogicalNode::getSubSig(allNodes, CI->getValue().getZExtValue()); return true; } @@ -754,8 +751,8 @@ class LogicalCompiler if (BranchInst *bi = llvm::dyn_cast(curr->getTerminator())) { if (bi->isConditional()) { - //copy the route, so that there are separate paths for the true - //and false condition. + // copy the route, so that there are separate paths for the true + // and false condition. 
std::vector route; for (size_t i = 0; i < routes[idx].size(); i++) { route.push_back(new LogicalPHIHelper(routes[idx][i])); @@ -894,7 +891,7 @@ class LogicalCompiler for (size_t j = 0; j < idxs.size(); j++) { size_t idx = idxs[j]; LogicalNode *tmp = getLogicalNode(routes[idx]); - if (nullptr == pci) { //Then this isn't a constant + if (nullptr == pci) { // Then this isn't a constant LogicalNode *l = Map.find(vIncoming)->second; tmp = LogicalNode::getAnd(tmp, l); } @@ -931,6 +928,7 @@ class LogicalCompiler } Instruction *pInst = llvm::cast(I); + switch (I->getOpcode()) { case Instruction::Load: valid &= processLoad(*cast(I)); @@ -965,18 +963,107 @@ class LogicalCompiler LogicalMap::iterator CondNode = Map.find(SI->getCondition()); LogicalMap::iterator TrueNode = Map.find(SI->getTrueValue()); LogicalMap::iterator FalseNode = Map.find(SI->getFalseValue()); - if (CondNode == Map.end() || TrueNode == Map.end() || FalseNode == Map.end()) { - printDiagnostic("Logical signature: select operands must be logical" - " expressions", + + /*O3 creates blocks that look like the following, which are legitimate blocks. + * This is essentially an AND of all the %cmp.i instructions. + * Since the cmp instructions all have false at the end, comparisons will be skipped + * after one is found to be false, without having a bunch of branch instructions. + * + * We are going to handle these cases by only adding an 'and' or an 'or' if there is + * an actual logical operation, not for constants. 
+ * + + entry: + %0 = load i32, ptr @__clambc_match_counts, align 16 + %cmp.i116.not = icmp eq i32 %0, 0 + %1 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 1), align 4 + %cmp.i112.not = icmp eq i32 %1, 0 + %or.cond = select i1 %cmp.i116.not, i1 %cmp.i112.not, i1 false + %2 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 2), align 8 + %cmp.i108.not = icmp eq i32 %2, 0 + %or.cond1 = select i1 %or.cond, i1 %cmp.i108.not, i1 false + %3 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 3), align 4 + %cmp.i104.not = icmp eq i32 %3, 0 + + + .... + + br i1 %or.cond15, label %lor.rhs, label %lor.end + + lor.rhs: ; preds = %entry + %17 = load i32, ptr getelementptr inbounds ([64 x i32], ptr @__clambc_match_counts, i64 0, i64 17), align 4 + %cmp.i = icmp ne i32 %17, 0 + br label %lor.end + + lor.end: ; preds = %lor.rhs, %entry + %18 = phi i1 [ true, %entry ], [ %cmp.i, %lor.rhs ] + ret i1 %18 + + */ + if (CondNode == Map.end() || (TrueNode == Map.end() && FalseNode == Map.end())) { + printDiagnostic("Logical signature: select condition must be logical" + " expression", SI); return false; } + // select cond, trueval, falseval -> cond && trueval || !cond && falseval - LogicalNode *N = LogicalNode::getAnd(CondNode->second, - TrueNode->second); - LogicalNode *NotCond = LogicalNode::getNot(CondNode->second); - LogicalNode *N2 = LogicalNode::getAnd(NotCond, FalseNode->second); - Map[SI] = LogicalNode::getOr(N, N2); + LogicalNode *N = nullptr; + LogicalNode *NotCond = nullptr; + LogicalNode *N2 = nullptr; + + if (TrueNode != Map.end()) { + N = LogicalNode::getAnd(CondNode->second, + TrueNode->second); + } else if (ConstantInt *pci = llvm::cast(SI->getTrueValue())) { + if (pci->isOne()) { + N = LogicalNode::getNode(*(CondNode->second)); + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select true value must either be" + " a logical 
expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select true value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + NotCond = LogicalNode::getNot(CondNode->second); + if (FalseNode != Map.end()) { + N2 = LogicalNode::getAnd(NotCond, FalseNode->second); + } else if (ConstantInt *pci = llvm::cast(SI->getFalseValue())) { + if (pci->isOne()) { + N2 = NotCond; + } else if (not pci->isZero()) { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + } else { + printDiagnostic("Logical signature: Select false value must either be" + " a logical expression or a constant true/false integer.", + SI); + return false; + } + + LogicalNode *res = nullptr; + if (N && N2) { + res = LogicalNode::getOr(N, N2); + } else if (N) { + res = N; + } else if (N2) { + res = N2; + } else { + /*SHOULD be impossible, but will add a check just in case.*/ + printDiagnostic("Logical signature: Malformed select statement.", + SI); + return false; + } + Map[SI] = res; break; } case Instruction::Ret: { @@ -1299,7 +1386,7 @@ static bool checkMinimum(llvm::Module *M, std::string s, unsigned min, unsigned ref.find("VI") != StringRef::npos) { min_required = FUNC_LEVEL_096_dev; msgreq = "Logical signature use of VI/macros requires minimum " - "functionality level of FUNC_LEVEL_096_dev"; + "functionality level of FUNC_LEVEL_096_dev"; } if (kind >= BC_PDF) { @@ -1319,21 +1406,21 @@ static bool checkMinimum(llvm::Module *M, std::string s, unsigned min, unsigned pos < ref.size() && ref[pos] != '0') { min_recommended = FUNC_LEVEL_096_2; msgrec = "Logical signature use of count comparison " - "requires minimum functionality level of FUNC_LEVEL_096_2 (bb #2053)"; + "requires minimum functionality level of FUNC_LEVEL_096_2 (bb #2053)"; break; } } if (min_recommended < 
FUNC_LEVEL_096_4) { min_recommended = FUNC_LEVEL_096_4; msgrec = "FUNC_LEVEL_096_4 is minimum recommended engine version. Older " - "versions have quadratic load time"; + "versions have quadratic load time"; } /*JSON CHECK*/ if (hasJSONUsage(M)) { min_required = FUNC_LEVEL_098_5; msgreq = "JSON reading API requires minimum functionality level " - "of FUNC_LEVEL_098_5"; + "of FUNC_LEVEL_098_5"; } /*JSON CHECK*/ @@ -1454,11 +1541,11 @@ bool ClamBCLogicalCompiler::compileLogicalSignature(Function &F, unsigned target if (min || max || !icon1.empty() || !icon2.empty()) { if (!max) max = 255; /* for now it should be enough, we can always increase it later - */ + */ if (!min) min = FUNC_LEVEL_096_4; /* 0.96 is first to have bytecode support, but <0.96.4 has quadratic load - * time */ + * time */ LogicalSignature = LogicalSignature + (";Engine:" + Twine(min) + "-" + Twine(max) + ",").str(); } else @@ -1631,27 +1718,40 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) bool Valid = true; for (auto I : F->users()) { - Value *pv = nullptr; - pv = llvm::cast(I); - CallSite CS(pv); - if (!CS.getInstruction()) { + CallInst *pCallInst = llvm::cast(I); + if (nullptr == pCallInst) { + assert(0 && "NOT sure how this is possible"); continue; } - if (CS.getCalledFunction() != F) { + + if (F != pCallInst->getCalledFunction()) { + + /*Not sure how this is possible, either*/ printDiagnostic("setvirusname can only be directly called", - CS.getInstruction()); + pCallInst); + Valid = false; + continue; + } + + if (2 != pCallInst->arg_size()) { + printDiagnostic("setvirusname has 2 args", pCallInst); Valid = false; continue; } - assert(CS.arg_size() == 2 && "setvirusname has 2 args"); + std::string param; llvm::StringRef sr; - Value *V = CS.getArgument(0); + Value *V = llvm::cast(pCallInst->arg_begin()); + if (nullptr == V) { + printDiagnostic("Invalid argument passed to setvirusname", pCallInst); + Valid = false; + continue; + } bool result = getConstantStringInfo(V, 
sr); param = sr.str(); if (!result) { printDiagnostic("Argument of foundVirus() must be a constant string", - CS.getInstruction()); + pCallInst); Valid = false; continue; } @@ -1662,31 +1762,29 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) if (!p.empty() && !virusNamesSet.count(p)) { printDiagnostic(Twine("foundVirus called with an undeclared virusname: ", p), - CS.getInstruction()); + pCallInst); Valid = false; continue; } // Add prefix std::string fullname = p.empty() ? virusNamePrefix : virusNamePrefix + "." + p.str(); - IRBuilder<> builder(CS.getInstruction()->getParent()); + IRBuilder<> builder(pCallInst->getParent()); Value *C = builder.CreateGlobalStringPtr(fullname.c_str()); IntegerType *I32Ty = Type::getInt32Ty(M.getContext()); - CS.setArgument(0, C); - CS.setArgument(1, ConstantInt::get(I32Ty, fullname.size())); + pCallInst->setArgOperand(0, C); + pCallInst->setArgOperand(1, ConstantInt::get(I32Ty, fullname.size())); } return Valid; } -bool ClamBCLogicalCompiler::runOnModule(Module &M) +PreservedAnalyses ClamBCLogicalCompiler::run(Module &M, ModuleAnalysisManager &MAM) { bool Valid = true; LogicalSignature = ""; virusnames = ""; pMod = &M; - //dumpPHIGraphs(); - // Handle virusname unsigned kind = 0; GlobalVariable *GVKind = M.getGlobalVariable("__clambc_kind"); @@ -1705,14 +1803,16 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) GVKind->setConstant(true); } if (!compileVirusNames(M, kind)) { - if (!kind || kind == BC_STARTUP) - return true; + if (!kind || kind == BC_STARTUP) { + return PreservedAnalyses::all(); + } Valid = false; } if (F) { - LoopInfo &li = getAnalysis(*F).getLoopInfo(); - if (functionHasLoop(F, li)) { + FunctionAnalysisManager &fam = MAM.getResult(M).getManager(); + LoopInfo *li = &fam.getResult(*F); + if (functionHasLoop(F, *li)) { printDiagnostic("Logical signature: loop/recursion not supported", F); Valid = false; } @@ -1842,13 +1942,26 @@ bool ClamBCLogicalCompiler::runOnModule(Module &M) // 
diagnostic already printed exit(42); } - return true; + return PreservedAnalyses::none(); } -} // namespace -const PassInfo *const ClamBCLogicalCompilerID = &X; - -llvm::ModulePass *createClamBCLogicalCompiler() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCLogicalCompiler(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompiler", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lcompiler") { + FPM.addPass(ClamBCLogicalCompiler()); + return true; + } + return false; + }); + }}; } + +} // namespace ClamBCLogicalCompiler diff --git a/libclambcc/ClamBCLogicalCompilerHelper.cpp b/libclambcc/ClamBCLogicalCompilerHelper.cpp new file mode 100644 index 00000000000..27a33b6db23 --- /dev/null +++ b/libclambcc/ClamBCLogicalCompilerHelper.cpp @@ -0,0 +1,221 @@ +/* + * Compile LLVM bytecode to logical signatures. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "clambc.h" +#include "bytecode_api.h" +#include "ClamBCDiagnostics.h" +#include "ClamBCCommon.h" +#include "ClamBCUtilities.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Since the logical compiler requires 'setvirusname' to only be called with a string constant, + * we are going to undo the PHI nodes added by O3 that would have to + * + * + * Consider the code + + return.sink.split: ; preds = %if.end39, %for.end + %.str.1.sink = phi ptr [ @.str, %for.end ], [ @.str.1, %if.end39 ] + %call.i70 = call i32 @setvirusname(ptr noundef nonnull %.str.1.sink, i32 noundef 0) #6 + br label %return + + We will just add the calls to setvirusname to the predecessor basic blocks. + * + * + */ + +#define DEBUG_TYPE "lsigcompilerhelper" + +using namespace llvm; + +namespace ClamBCLogicalCompilerHelper +{ + +class ClamBCLogicalCompilerHelper : public PassInfoMixin +{ + public: + ClamBCLogicalCompilerHelper() {} + + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); + virtual void getAnalysisUsage(AnalysisUsage &AU) const + { + } + + protected: + llvm::Module *pMod = nullptr; + std::vector erase; + bool bChanged = false; + + virtual void populateArgs(const CallInst *pci, std::vector &args) + { + for (auto i = pci->arg_begin(), e = pci->arg_end(); i != e; i++) { + args.push_back(llvm::dyn_cast(i)); + } + } + virtual void processPHI(PHINode *phi, Function *pCalledFunction, std::vector &args); + + virtual void fixupSetVirusNameCalls(); + + size_t getBranchIdx(llvm::BranchInst *pBranch, llvm::BasicBlock *pBB); +}; + +size_t ClamBCLogicalCompilerHelper::getBranchIdx(llvm::BranchInst *pBranch, llvm::BasicBlock *pBB) +{ + for (size_t ret = 0; ret < pBranch->getNumSuccessors(); ret++) { + if (pBranch->getSuccessor(ret) == pBB) { + 
return ret; + } + } + + ClamBCStop("Branch Instruction is not a predecessor to phi.", pBranch); + + return -1; +} + +/* + * Add calls to setvirusname for each constant string, rather allowing a phinode to + * choose the string. This is a requirement for ClamBCLogicalCompiler. + */ +void ClamBCLogicalCompilerHelper::processPHI(PHINode *phi, Function *pCalledFunction, std::vector &args) +{ + + for (size_t i = 0; i < phi->getNumIncomingValues(); i++) { + BasicBlock *pBB = phi->getIncomingBlock(i); + Value *pVal = phi->getIncomingValue(i); + + Instruction *pTerm = pBB->getTerminator(); + BranchInst *pBranch = llvm::cast(pTerm); /*I know this is a BranchInst, + and not a ReturnInst, because + it is a predecessor block to + my phi node, so no need for + a dyn_cast*/ + size_t branchIdx = getBranchIdx(pBranch, phi->getParent()); + + BasicBlock *pNew = BasicBlock::Create(pMod->getContext(), + "ClamBCLogicalCompilerHelper_call_SetVirusName_", phi->getParent()->getParent(), phi->getParent()); + pBranch->setSuccessor(branchIdx, pNew); + + args[0] = pVal; + + CallInst::Create(pCalledFunction->getFunctionType(), pCalledFunction, args, "ClamBCLogicalCompilerHelper_callInst", pNew); + BranchInst::Create(phi->getParent(), pNew); + } +} + +/* + * Find all calls to setvirusname, and make sure they aren't loading the + * first argument from a variable. 
+ */ +void ClamBCLogicalCompilerHelper::fixupSetVirusNameCalls() +{ + + std::vector calls; + Function *svn = pMod->getFunction("setvirusname"); + if (nullptr == svn) { + return; + } + for (auto iter : svn->users()) { + if (CallInst *pci = llvm::dyn_cast(iter)) { + Value *operand = pci->getOperand(0); + + if (PHINode *phi = llvm::dyn_cast(operand)) { + calls.push_back(pci); + } + } + } + + for (size_t i = 0; i < calls.size(); i++) { + CallInst *pci = calls[i]; + PHINode *phi = llvm::dyn_cast(pci->getOperand(0)); + std::vector args; + populateArgs(pci, args); + processPHI(phi, svn, args); + + erase.push_back(pci); + erase.push_back(phi); + } + + for (size_t i = 0; i < erase.size(); i++) { + erase[i]->eraseFromParent(); + } +} + +PreservedAnalyses ClamBCLogicalCompilerHelper::run(Module &mod, ModuleAnalysisManager &mam) +{ + pMod = &mod; + + fixupSetVirusNameCalls(); + + if (bChanged) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); +} + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLogicalCompilerHelper", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lcompiler-helper") { + FPM.addPass(ClamBCLogicalCompilerHelper()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLogicalCompilerHelper diff --git a/libclambcc/ClamBCLowering/ClamBCLowering.cpp b/libclambcc/ClamBCLowering.cpp similarity index 76% rename from libclambcc/ClamBCLowering/ClamBCLowering.cpp rename to libclambcc/ClamBCLowering.cpp index 95ce6315029..9d968ddd953 100644 --- a/libclambcc/ClamBCLowering/ClamBCLowering.cpp +++ b/libclambcc/ClamBCLowering.cpp @@ -19,101 +19,18 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#define DEBUG_TYPE "bclowering" -#include +#include "ClamBCLowering.h" + #include "clambc.h" -#include "ClamBCModule.h" - -#include "llvm/ADT/STLExtras.h" -#include "llvm/Analysis/ConstantFolding.h" -#include -#include -#include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ValueTracking.h" -#include -#include -#include "llvm/CodeGen/IntrinsicLowering.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include "llvm/Support/CommandLine.h" -#include + +#include #include -#include -#include "llvm/Support/FormattedStream.h" -#include "llvm/Support/raw_ostream.h" -#include -#include "llvm/Transforms/Scalar.h" -#include "llvm/CodeGen/IntrinsicLowering.h" using namespace llvm; -namespace -{ -class ClamBCLowering : public ModulePass -{ - public: - static char ID; - ClamBCLowering() - : ModulePass(ID) {} - - virtual ~ClamBCLowering() {} - - virtual llvm::StringRef getPassName() const - { - return "ClamAV Bytecode Lowering"; - } - virtual bool runOnModule(Module &M); - virtual void getAnalysisUsage(AnalysisUsage &AU) const - { - } - - protected: - virtual bool isFinal() = 0; - - private: - void lowerIntrinsics(IntrinsicLowering *IL, Function &F); - void simplifyOperands(Function &F); - void downsizeIntrinsics(Function &F); - void splitGEPZArray(Function &F); - void fixupBitCasts(Function &F); - void fixupGEPs(Function &F); - void fixupPtrToInts(Function &F); -}; - -class ClamBCLoweringNF : public ClamBCLowering +namespace ClamBCLowering { - public: - ClamBCLoweringNF() {} - virtual ~ClamBCLoweringNF() {} - protected: - virtual bool isFinal() - { - return false; - } -}; - -class ClamBCLoweringF : public ClamBCLowering -{ - public: - ClamBCLoweringF() {} - virtual ~ClamBCLoweringF() {} - - protected: - virtual bool isFinal() - { - return true; - } -}; - -char ClamBCLowering::ID = 0; void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) { std::vector prototypesToGen; @@ -156,7 
+73,7 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) Builder.SetInsertPoint(BO); Value *V = Builder.CreatePointerCast(PII->getOperand(0), PointerType::getUnqual(Type::getInt8Ty(F.getContext()))); - V = Builder.CreateGEP(V, Idx); + V = Builder.CreateGEP(V->getType(), V, Idx); V = Builder.CreatePtrToInt(V, BO->getType()); BO->replaceAllUsesWith(V); } else if (GetElementPtrInst *GEPI = dyn_cast(II)) { @@ -178,7 +95,7 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) if (VSz < 32) { // needs zext, never sext (as index cannot be negative) V2 = Builder.CreateZExtOrBitCast(V, Type::getInt32Ty(C)); - } else if (VSz == 32) { //possible through CastInst path + } else if (VSz == 32) { // possible through CastInst path // pass-through V2 = V; } else { // VSz > 32 @@ -208,7 +125,6 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) GetElementPtrInst *GEP = dyn_cast(PI->getOperand(0)); if (GEP && GEP->getNumOperands() == 2) { Value *V1 = GEP->getOperand(1); - //if (GEP->getType()->getElementType() == Type::getInt8Ty(F.getContext())) { if (GEP->getSourceElementType() == Type::getInt8Ty(F.getContext())) { Value *P0 = Builder.CreatePtrToInt(GEP->getOperand(0), V1->getType()); @@ -284,7 +200,7 @@ void ClamBCLowering::simplifyOperands(Function &F) if (ConstantExpr *CE = dyn_cast(II->getOperand(i))) { if (CE->getOpcode() == Instruction::GetElementPtr) { // rip out GEP expr and load it - Ops.push_back(new LoadInst(CE, "gepex_load", SI)); + Ops.push_back(new LoadInst(CE->getType(), CE, "gepex_load", SI)); Changed = true; } } else { @@ -368,47 +284,47 @@ static inline void addIntrinsicFunctions(llvm::Module *pMod, Intrinsic::getDeclaration(pMod, Intrinsic::memmove, {i8Ptr, i8Ptr, i32, i1}))); } -static llvm::Value *getReplacementSizeOperand(llvm::CallSite &CS, llvm::Value *Len) +static llvm::Value *getReplacementSizeOperand(llvm::CallInst *pCallInst, llvm::Value *Len) { - llvm::LLVMContext &Context = 
CS.getParent()->getParent()->getParent()->getContext(); - Value *NewLen = NULL; + LLVMContext &context = pCallInst->getParent()->getParent()->getParent()->getContext(); + Value *NewLen = NULL; if (ConstantInt *C = dyn_cast(Len)) { - NewLen = ConstantInt::get(Type::getInt32Ty(Context), + NewLen = ConstantInt::get(Type::getInt32Ty(context), C->getValue().getLimitedValue((1ULL << 32) - 1)); } else { - NewLen = new TruncInst(Len, Type::getInt32Ty(Context), "lvl_dwn", CS.getInstruction()); + NewLen = new TruncInst(Len, Type::getInt32Ty(context), "lvl_dwn", pCallInst); } return NewLen; } -static void populateArgumentList(llvm::CallSite &CS, llvm::Value *newLen, size_t idx, std::vector &Ops) +static void populateArgumentList(llvm::CallInst *pCallInst, llvm::Value *newLen, size_t idx, std::vector &Ops) { - for (unsigned i = 0; i < CS.arg_size(); ++i) { + for (unsigned i = 0; i < pCallInst->arg_size(); ++i) { if (i == idx) { Ops.push_back(newLen); } else { - Ops.push_back(CS.getArgument(i)); + Ops.push_back(pCallInst->getArgOperand(i)); } } } -static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair rep, size_t idx) +static bool replaceIntrinsicCalls(llvm::MemIntrinsic *pMemIntrinsic, std::pair rep, size_t idx) { - llvm::Function *pCalled = MI->getCalledFunction(); + llvm::Function *pCalled = pMemIntrinsic->getCalledFunction(); { if (rep.first == pCalled) { - llvm::CallSite CS(MI); - Value *Len = CS.getArgument(2); - llvm::Value *newLen = getReplacementSizeOperand(CS, Len); + // llvm::CallSite CS(MI); + Value *Len = pMemIntrinsic->getArgOperand(2); + llvm::Value *newLen = getReplacementSizeOperand(pMemIntrinsic, Len); std::vector args; - populateArgumentList(CS, newLen, idx, args); + populateArgumentList(pMemIntrinsic, newLen, idx, args); assert(args.size() == 4 && "malformed intrinsic call!"); - llvm::Instruction *i = CallInst::Create(rep.second, args, MI->getName(), MI); + llvm::Instruction *i = CallInst::Create(rep.second, args, pMemIntrinsic->getName(), 
pMemIntrinsic); assert(i && "Failed to create new CallInst"); return true; @@ -417,11 +333,10 @@ static bool replaceIntrinsicCalls(llvm::MemIntrinsic *MI, std::pair InstDel; std::vector> repPairs; @@ -445,8 +360,8 @@ void ClamBCLowering::downsizeIntrinsics(Function &F) } } -//There is no guarantee that the alloca's will all be at the beginning of the block -// so don't stop when we see a non-alloca +// There is no guarantee that the alloca's will all be at the beginning of the block +// so don't stop when we see a non-alloca static void gatherAllocasWithBitcasts(llvm::BasicBlock *bb, std::vector &allocas) { for (auto i = bb->begin(), e = bb->end(); i != e; i++) { @@ -458,7 +373,7 @@ static void gatherAllocasWithBitcasts(llvm::BasicBlock *bb, std::vector [#uses=2] @@ -468,10 +383,10 @@ static void gatherAllocasWithBitcasts(llvm::BasicBlock *bb, std::vector [#uses=1] - * %base_gepz = getelementptr [264 x i8]* %0, i32 0, i32 0 ; [#uses=3] + * %base_gepz = getelementptr [264 x i8]* %0, i32 0, i32 0 ; [#uses=3] * %bcastrr = bitcast i8* %base_gepz to [264 x i8]* ; <[264 x i8]*> [#uses=2] * ... - * %18 = bitcast [264 x i8]* %bcastrr to i8* ; [#uses=0] + * %18 = bitcast [264 x i8]* %bcastrr to i8* ; [#uses=0] * %call133 = call i32 @rc4_stream_setup(i8* %base_gepz, i32 264, i8* %base_gepz22, i32 32) ; [#uses=1] * */ @@ -496,19 +411,9 @@ void ClamBCLowering::fixupBitCasts(Function &F) continue; } - /*aragusa - * I am getting an assertion failure trying to cast a value that is not an ArrayType - * to an ArrayType. I don't fully understand the reason for doing what we are doing here. - * I am just going to check if AI->getAllocatedType is an array type. I may need to revisit this later. - */ if (not llvm::isa(AI->getAllocatedType())) { continue; } - /*Intentionally leaving this debug message in, because I don't think this code is executed very often, and - * I don't believe it is necessary. 
Once I get the bugs ironed out of the header files, I am going to - * see if this ever prints and does not have an assertion failure. The iterators were previously not working - * correctly and in fixing them, I believe I turned on code that wasn't previously working.*/ - const ArrayType *arTy = cast(AI->getAllocatedType()); Type *APTy = PointerType::getUnqual(arTy->getElementType()); @@ -517,7 +422,6 @@ void ClamBCLowering::fixupBitCasts(Function &F) AIC->setName("ClamBCLowering_fixupBitCasts"); BasicBlock::iterator IP = AI->getParent()->begin(); while (isa(IP)) ++IP; - //Value *Idx[] = {Zero, Zero}; llvm::ArrayRef Idxs = {Zero, Zero}; V = GetElementPtrInst::Create(nullptr, AIC, Idxs, "base_gepz", AI); @@ -545,7 +449,6 @@ void ClamBCLowering::fixupGEPs(Function &F) std::vector indexes; GetElementPtrInst::op_iterator J = GEPI->idx_begin(), JE = GEPI->idx_end(); for (; J != JE; ++J) { - //llvm::Value * v = llvm::cast(J); // push all constants if (Constant *C = dyn_cast(*J)) { indexes.push_back(C); @@ -556,10 +459,7 @@ void ClamBCLowering::fixupGEPs(Function &F) 0)); break; } - Constant *C = cast(GEPI->getOperand(0)); - //Constant *GC = ConstantExpr::getInBoundsGetElementPtr(C, - // &indexes[0], - // indexes.size()); + Constant *C = cast(GEPI->getOperand(0)); Constant *GC = ConstantExpr::getInBoundsGetElementPtr(nullptr, C, indexes); if (J != JE) { @@ -567,11 +467,10 @@ void ClamBCLowering::fixupGEPs(Function &F) for (; J != JE; ++J) { indexes.push_back(*J); } - //AllocaInst *AI = new AllocaInst(GC->getType(), "", Entry->begin()); AllocaInst *AI = new AllocaInst(GC->getType(), 0, "ClamBCLowering_fixupGEPs", llvm::cast(Entry->begin())); new StoreInst(GC, AI, GEPI); - Value *L = new LoadInst(AI, "ClamBCLowering_fixupGEPs", GEPI); - Value *V = GetElementPtrInst::CreateInBounds(L, indexes, "ClamBCLowering_fixupGEPs", GEPI); + Value *L = new LoadInst(AI->getType(), AI, "ClamBCLowering_fixupGEPs", GEPI); + Value *V = GetElementPtrInst::CreateInBounds(L->getType(), L, 
indexes, "ClamBCLowering_fixupGEPs", GEPI); GEPI->replaceAllUsesWith(V); GEPI->eraseFromParent(); } else { @@ -598,7 +497,7 @@ void ClamBCLowering::fixupPtrToInts(Function &F) for (std::vector::iterator I = insts.begin(), E = insts.end(); I != E; ++I) { PtrToIntInst *PI = *I; - //Builder.SetInsertPoint(PI->getParent(), PI); + // Builder.SetInsertPoint(PI->getParent(), PI); Builder.SetInsertPoint(PI); Value *PI2 = Builder.CreatePtrToInt(PI->getOperand(0), I64Ty); Value *R = Builder.CreateTrunc(PI2, I32Ty); @@ -629,7 +528,7 @@ void ClamBCLowering::splitGEPZArray(Function &F) continue; } const PointerType *Ty = cast(GEPI->getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getArrayElementType()); if (!ATy) { continue; } @@ -637,18 +536,21 @@ void ClamBCLowering::splitGEPZArray(Function &F) Constant *Zero = ConstantInt::get(Type::getInt32Ty(Ty->getContext()), 0); Value *VZ[] = {Zero, Zero}; // transform GEPZ: [4 x i16]* %p, 0, %i -> GEP1 i16* (bitcast)%p, %i - Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); - Value *NG = GetElementPtrInst::CreateInBounds(C, V, "ClamBCLowering_splitGEPZArray", GEPI); + Value *C = GetElementPtrInst::CreateInBounds(GEPI->getPointerOperand()->getType(), GEPI->getPointerOperand(), VZ, "ClamBCLowering_splitGEPZArray", GEPI); + Value *NG = GetElementPtrInst::CreateInBounds(C->getType(), C, V, "ClamBCLowering_splitGEPZArray", GEPI); GEPI->replaceAllUsesWith(NG); GEPI->eraseFromParent(); } } } -bool ClamBCLowering::runOnModule(Module &M) +PreservedAnalyses ClamBCLowering::run(Module &m, ModuleAnalysisManager &MAM) { - for (Module::iterator I = M.begin(), E = M.end(); + pMod = &m; + pContext = &(pMod->getContext()); + + for (Module::iterator I = pMod->begin(), E = pMod->end(); I != E; ++I) { if (I->isDeclaration()) continue; @@ -663,14 +565,7 @@ bool ClamBCLowering::runOnModule(Module &M) } } - return 
true; + return PreservedAnalyses::none(); } -} // namespace - -static RegisterPass X("clambc-lowering-notfinal", "ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -static RegisterPass XX("clambc-lowering-final", "ClamBC Lowering Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLowering.h b/libclambcc/ClamBCLowering.h new file mode 100644 index 00000000000..86e3550e10c --- /dev/null +++ b/libclambcc/ClamBCLowering.h @@ -0,0 +1,61 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ +// #define DEBUG_TYPE "bclowering" + +#include +#include + +namespace ClamBCLowering +{ + +class ClamBCLowering : public llvm::PassInfoMixin +{ + public: + ClamBCLowering() {} + + virtual ~ClamBCLowering() {} + + virtual llvm::StringRef getPassName() const + { + return "ClamAV Bytecode Lowering"; + } + virtual llvm::PreservedAnalyses run(llvm::Module &m, llvm::ModuleAnalysisManager &MAM); + virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const + { + } + + protected: + virtual bool isFinal() = 0; + llvm::LLVMContext *pContext = nullptr; + llvm::Module *pMod = nullptr; + + private: + void lowerIntrinsics(llvm::IntrinsicLowering *IL, llvm::Function &F); + void simplifyOperands(llvm::Function &F); + void downsizeIntrinsics(llvm::Function &F); + void splitGEPZArray(llvm::Function &F); + void fixupBitCasts(llvm::Function &F); + void fixupGEPs(llvm::Function &F); + void fixupPtrToInts(llvm::Function &F); +}; + +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLoweringF.cpp b/libclambcc/ClamBCLoweringF.cpp new file mode 100644 index 00000000000..b5de956aafc --- /dev/null +++ b/libclambcc/ClamBCLoweringF.cpp @@ -0,0 +1,65 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "ClamBCLowering.h" + +#include "clambc.h" + +#include + +using namespace llvm; + +namespace ClamBCLowering +{ + +class ClamBCLoweringF : public ClamBCLowering +{ + public: + ClamBCLoweringF() {} + virtual ~ClamBCLoweringF() {} + + protected: + virtual bool isFinal() + { + return true; + } +}; + +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lowering-final") { + FPM.addPass(ClamBCLoweringF()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLowering diff --git a/libclambcc/ClamBCLoweringNF.cpp b/libclambcc/ClamBCLoweringNF.cpp new file mode 100644 index 00000000000..8b26b3ff725 --- /dev/null +++ b/libclambcc/ClamBCLoweringNF.cpp @@ -0,0 +1,66 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Softwaref + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */ + +#include "ClamBCLowering.h" + +#include "clambc.h" + +#include + +using namespace llvm; + +namespace ClamBCLowering +{ + +class ClamBCLoweringNF : public ClamBCLowering +{ + public: + ClamBCLoweringNF() {} + virtual ~ClamBCLoweringNF() {} + + protected: + virtual bool isFinal() + { + return false; + } +}; + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCLowering", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-lowering-notfinal") { + FPM.addPass(ClamBCLoweringNF()); + return true; + } + return false; + }); + }}; +} + +} // namespace ClamBCLowering diff --git a/libclambcc/Common/ClamBCModule.h b/libclambcc/ClamBCModule.h similarity index 94% rename from libclambcc/Common/ClamBCModule.h rename to libclambcc/ClamBCModule.h index a4a80c236de..8798da008c0 100644 --- a/libclambcc/Common/ClamBCModule.h +++ b/libclambcc/ClamBCModule.h @@ -21,19 +21,21 @@ */ #ifndef CLAMBC_MODULE_H #define CLAMBC_MODULE_H -#include -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringMap.h" -#include "llvm/ADT/StringRef.h" -#include "llvm/ADT/Twine.h" + +#include "clambc.h" + +#include +#include +#include +#include +#include #include #include -#include "llvm/Support/raw_ostream.h" +#include + #include #include - -#include "clambc.h" +#include class ClamBCWriter; class ClamBCRegAlloc; @@ -154,12 +156,6 @@ class ClamBCModule : public llvm::ModulePass virtual bool runOnModule(llvm::Module &M); virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const; -#if 0 - static void stop(const llvm::Twine &Msg, const llvm::Module *M); - static void stop(const llvm::Twine &Msg, const llvm::Function *F); - static void stop(const llvm::Twine &Msg, const llvm::Instruction *I); -#endif - void printNumber(uint64_t n, bool 
constant = false) { printNumber(Out, n, constant); diff --git a/libclambcc/ClamBCModule/ClamBCModule.cpp b/libclambcc/ClamBCModule/ClamBCModule.cpp deleted file mode 100644 index 33de28eb598..00000000000 --- a/libclambcc/ClamBCModule/ClamBCModule.cpp +++ /dev/null @@ -1,30 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "clambc.h" - -using namespace llvm; - -namespace -{ -struct ClamBCModule : public FunctionPass { - static char ID; - ClamBCModule() - : FunctionPass(ID) {} - - bool runOnFunction(Function &F) override - { - return false; - } -}; // end of struct ClamBCModule -} // end of anonymous namespace - -char ClamBCModule::ID = 0; -static RegisterPass X("clambc-module", "ClamBCModule Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp b/libclambcc/ClamBCOutlineEndiannessCalls.cpp similarity index 57% rename from libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp rename to libclambcc/ClamBCOutlineEndiannessCalls.cpp index 2048ecb7ce7..ccaffa16872 100644 --- a/libclambcc/ClamBCOutlineEndiannessCalls/ClamBCOutlineEndiannessCalls.cpp +++ b/libclambcc/ClamBCOutlineEndiannessCalls.cpp @@ -1,20 +1,22 @@ +#include "clambc.h" + #include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include -#include "Common/clambc.h" +#include +#include using namespace llvm; namespace { -class OutlineEndniassCalls : public ModulePass +class ClamBCOutlineEndiannessCalls : public PassInfoMixin { protected: bool bChanged = false; @@ -25,7 +27,7 @@ class OutlineEndniassCalls : public ModulePass for 
(auto i = pBB->begin(), e = pBB->end(); i != e; i++) { CallInst* pCall = llvm::dyn_cast(i); if (pCall) { - if ("__is_bigendian" == pCall->getCalledValue()->getName()) { + if ("__is_bigendian" == pCall->getCalledFunction()->getName()) { calls.push_back(pCall); } } @@ -71,41 +73,57 @@ class OutlineEndniassCalls : public ModulePass pNew->addFnAttr(Attribute::OptimizeNone); pNew->addFnAttr(Attribute::NoInline); - //TODO: Test with NoInline, but not OptimizeNone (Hopefully I can have the function return 1 or 0, and - //not have to actually call the function. + // TODO: Test with NoInline, but not OptimizeNone (Hopefully I can have the function return 1 or 0, and + // not have to actually call the function. return pNew; } public: static char ID; - OutlineEndniassCalls() - : ModulePass(ID) {} + ClamBCOutlineEndiannessCalls() {} - virtual bool runOnModule(Module& m) override + virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) { pMod = &m; std::vector calls = findCalls(); if (0 == calls.size()) { - return false; + return PreservedAnalyses::all(); } Function* pNew = getNewEndiannessFunction(calls[0]); for (size_t i = 0; i < calls.size(); i++) { - CallInst* pNewCall = CallInst::Create(pNew, "OutlineEndniassCalls_", calls[i]); + CallInst* pNewCall = CallInst::Create(pNew, "ClamBCOutlineEndiannessCalls_", calls[i]); calls[i]->replaceAllUsesWith(pNewCall); calls[i]->eraseFromParent(); } - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } -}; // end of struct OutlineEndniassCalls +}; // end of struct ClamBCOutlineEndiannessCalls } // end of anonymous namespace -char OutlineEndniassCalls::ID = 0; -static RegisterPass X("clambc-outline-endianness-calls", "OutlineEndniassCalls TEST Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { 
+ LLVM_PLUGIN_API_VERSION, "ClamBCOutlineEndiannessCalls", "v0.1", + [](PassBuilder& PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager& FPM, + ArrayRef) { + if (Name == "clambc-outline-endianness-calls") { + FPM.addPass(ClamBCOutlineEndiannessCalls()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp b/libclambcc/ClamBCPrepareGEPsForWriter.cpp similarity index 89% rename from libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp rename to libclambcc/ClamBCPrepareGEPsForWriter.cpp index 8033f1f50b4..a35e266edbf 100644 --- a/libclambcc/ClamBCPrepareGEPsForWriter/ClamBCPrepareGEPsForWriter.cpp +++ b/libclambcc/ClamBCPrepareGEPsForWriter.cpp @@ -19,14 +19,14 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" #include "ClamBCModule.h" -#include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" +#include "ClamBCUtilities.h" + +#include "ClamBCAnalyzer.h" #include -//#include "ClamBCTargetMachine.h" #include #include #include @@ -37,7 +37,6 @@ #include #include #include -//#include "llvm/Config/config.h" #include #include #include @@ -47,21 +46,21 @@ #include #include #include +#include +#include #include using namespace llvm; -class ClamBCPrepareGEPsForWriter : public ModulePass -{ +struct ClamBCPrepareGEPsForWriter : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; public: static char ID; - explicit ClamBCPrepareGEPsForWriter() - : ModulePass(ID) {} + explicit ClamBCPrepareGEPsForWriter() {} virtual ~ClamBCPrepareGEPsForWriter() {} @@ -232,10 +231,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_2_", 
pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, "processGEPI_2_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_3_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_3_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -305,10 +304,10 @@ class ClamBCPrepareGEPsForWriter : public ModulePass Value *gepiNew = underlyingObject; if (gepiNew->getType()->getPointerElementType()->isArrayTy()) { - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, Idxs, "processGEPI_0_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, Idxs, "processGEPI_0_", pgepi); } - gepiNew = GetElementPtrInst::Create(nullptr, gepiNew, vCnt, "processGEPI_1_", pgepi); + gepiNew = GetElementPtrInst::Create(gepiNew->getType()->getPointerElementType(), gepiNew, vCnt, "processGEPI_1_", pgepi); CastInst *ciNew = CastInst::CreatePointerCast(gepiNew, pgepi->getType(), "processGEPI_", pgepi); @@ -372,7 +371,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } } - virtual bool runOnModule(Module &m) + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -387,7 +386,7 @@ class ClamBCPrepareGEPsForWriter : public ModulePass fixCasts(pFunc); } - return true; + return PreservedAnalyses::none(); } virtual void fixCasts(Function *pFunc) @@ -417,12 +416,21 @@ class ClamBCPrepareGEPsForWriter : public ModulePass } }; -char ClamBCPrepareGEPsForWriter::ID = 0; -static RegisterPass X("clambc-prepare-geps-for-writer", "ClamBCPrepareGEPsForWriter Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); - -llvm::ModulePass *createClamBCPrepareGEPsForWriter() +// This part is the new way of registering your pass +extern "C" 
::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCPrepareGEPsForWriter(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPrepareGEPsForWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-prepare-geps-for-writer") { + FPM.addPass(ClamBCPrepareGEPsForWriter()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp b/libclambcc/ClamBCPreserveABIs.cpp similarity index 78% rename from libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp rename to libclambcc/ClamBCPreserveABIs.cpp index d735be06361..4b6fcdc0c2c 100644 --- a/libclambcc/ClamBCPreserveABIs/ClamBCPreserveABIs.cpp +++ b/libclambcc/ClamBCPreserveABIs.cpp @@ -1,17 +1,18 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include "llvm/IR/Module.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" +#include +#include +#include #include #include @@ -32,7 +33,7 @@ namespace * to fake functions. If it does find it (the second time), it removes those * calls. 
*/ -class ClamBCPreserveABIs : public ModulePass +class ClamBCPreserveABIs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; @@ -46,9 +47,9 @@ class ClamBCPreserveABIs : public ModulePass return; } FunctionType *pFunctionType = llvm::dyn_cast(pFunc->getType()); - std::string newname = pFunc->getName(); + std::string newname(pFunc->getName()); + pFunctionType = pFunc->getFunctionType(); newname += "_fake"; - pFunctionType = llvm::cast(llvm::cast(pFunc->getType())->getElementType()); Function *fakeFunction = Function::Create(pFunctionType, Function::ExternalLinkage, newname, pFunc->getParent()); fakeFunctions.push_back(fakeFunction); std::vector args; @@ -127,18 +128,16 @@ class ClamBCPreserveABIs : public ModulePass } public: - static char ID; - ClamBCPreserveABIs() - : ModulePass(ID) {} + ClamBCPreserveABIs() {} virtual ~ClamBCPreserveABIs() {} - bool runOnModule(Module &m) override + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { pMod = &m; if (removeFakeFunctions()) { - return bChanged; + return PreservedAnalyses::none(); } for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -157,13 +156,30 @@ class ClamBCPreserveABIs : public ModulePass writeMetadata(); - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of struct ClamBCPreserveABIs } // end of anonymous namespace -char ClamBCPreserveABIs::ID = 0; -static RegisterPass X("clambc-preserve-abis", "Preserve ABIs", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCPreserveABIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-preserve-abis") { + FPM.addPass(ClamBCPreserveABIs()); + return true; + 
} + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp b/libclambcc/ClamBCRebuild.cpp similarity index 82% rename from libclambcc/ClamBCRebuild/ClamBCRebuild.cpp rename to libclambcc/ClamBCRebuild.cpp index c253bbded33..93f3968783b 100644 --- a/libclambcc/ClamBCRebuild/ClamBCRebuild.cpp +++ b/libclambcc/ClamBCRebuild.cpp @@ -19,13 +19,16 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ + +#include "ClamBCModule.h" +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include #include #include #include #include -#include #include #include #include @@ -33,23 +36,21 @@ #include #include #include +#include +#include #include #include #include #include #include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" - using namespace llvm; -class ClamBCRebuild : public ModulePass, public InstVisitor +class ClamBCRebuild : public PassInfoMixin, public InstVisitor { public: static char ID; - explicit ClamBCRebuild() - : ModulePass(ID) {} + explicit ClamBCRebuild() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Backend Rebuilder"; @@ -82,8 +83,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Builder = new IRBuilder(*Context, TF); - SE = nullptr; - Expander = nullptr; + SE = nullptr; visitFunction(F, &NF); for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { @@ -95,7 +95,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor runOnBasicBlock(bb); } - //phase 2: map PHI operands now + // phase 2: map PHI operands now for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (PHINode *N = dyn_cast(&*I)) { PHINode *PN = dyn_cast(VMap[N]); @@ -104,6 +104,13 @@ class ClamBCRebuild : public ModulePass, public InstVisitor for (unsigned i = 0; i < N->getNumIncomingValues(); i++) { Value *V = mapPHIValue(N->getIncomingValue(i)); BasicBlock *BB = mapBlock(N->getIncomingBlock(i)); + + if (V->getType() != 
N->getType()) { + if (V->getType()->isPointerTy() and N->getType()->isPointerTy()) { + V = CastInst::CreatePointerCast(V, N->getType(), + "ClamBCRebuild_fixCast_", BB->getTerminator()); + } + } PN->addIncoming(V, BB); } assert(PN->getNumIncomingValues() > 0); @@ -114,9 +121,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor fixupCalls(F, copy); F->setLinkage(GlobalValue::InternalLinkage); - if (Expander) { - delete Expander; - } delete Builder; return true; } @@ -146,7 +150,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void fixupCallInst(CallInst *pCallInst, Function *pFunc) { assert(pCallInst->arg_size() == pFunc->arg_size() && "Incorrect number of arguments"); - assert(pCallInst->getCalledValue() == pFunc && "This CallInst doesn't call this function"); auto argIter = pFunc->arg_begin(), argEnd = pFunc->arg_end(); auto callIter = pCallInst->arg_begin(), callEnd = pCallInst->arg_end(); @@ -174,13 +177,13 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } } - bool runOnModule(Module &M) + /*MAIN*/ + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM) { pMod = &M; /* Taken from doInitialization. 
*/ FMap.clear(); - //FMapRev.clear(); Context = &(pMod->getContext()); i8Ty = Type::getInt8Ty(*Context); @@ -188,7 +191,11 @@ class ClamBCRebuild : public ModulePass, public InstVisitor std::vector funcs; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function *pFunc = llvm::cast(i); + Function *pFunc = llvm::cast(i); + const FunctionType *FTy = pFunc->getFunctionType(); + if (FTy->isVarArg()) { + return PreservedAnalyses::all(); + } funcs.push_back(pFunc); } for (size_t i = 0; i < funcs.size(); i++) { @@ -196,7 +203,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor runOnFunction(*pFunc); } - return true; + return PreservedAnalyses::none(); } private: @@ -214,14 +221,12 @@ class ClamBCRebuild : public ModulePass, public InstVisitor ValueMapTy VMap; DenseMap, Value *> CastMap; - ScalarEvolution *SE = nullptr; - Type *i8Ty = nullptr; - Type *i8pTy = nullptr; - //FunctionPassManager *FPM = nullptr; + ScalarEvolution *SE = nullptr; + Type *i8Ty = nullptr; + Type *i8pTy = nullptr; LLVMContext *Context = nullptr; DenseSet visitedBB; IRBuilder *Builder = nullptr; - SCEVExpander *Expander = nullptr; void stop(const std::string &Msg, const llvm::Instruction *I) { @@ -229,19 +234,6 @@ class ClamBCRebuild : public ModulePass, public InstVisitor } friend class InstVisitor; - const Type *getInnerElementType(const CompositeType *CTy) - { - const Type *ETy = nullptr; - // get pointer to first element - do { - assert(CTy->indexValid(0u)); - ETy = CTy->getTypeAtIndex(0u); - CTy = dyn_cast(ETy); - } while (CTy); - assert(ETy->isIntegerTy()); - return ETy; - } - Type *rebuildType(Type *Ty, bool i8only = false) { assert(Ty); @@ -273,7 +265,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Type *Ty = rebuildType(AI.getAllocatedType(), true); if (const ArrayType *ATy = dyn_cast(Ty)) { Ty = ATy->getElementType(); - //TODO: check for overflow + // TODO: check for overflow n *= ATy->getNumElements(); } if (n != 1) @@ -284,7 +276,7 @@ class 
ClamBCRebuild : public ModulePass, public InstVisitor Constant *mapConstant(Constant *C) { - //TODO: compute any gep exprs here + // TODO: compute any gep exprs here return C; } @@ -313,8 +305,34 @@ class ClamBCRebuild : public ModulePass, public InstVisitor return NV; } + /* findDuplicateType looks through all the casts of a value to find if it + * is ultimately being casted to a type that it is already casted from. + * If that is the case, it just returns the original, instead of creating + * another cast. + * + * In addition to being inefficient, the excessive casting was causing + * issues in 0.103 and 0.105. + */ + Value *findDuplicateType(Value *v, Type *t) + { + if (BitCastInst *bci = llvm::dyn_cast(v)) { + if (bci->getSrcTy() == t) { + return bci->getOperand(0); + } + + return findDuplicateType(bci->getOperand(0), t); + } + return nullptr; + } + Value *makeCast(Value *V, Type *Ty) { + + Value *v = findDuplicateType(V, Ty); + if (v) { + return v; + } + if (V->getType() == Ty) { return V; } @@ -329,7 +347,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor BasicBlock::iterator thisP = Builder->GetInsertPoint(); BasicBlock *targetBB = I->getParent(); if (thisBB != targetBB) { - //BasicBlock::iterator IP = I; + // BasicBlock::iterator IP = I; BasicBlock::iterator IP(I); ++IP; while (isa(IP)) ++IP; @@ -347,7 +365,8 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Value *PV = mapValue(P); if (PV->getType() == Ty && !isa(PV)) { assert(!isa(PV) || - cast(Ty)->getElementType()->isIntegerTy()); + Ty->getPointerElementType()->isIntegerTy()); + return PV; } PV = PV->stripPointerCasts(); @@ -405,10 +424,10 @@ class ClamBCRebuild : public ModulePass, public InstVisitor Value *op1 = mapValue(I.getOperand(1)); /* - * bb#11515: Structure pointers are translated to uint8_t* pointers - * but constants are kept to their original type so a type - * conversion may be necessary on a icmp inst with a constant - */ + * bb#11515: Structure pointers are 
translated to uint8_t* pointers + * but constants are kept to their original type so a type + * conversion may be necessary on a icmp inst with a constant + */ if (op0->getType() != op1->getType()) { if (isa(op0)) op0 = makeCast(op0, op1->getType()); @@ -425,7 +444,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void visitLoadInst(LoadInst &I) { Value *P = I.getPointerOperand(); - VMap[&I] = Builder->CreateLoad(mapPointer(P, P->getType()), + VMap[&I] = Builder->CreateLoad(I.getType(), mapPointer(P, P->getType()), I.getName()); } @@ -439,7 +458,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void visitGetElementPtrInst(GetElementPtrInst &II) { if (II.hasAllZeroIndices()) { - //just a bitcast + // just a bitcast VMap[&II] = mapPointer(II.getOperand(0), rebuildType(II.getType())); return; } @@ -451,12 +470,18 @@ class ClamBCRebuild : public ModulePass, public InstVisitor I != E; ++I) { idxs.push_back(mapValue(*I)); } + + Type *pt = P->getType(); + if (llvm::isa(pt)) { + pt = pt->getPointerElementType(); + } + if (II.isInBounds()) { - //P = Builder->CreateInBoundsGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateInBoundsGEP(P, idxs, "clambcRebuildInboundsGEP"); + // P = Builder->CreateInBoundsGEP(P, idxs.begin(), idxs.end()); + P = Builder->CreateInBoundsGEP(pt, P, idxs, "clambcRebuildInboundsGEP"); } else { - //P = Builder->CreateGEP(P, idxs.begin(), idxs.end()); - P = Builder->CreateGEP(P, idxs, "clambcRebuildGEP"); + // P = Builder->CreateGEP(P, idxs.begin(), idxs.end()); + P = Builder->CreateGEP(pt, P, idxs, "clambcRebuildGEP"); } VMap[&II] = makeCast(P, rebuildType(II.getType())); ; @@ -478,7 +503,7 @@ class ClamBCRebuild : public ModulePass, public InstVisitor void visitPHINode(PHINode &I) { VMap[&I] = Builder->CreatePHI(I.getType(), 0, "ClamBCRebuild_phi_visitPHINode_"); - //2nd phase will map the operands + // 2nd phase will map the operands } void visitCastInst(CastInst &I) @@ -515,6 +540,12 @@ class ClamBCRebuild : 
public ModulePass, public InstVisitor if (Ty->isIntegerTy()) { V = Builder->CreateBitCast(V, Ty, "ClamBCRebuild_cast"); } else if (Ty->isPointerTy()) { // A CompositeType + + /*This appears to be necessary for 0.103 on windows.*/ + if (Ty != i8pTy) { + V = Builder->CreatePointerCast(V, i8pTy, "ClamBCRebuild"); + } + V = Builder->CreatePointerCast(V, Ty, "ClamBCRebuild"); } else { stop("Type conversion unhandled in ClamAV Bytecode Backend Rebuilder", &I); @@ -599,13 +630,22 @@ class ClamBCRebuild : public ModulePass, public InstVisitor return ret; } }; -char ClamBCRebuild::ID = 0; - -static RegisterPass X("clambc-rebuild", "ClamBCRebuild Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); -llvm::ModulePass *createClamBCRebuild(void) +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCRebuild(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRebuild", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-rebuild") { + FPM.addPass(ClamBCRebuild()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/Common/ClamBCRegAlloc.cpp b/libclambcc/ClamBCRegAlloc.cpp similarity index 78% rename from libclambcc/Common/ClamBCRegAlloc.cpp rename to libclambcc/ClamBCRegAlloc.cpp index 7bd83c011b7..301e021460a 100644 --- a/libclambcc/Common/ClamBCRegAlloc.cpp +++ b/libclambcc/ClamBCRegAlloc.cpp @@ -19,13 +19,11 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. 
*/ -#include "ClamBCModule.h" +#include "ClamBCRegAlloc.h" #include "ClamBCUtilities.h" #include "clambc.h" #include -//#include "llvm/Analysis/LiveValues.h" -//#include "llvm/Config/config.h" #include #include #include @@ -34,6 +32,7 @@ #include #include #include +#include using namespace llvm; // We do have a virtually unlimited number of registers, but it is more cache @@ -45,8 +44,10 @@ using namespace llvm; // targets with fixed number of registers, and a much simpler allocator // suffices for us. +llvm::AnalysisKey ClamBCRegAllocAnalyzer::Key; + /*TODO: Should rework this so that we are not changing things with open iterators.*/ -void ClamBCRegAlloc::handlePHI(PHINode *PN) +void ClamBCRegAllocAnalysis::handlePHI(PHINode *PN) { BasicBlock *BB = PN->getIncomingBlock(0); for (unsigned i = 1; i < PN->getNumIncomingValues(); i++) { @@ -56,7 +57,7 @@ void ClamBCRegAlloc::handlePHI(PHINode *PN) BasicBlock *pEntry = llvm::cast(pFunc->begin()); Instruction *pFirst = llvm::cast(pEntry->begin()); AllocaInst *AI = new AllocaInst(PN->getType(), pFunc->getAddressSpace(), ".phi", - pFirst); + pFirst); llvm::IRBuilder<> builder(PN->getContext()); unsigned MDDbgKind = PN->getContext().getMDKindID("dbg"); if (MDDbgKind) { @@ -77,18 +78,18 @@ void ClamBCRegAlloc::handlePHI(PHINode *PN) ++It; } while (isa(It)); builder.SetInsertPoint(&*It); - LoadInst *LI = builder.CreateLoad(AI, ".phiload"); + LoadInst *LI = builder.CreateLoad(AI->getAllocatedType(), AI, ".phiload"); builder.SetInstDebugLocation(LI); PN->replaceAllUsesWith(LI); PN->eraseFromParent(); } -bool ClamBCRegAlloc::runOnFunction(Function &F) +bool ClamBCRegAllocAnalysis::runOnFunction(Function &F) { ValueMap.clear(); RevValueMap.clear(); - DT = &getAnalysis().getDomTree(); bool Changed = false; + std::vector pns; for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { BasicBlock &BB = *I; BasicBlock::iterator J = BB.begin(); @@ -97,9 +98,13 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) if (!PN) break; 
++J; - handlePHI(PN); + pns.push_back(PN); } } + for (size_t i = 0; i < pns.size(); i++) { + PHINode *PN = pns[i]; + handlePHI(PN); + } unsigned id = 0; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); @@ -108,8 +113,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[A] = id; if (RevValueMap.size() == id) { RevValueMap.push_back(A); - } else { - errs() << id << " " << __FILE__ << ":" << __LINE__ << "\n"; } ++id; } @@ -127,22 +130,22 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[II] = ~0u; continue; } + + { + static int first = 1; + if (first) { + first = 0; + } + } if (CastInst *BC = dyn_cast(II)) { if (BitCastInst *BCI = dyn_cast(BC)) { if (!BCI->isLosslessCast()) { ClamBCStop("Non lossless bitcast is not supported", BCI); } - const Type *SrcTy = BC->getOperand(0)->getType(); - const Type *DstTy = BC->getType(); - const PointerType *SPTy, *DPTy; - while ((SPTy = dyn_cast(SrcTy))) { - DPTy = dyn_cast(DstTy); - if (!DPTy) { - ClamBCStop("Cast from pointer to non-pointer element", - BCI); - } - SrcTy = SPTy->getElementType(); - DstTy = DPTy->getElementType(); + + if (BCI->getSrcTy()->isPointerTy() and (not BCI->getDestTy()->isPointerTy())) { + ClamBCStop("Cast from pointer to non-pointer element", + BCI); } if (AllocaInst *AI = dyn_cast(BCI->getOperand(0))) { @@ -151,8 +154,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) ValueMap[II] = id; if (RevValueMap.size() == id) { RevValueMap.push_back(II); - } else { - errs() << id << " " << __FILE__ << ":" << __LINE__ << "\n"; } ++id; continue; @@ -161,6 +162,9 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) SkipMap.insert(II); ValueMap[II] = getValueID(II->getOperand(0)); continue; + } else if (llvm::isa(BC) or llvm::isa(BC)) { + ClamBCStop("Cast from pointer to non-pointer element", + BCI); } } if (II->hasOneUse()) { @@ -181,14 +185,6 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) } } // single-use of load from alloca -> use directly value id of alloca - //TODO: we must 
check for intervening stores here, better use memdep! - /* if (LoadInst *LI = dyn_cast(II)) { - if (AllocaInst *AI = dyn_cast(LI->getPointerOperand())) { - ValueMap[LI] = getValueID(AI); - SkipMap.insert(LI); - continue; - } - }*/ } ValueMap[II] = id; if (RevValueMap.size() == id) { @@ -198,13 +194,13 @@ bool ClamBCRegAlloc::runOnFunction(Function &F) } ++id; } - //TODO: reduce the number of virtual registers used, by using - // an algorithms that walks the dominatortree and does value liveness - // analysis. + // TODO: reduce the number of virtual registers used, by using + // an algorithms that walks the dominatortree and does value liveness + // analysis. return Changed; } -void ClamBCRegAlloc::dump() const +void ClamBCRegAllocAnalysis::dump() const { for (ValueIDMap::const_iterator I = ValueMap.begin(), E = ValueMap.end(); I != E; ++I) { @@ -212,7 +208,7 @@ void ClamBCRegAlloc::dump() const } } -void ClamBCRegAlloc::revdump() const +void ClamBCRegAllocAnalysis::revdump() const { for (unsigned i = 0; i < RevValueMap.size(); ++i) { errs() << i << ": "; @@ -221,7 +217,7 @@ void ClamBCRegAlloc::revdump() const } } -unsigned ClamBCRegAlloc::buildReverseMap(std::vector &reverseMap) +unsigned ClamBCRegAllocAnalysis::buildReverseMap(std::vector &reverseMap) { // Check using the older building code to determine changes due to building difference // Note: this code can be removed if necessary @@ -253,7 +249,7 @@ unsigned ClamBCRegAlloc::buildReverseMap(std::vector &reverseMap) return RevValueMap.size(); } -void ClamBCRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const +void ClamBCRegAllocAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); @@ -261,8 +257,17 @@ void ClamBCRegAlloc::getAnalysisUsage(AnalysisUsage &AU) const // loads/stores. 
AU.setPreservesCFG(); } -char ClamBCRegAlloc::ID = 0; -static RegisterPass X("clambc-ra", - "ClamAV bytecode register allocator"); -const PassInfo *const ClamBCRegAllocID = &X; +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRegAlloc", "v0.1", + [](PassBuilder &PB) { + PB.registerAnalysisRegistrationCallback( + [](FunctionAnalysisManager &mam) { + mam.registerPass([]() { return ClamBCRegAllocAnalyzer(); }); + }); + }}; +} diff --git a/libclambcc/ClamBCRegAlloc.h b/libclambcc/ClamBCRegAlloc.h new file mode 100644 index 00000000000..8e76d914a40 --- /dev/null +++ b/libclambcc/ClamBCRegAlloc.h @@ -0,0 +1,112 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. + * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. 
+ */
+#ifndef CLAMBC_REGALLOC_H
+#define CLAMBC_REGALLOC_H
+
+#include "clambc.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+/*
+ * Holds the value -> id numbering computed by runOnFunction() for one
+ * function, together with the reverse (id -> value) table and the set of
+ * instructions that were marked to be skipped.
+ */
+class ClamBCRegAllocAnalysis
+{
+  public:
+    static char ID;
+    explicit ClamBCRegAllocAnalysis() {}
+
+    unsigned buildReverseMap(std::vector<const llvm::Value *> &);
+
+    /* Returns true when I was recorded in SkipMap. */
+    bool skipInstruction(const llvm::Instruction *I) const
+    {
+        return SkipMap.count(I);
+    }
+
+    /*
+     * Returns the id assigned to V.
+     *
+     * Asking for a value that was never numbered, or for one that was
+     * numbered ~0u (unused/void), is a programming error and trips an
+     * assert.  When asserts are compiled out, an unknown value yields ~0u
+     * instead of dereferencing the end iterator.
+     */
+    unsigned getValueID(const llvm::Value *V) const
+    {
+        ValueIDMap::const_iterator I = ValueMap.find(V);
+        if (I == ValueMap.end()) {
+            DEBUGERR << "Error Value ID requested for unknown value (Printing below).\n";
+            DEBUGERR << *V << "\n";
+            assert(0 && "Value ID requested for unknown value");
+            return ~0u;
+        }
+        assert(I->second != ~0u &&
+               "Value ID requested for unused/void instruction!");
+        return I->second;
+    }
+    virtual bool runOnFunction(llvm::Function &F);
+    virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const;
+    void dump() const;
+    void revdump() const;
+
+    /* Stores (does not own) the dominator tree pointer; may be nullptr. */
+    virtual void setDominatorTree(llvm::DominatorTree *dt)
+    {
+        DT = dt;
+    }
+
+  private:
+    void handlePHI(llvm::PHINode *PN);
+    typedef llvm::DenseMap<const llvm::Value *, unsigned> ValueIDMap;
+    ValueIDMap ValueMap;
+    std::vector<const llvm::Value *> RevValueMap;
+    llvm::DenseSet<const llvm::Instruction *> SkipMap;
+    /* Non-owning; only set for the duration of ClamBCRegAllocAnalyzer::run(). */
+    llvm::DominatorTree *DT = nullptr;
+};
+
+/*
+ * New-pass-manager analysis wrapper around ClamBCRegAllocAnalysis.
+ */
+class ClamBCRegAllocAnalyzer : public llvm::AnalysisInfoMixin<ClamBCRegAllocAnalyzer>
+{
+
+  protected:
+    ClamBCRegAllocAnalysis clamBCRegAllocAnalysis;
+
+  public:
+    ClamBCRegAllocAnalyzer() {}
+    virtual ~ClamBCRegAllocAnalyzer() {}
+
+    friend AnalysisInfoMixin<ClamBCRegAllocAnalyzer>;
+    static llvm::AnalysisKey Key;
+    typedef ClamBCRegAllocAnalysis Result;
+
+    /*
+     * Runs the numbering on F.  The dominator tree is fetched from the
+     * analysis manager, handed to the analysis for the duration of
+     * runOnFunction(), and then cleared again so no dangling pointer is
+     * kept inside the returned object.
+     *
+     * NOTE(review): this returns a reference to a member while Result is a
+     * value type, so the caller presumably copies it - confirm.
+     */
+    ClamBCRegAllocAnalysis &run(llvm::Function &F, llvm::FunctionAnalysisManager &fam)
+    {
+
+        llvm::DominatorTree &dt = fam.getResult<llvm::DominatorTreeAnalysis>(F);
+        clamBCRegAllocAnalysis.setDominatorTree(&dt);
+        clamBCRegAllocAnalysis.runOnFunction(F);
+        clamBCRegAllocAnalysis.setDominatorTree(nullptr);
+
+        return clamBCRegAllocAnalysis;
+    }
+};
+
+#endif // CLAMBC_REGALLOC_H
diff --git
a/libclambcc/ClamBCRemoveFSHL.cpp b/libclambcc/ClamBCRemoveFSHL.cpp new file mode 100644 index 00000000000..b4cc0404cf9 --- /dev/null +++ b/libclambcc/ClamBCRemoveFSHL.cpp @@ -0,0 +1,178 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Remove fshl intrinsic because it's not supported by our runtime. 
+ */
+struct ClamBCRemoveFSHL : public PassInfoMixin<ClamBCRemoveFSHL> {
+  protected:
+    /* Module currently being processed; assigned at the top of run(). */
+    Module *pMod = nullptr;
+
+    FunctionType *fshlType = nullptr;
+
+    /* Builds the type T(T, T, T) used by both the intrinsic and its replacement. */
+    virtual llvm::FunctionType *getFSHLFunctionType(Type *functionArgType)
+    {
+        return FunctionType::get(functionArgType, {functionArgType, functionArgType, functionArgType}, false);
+    }
+
+    /*
+     * Placeholder for a 64-bit replacement.  addFunction() widens to i64,
+     * which cannot hold the 128-bit concatenation a 64-bit funnel shift
+     * needs, so a shift-free implementation is required here.
+     *
+     * Not implemented: asserts in debug builds and returns nullptr when
+     * asserts are compiled out (previously control fell off the end of a
+     * non-void function).
+     */
+    virtual llvm::Function *addFunction64(IntegerType *functionArgType, const char *const functionName)
+    {
+        /*Will determine if this is necessary during the rc phase.*/
+        /*
+          This is an example function, needs to be converted to IR
+          static uint8_t fshl8_noshifts(uint8_t left, uint8_t right, uint8_t shift){
+              uint8_t ret = 0;
+              uint8_t bitwidth = 8;
+              uint8_t bitIdx = (2 * bitwidth) - (shift % bitwidth) - 1;
+              uint8_t bit;
+
+              for (size_t i = 0; i < bitwidth; i++){
+                  if (bitIdx >= bitwidth) {
+                      bit = (left & (1 << (bitIdx - bitwidth))) ? 1 : 0;
+                      ret |= (bit << ((bitwidth - 1) - i));
+                  } else {
+                      bit = (right & (1 << bitIdx)) ? 1 : 0;
+                      ret |= (bit << ((bitwidth - 1) - i));
+                  }
+                  bitIdx-- ;
+              }
+
+              return ret;
+          }
+        */
+        assert(0 && "Unimplemented");
+        return nullptr;
+    }
+
+    /*
+     * addFunction was based on this.
+     * static uint8_t fshl8_shifts(uint8_t left, uint8_t right, uint8_t shift){
+     *     uint16_t tmp = (left << 8) | right;
+     *     tmp <<= (shift % 8);
+     *     tmp = (tmp & 0xff00) >> 8;
+     *     return (uint8_t) (tmp & 0xff);
+     * }
+
+     */
+    /*
+     * Emits an internal function `functionName` of type T(T, T, T) that
+     * computes the funnel shift left for the integer type T by working in
+     * i64: concatenate left:right, shift left by (shift % bitwidth), then
+     * take the upper half and truncate back to T.
+     *
+     * 64-bit T is delegated to addFunction64().
+     */
+    virtual llvm::Function *addFunction(IntegerType *functionArgType, const char *const functionName)
+    {
+
+        if (64 == functionArgType->getBitWidth()) {
+            return addFunction64(functionArgType, functionName);
+        }
+
+        FunctionType *ft = getFSHLFunctionType(functionArgType);
+        IntegerType *i64 = IntegerType::get(pMod->getContext(), 64);
+        ConstantInt *pciBitWidth = ConstantInt::get(i64, functionArgType->getBitWidth());
+
+        llvm::Function *fshl = Function::Create(ft, GlobalValue::InternalLinkage, functionName, *pMod);
+        Value *pLeft = fshl->getArg(0);
+        Value *pRight = fshl->getArg(1);
+        Value *pShift = fshl->getArg(2);
+        BasicBlock *pEntry = BasicBlock::Create(pMod->getContext(), "entry", fshl);
+
+        // tmp = (zext(left) << bitwidth) | zext(right)
+        pLeft = CastInst::CreateZExtOrBitCast(pLeft, i64, "zext_", pEntry);
+        pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pciBitWidth, "shl_", pEntry);
+        pRight = CastInst::CreateZExtOrBitCast(pRight, i64, "zext_", pEntry);
+        pLeft = BinaryOperator::Create(Instruction::Or, pLeft, pRight, "or", pEntry);
+        pShift = CastInst::CreateZExtOrBitCast(pShift, i64, "zext_", pEntry);
+
+        // tmp <<= (shift % bitwidth)
+        pShift = BinaryOperator::Create(Instruction::URem, pShift, pciBitWidth, "urem_", pEntry);
+        pLeft = BinaryOperator::Create(Instruction::Shl, pLeft, pShift, "shl_", pEntry);
+
+        // result = trunc(tmp >> bitwidth)
+        pLeft = BinaryOperator::Create(Instruction::LShr, pLeft, pciBitWidth, "shr_", pEntry);
+        pLeft = CastInst::CreateTruncOrBitCast(pLeft, functionArgType, "trunc_", pEntry);
+        ReturnInst::Create(pMod->getContext(), pLeft, pEntry);
+
+        return fshl;
+    }
+
+    /*
+     * Replaces the calls gathered for `intrinsicName` with calls to a freshly
+     * emitted `functionName`.  The replacement is only emitted when at least
+     * one call was gathered.  Returns true when the module was changed.
+     *
+     * functionArgType must be the integer type named by the intrinsic suffix
+     * (e.g. i8 for llvm.fshl.i8), otherwise the replacement's signature does
+     * not match the calls being rewritten.
+     */
+    virtual bool replaceCalls(const char *const intrinsicName, const char *functionName, IntegerType *functionArgType)
+    {
+        std::vector<CallInst *> calls;
+        gatherCallsToIntrinsic(pMod, intrinsicName, calls);
+        if (calls.size()) {
+            Function *fshl = addFunction(functionArgType, functionName);
+            replaceAllCalls(getFSHLFunctionType(functionArgType), fshl, calls, "ClamBCRemoveFSHL_");
+
+            return true;
+        }
+        return false;
+    }
+
+  public:
+    virtual ~ClamBCRemoveFSHL() {}
+
+    /*TODO: Add this to validator.*/
+    /*
+     * Pass entry point: rewrites llvm.fshl.i32 / .i16 / .i8.
+     * llvm.fshl.i64 is not handled here (see addFunction64).
+     */
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+
+        bool bRet = replaceCalls("llvm.fshl.i32", ".fshl.i32", Type::getInt32Ty(pMod->getContext()));
+        bRet |= replaceCalls("llvm.fshl.i16", ".fshl.i16", Type::getInt16Ty(pMod->getContext()));
+        // The i8 intrinsic needs an i8 helper; an i16 one would neither match
+        // the call's operand types nor rotate modulo 8.
+        bRet |= replaceCalls("llvm.fshl.i8", ".fshl.i8", Type::getInt8Ty(pMod->getContext()));
+
+        if (bRet) {
+            return PreservedAnalyses::none();
+        }
+
+        return PreservedAnalyses::all();
+    }
+
+}; // end of struct ClamBCRemoveFSHL
+
+} // end of anonymous namespace
+
+// This part is the new way of registering your pass
+// Makes the pass available to the pipeline parser as "clambc-remove-fshl".
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo()
+{
+    return {
+        LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFSHL", "v0.1",
+        [](PassBuilder &PB) {
+            PB.registerPipelineParsingCallback(
+                [](StringRef Name, ModulePassManager &FPM,
+                   ArrayRef<PassBuilder::PipelineElement>) {
+                    if (Name == "clambc-remove-fshl") {
+                        FPM.addPass(ClamBCRemoveFSHL());
+                        return true;
+                    }
+                    return false;
+                });
+        }};
+}
diff --git a/libclambcc/ClamBCRemoveFreezeInsts.cpp b/libclambcc/ClamBCRemoveFreezeInsts.cpp
new file mode 100644
index 00000000000..ea10ef8e7b1
--- /dev/null
+++ b/libclambcc/ClamBCRemoveFreezeInsts.cpp
@@ -0,0 +1,119 @@
+/*
+ * Compile LLVM bytecode to ClamAV bytecode.
+ *
+ * Copyright (C) 2020-2023 Sourcefire, Inc.
+ *
+ * Authors: Andy Ragusa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "clambc.h"
+#include "ClamBCUtilities.h"
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+
+using namespace llvm;
+using namespace std;
+
+namespace
+{
+/*
+ * Freeze Instructions are to guarantee sane behaviour in the case of undefs or poison values.  The interpreter
+ * has no notion of freeze instructions, so we are removing them.  The verifier will fail if there are undef or
+ * poison values in the IR, so this is safe to do.
+ */
+struct ClamBCRemoveFreezeInsts : public PassInfoMixin<ClamBCRemoveFreezeInsts> {
+  protected:
+    /* Module currently being processed; assigned at the top of run(). */
+    Module *pMod = nullptr;
+    /* Set as soon as one freeze instruction has been removed in this run. */
+    bool bChanged = false;
+
+    /* Appends every FreezeInst found in pFunc to freezeInsts (nothing is modified). */
+    virtual void gatherFreezeInsts(Function *pFunc, std::vector<FreezeInst *> &freezeInsts)
+    {
+        for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) {
+            BasicBlock *pBB = llvm::cast<BasicBlock>(fi);
+            for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) {
+                if (FreezeInst *pfi = llvm::dyn_cast<FreezeInst>(bi)) {
+                    freezeInsts.push_back(pfi);
+                }
+            }
+        }
+    }
+
+    /*
+     * Replaces each freeze in pFunc by its operand and erases it.
+     * The instructions are collected first so that nothing is erased while
+     * the basic blocks are still being iterated.
+     */
+    virtual void processFunction(Function *pFunc)
+    {
+        vector<FreezeInst *> freezeInsts;
+        gatherFreezeInsts(pFunc, freezeInsts);
+
+        for (size_t i = 0; i < freezeInsts.size(); i++) {
+            bChanged = true;
+
+            FreezeInst *pfi = freezeInsts[i];
+            pfi->replaceAllUsesWith(pfi->getOperand(0));
+            pfi->eraseFromParent();
+        }
+    }
+
+  public:
+    virtual ~ClamBCRemoveFreezeInsts() {}
+
+    /*
+     * Pass entry point: strips freeze instructions from every function that
+     * has a body.  Reports none() preserved only when something was removed.
+     */
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+        // Start clean so a reused pass object does not report a stale change.
+        bChanged = false;
+
+        for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
+            Function *pFunc = llvm::cast<Function>(i);
+            if (pFunc->isDeclaration()) {
+                continue;
+            }
+
+            processFunction(pFunc);
+        }
+
+        if (bChanged) {
+            return PreservedAnalyses::none();
+        } else {
+            return PreservedAnalyses::all();
+        }
+    }
+}; // end of struct ClamBCRemoveFreezeInsts
+
+} // end of anonymous namespace
+
+// This part is the new way of registering your pass
+// Makes the pass available to the pipeline parser as
+// "clambc-remove-freeze-insts".
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo()
+{
+    return {
+        LLVM_PLUGIN_API_VERSION, "ClamBCRemoveFreezeInsts", "v0.1",
+        [](PassBuilder &PB) {
+            PB.registerPipelineParsingCallback(
+                [](StringRef Name, ModulePassManager &FPM,
+                   ArrayRef<PassBuilder::PipelineElement>) {
+                    // Returning false lets other registered callbacks try the name.
+                    if (Name == "clambc-remove-freeze-insts") {
+                        FPM.addPass(ClamBCRemoveFreezeInsts());
+                        return true;
+                    }
+                    return false;
+                });
+        }};
+}
diff --git a/libclambcc/ClamBCRemoveICMPSLE.cpp b/libclambcc/ClamBCRemoveICMPSLE.cpp
new file mode 100644
index 00000000000..c6db611925b
--- /dev/null
+++ b/libclambcc/ClamBCRemoveICMPSLE.cpp
@@ -0,0 +1,115 @@
+/*
+ * Compile LLVM bytecode to ClamAV bytecode.
+ *
+ * Copyright (C) 2020-2023 Sourcefire, Inc.
+ *
+ * Authors: Andy Ragusa
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301, USA.
+ */
+
+#include "clambc.h"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+
+#include
+
+using namespace llvm;
+using namespace std;
+
+/* Modeled after CallGraphAnalysis */
+
+namespace
+{
+/*
+ * Rewrites every `icmp sle` so that the SLE predicate no longer appears in
+ * the module.  The comparison is kept equivalent by swapping its operands
+ * (see processFunction).
+ */
+struct ClamBCRemoveICMPSLE : public PassInfoMixin<ClamBCRemoveICMPSLE> {
+  protected:
+    /* Module currently being processed; assigned at the top of run(). */
+    Module *pMod = nullptr;
+    /* Set as soon as one compare has been rewritten in this run. */
+    bool bChanged = false;
+
+    /* Appends every ICmpInst in pFunc whose predicate is ICMP_SLE to insts. */
+    virtual void gatherInstructions(Function *pFunc, std::vector<ICmpInst *> &insts)
+    {
+        for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) {
+            BasicBlock *pBB = llvm::cast<BasicBlock>(i);
+            for (auto bbi = pBB->begin(), bbe = pBB->end(); bbi != bbe; bbi++) {
+                ICmpInst *inst = llvm::dyn_cast<ICmpInst>(bbi);
+                if (inst) {
+                    if (CmpInst::ICMP_SLE == inst->getPredicate()) {
+                        insts.push_back(inst);
+                    }
+                }
+            }
+        }
+    }
+
+    /*
+     * Swaps the operands of each gathered compare.  ICmpInst::swapOperands()
+     * also switches the predicate to its swapped form, so `a sle b` becomes
+     * the equivalent `b sge a`.
+     */
+    virtual void processFunction(Function *pFunc)
+    {
+        std::vector<ICmpInst *> insts;
+        gatherInstructions(pFunc, insts);
+
+        for (size_t i = 0; i < insts.size(); i++) {
+            insts[i]->swapOperands();
+            // Record the rewrite; without this run() would always claim that
+            // every analysis is preserved.
+            bChanged = true;
+        }
+    }
+
+  public:
+    virtual ~ClamBCRemoveICMPSLE() {}
+
+    /*
+     * Pass entry point: processes every function that has a body and reports
+     * none() preserved only when at least one compare was rewritten.
+     */
+    PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM)
+    {
+        pMod = &m;
+        // Start clean so a reused pass object does not report a stale change.
+        bChanged = false;
+        for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) {
+            Function *pFunc = llvm::dyn_cast<Function>(i);
+            if (pFunc) {
+                if (pFunc->isDeclaration()) {
+                    continue;
+                }
+
+                processFunction(pFunc);
+            }
+        }
+
+        if (bChanged) {
+            return PreservedAnalyses::none();
+        }
+        return PreservedAnalyses::all();
+    }
+}; // end of struct ClamBCRemoveICMPSLE
+
+} // end of anonymous namespace
+
+// This part is the new way of registering your pass
+// Makes the pass available to the pipeline parser as "clambc-remove-icmp-sle".
+extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK
+llvmGetPassPluginInfo()
+{
+    return {
+        LLVM_PLUGIN_API_VERSION, "ClamBCRemoveICMPSLE", "v0.1",
+        [](PassBuilder &PB) {
+            PB.registerPipelineParsingCallback(
+                [](StringRef Name, ModulePassManager &FPM,
+                   ArrayRef<PassBuilder::PipelineElement>) {
+                    if (Name == "clambc-remove-icmp-sle") {
+                        FPM.addPass(ClamBCRemoveICMPSLE());
+                        return true;
+                    }
+                    return false;
+                });
+        }};
+}
diff --git a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp
b/libclambcc/ClamBCRemovePointerPHIs.cpp similarity index 60% rename from libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp rename to libclambcc/ClamBCRemovePointerPHIs.cpp index 4c8409641c9..5c00299e1bd 100644 --- a/libclambcc/ClamBCRemovePointerPHIs/ClamBCRemovePointerPHIs.cpp +++ b/libclambcc/ClamBCRemovePointerPHIs.cpp @@ -1,3 +1,6 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" +#include "ClamBCModule.h" #include #include @@ -8,24 +11,26 @@ #include -#include #include +#include +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" -#include "Common/ClamBCModule.h" using namespace llvm; #include +/* + * This Pass is only needed for 0.103 on Windows, so when we no longer need to support 0.103, it can be removed. + */ + namespace { -class ClambcRemovePointerPHIs : public FunctionPass +class ClamBCRemovePointerPHIs : public PassInfoMixin { protected: - Function *pFunc = nullptr; + llvm::Module *pMod = nullptr; - std::vector gatherPHIs() + std::vector gatherPHIs(llvm::Function *pFunc) { std::vector ret; @@ -76,9 +81,9 @@ class ClambcRemovePointerPHIs : public FunctionPass if (1 != vals.size()) { /* - * This may not be an issue, but want to investigate it. - * This would be due to the phi node not being the same base pointer with a different index. - */ + * This may not be an issue, but want to investigate it. + * This would be due to the phi node not being the same base pointer with a different index. 
+ */ return nullptr; } else { @@ -178,14 +183,13 @@ class ClambcRemovePointerPHIs : public FunctionPass if (not pn->getType()->isPointerTy()) { return false; } - //std::vector delLst; Value *pBasePtr = findBasePointer(pn); if (nullptr == pBasePtr) { /*No unique base pointer.*/ return false; } - IntegerType *pType = Type::getInt64Ty(pFunc->getParent()->getContext()); + IntegerType *pType = Type::getInt64Ty(pMod->getContext()); Constant *zero = ConstantInt::get(pType, 0); Value *initValue = zero; PHINode *idxNode = PHINode::Create(pType, pn->getNumIncomingValues(), "ClamBCRemovePointerPHIs_idx_", pn); @@ -197,15 +201,15 @@ class ClambcRemovePointerPHIs : public FunctionPass for (size_t i = 0; i < pn->getNumIncomingValues(); i++) { Value *incoming = getOrigValue(pn->getIncomingValue(i)); - //If this value is dependent on the phi node, then it cannot - //be what the PHINode was initialized to the first time the - //block was entered, which is what we are looking for. + // If this value is dependent on the phi node, then it cannot + // be what the PHINode was initialized to the first time the + // block was entered, which is what we are looking for. if (not(phiIsDependent(incoming, pn))) { continue; } if (GetElementPtrInst *p = llvm::dyn_cast(pn->getIncomingValue(i))) { - //Replace initValue with the index operand of the GetElementPtrInst here. + // Replace initValue with the index operand of the GetElementPtrInst here. 
for (auto idx = p->idx_begin(), idxe = p->idx_end(); idx != idxe; idx++) { initValue = llvm::cast(idx); } @@ -226,7 +230,17 @@ class ClambcRemovePointerPHIs : public FunctionPass std::vector newInsts; Instruction *insPt = findFirstNonPHI(pn->getParent()); - Instruction *gepiNew = GetElementPtrInst::Create(nullptr, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); + if (pBasePtr->getType() != pn->getType()) { + pBasePtr = CastInst::CreatePointerCast(pBasePtr, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); + } + + PointerType *pt = llvm::dyn_cast(pBasePtr->getType()); + if (nullptr == pt) { + assert(0 && "This pass is only for pointer phis, how did we get here???"); + } + Type *elementType = pt->getPointerElementType(); + + Instruction *gepiNew = GetElementPtrInst::Create(elementType, pBasePtr, idxNode, "ClamBCRemovePointerPHIs_gepi_", insPt); if (pn->getType() != gepiNew->getType()) { gepiNew = CastInst::CreatePointerCast(gepiNew, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); } @@ -242,6 +256,10 @@ class ClambcRemovePointerPHIs : public FunctionPass Value *incVal = pgepi->getOperand(1); + if (incVal->getType() != pType) { + incVal = CastInst::CreateIntegerCast(incVal, pType, false, "ClamBCRemovePointerPHIs_cast_", pgepi); + } + Instruction *add = BinaryOperator::Create(Instruction::Add, idxNode, incVal, "ClamBCRemovePointerPHIs_add_", pgepi); BasicBlock *pred = findPredecessor(idxNode->getParent(), pgepi->getParent(), omitNodes); assert(pred && "Could not find predecessor"); @@ -282,34 +300,110 @@ class ClambcRemovePointerPHIs : public FunctionPass return true; } - public: - static char ID; - ClambcRemovePointerPHIs() - : FunctionPass(ID) {} + /*The idea here is that we get the insertion point for where our calculated value + * will be saved. 
pInst is the value we want to save, so it will have to be*/ + Instruction *getInsertionPoint(Instruction *pInst) + { + BasicBlock *pBB = pInst->getParent(); + bool canBreak = false; + Instruction *pRet = nullptr; + for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { + pRet = llvm::cast(i); + if (canBreak && (not llvm::isa(pRet))) { + break; + } + if (pRet == pInst) { + canBreak = true; + } + } + return pRet; + } - bool runOnFunction(Function &F) override + /* Load the value from our AllocaInst, and + * replace the PHINode everywhere it is used.*/ + virtual void replaceUses(PHINode *pn, AllocaInst *pai) { + std::vector users; + for (auto i = pn->user_begin(), e = pn->user_end(); i != e; i++) { + if (Instruction *pUser = llvm::dyn_cast(*i)) { + users.push_back(pUser); + } + } + + for (size_t i = 0; i < users.size(); i++) { + Instruction *pUser = users[i]; + Instruction *insPt = nullptr; + + if (PHINode *pnUser = llvm::dyn_cast(pUser)) { + for (size_t j = 0; j < pnUser->getNumIncomingValues(); j++) { + if (pn == pnUser->getIncomingValue(j)) { + insPt = pnUser->getIncomingBlock(j)->getTerminator(); + break; + } + } + } else { + insPt = pUser; + } + + LoadInst *pli = new LoadInst(pn->getType(), pai, "ClamBCRemovePointerPHIs_load_", insPt); + for (size_t j = 0; j < pUser->getNumOperands(); j++) { + if (pn == pUser->getOperand(j)) { + pUser->setOperand(j, pli); + break; + } + } + } + } + + public: + ClamBCRemovePointerPHIs() {} - pFunc = &F; + /*This is only necessary for 0.103 on windows.*/ + virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &mam) + { + pMod = &m; bool ret = false; - std::vector phis = gatherPHIs(); - for (size_t i = 0; i < phis.size(); i++) { - PHINode *pn = phis[i]; + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + llvm::Function *pFunc = llvm::dyn_cast(i); + if (nullptr == pFunc) { + continue; + } + std::vector phis = gatherPHIs(pFunc); + for (size_t i = 0; i < phis.size(); i++) { + PHINode *pn = phis[i]; - if 
(handlePHI(pn)) { - ret = true; + if (handlePHI(pn)) { + ret = true; + } } } - return ret; + if (ret) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of class ClambcRemovePointerPHIs } // end of anonymous namespace -char ClambcRemovePointerPHIs::ID = 0; -static RegisterPass X("clambc-remove-pointer-phis", "Remove PHI Nodes with pointers", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemovePointerPHIs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-pointer-phis") { + FPM.addPass(ClamBCRemovePointerPHIs()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp b/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp deleted file mode 100644 index 012c91773b7..00000000000 --- a/libclambcc/ClamBCRemoveSelectInsts/ClamBCRemoveSelectInsts.cpp +++ /dev/null @@ -1,116 +0,0 @@ - -#include -#include "llvm/IR/Function.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -#include "Common/clambc.h" - -using namespace llvm; - -namespace -{ -class RemoveSelectInsts : public ModulePass -{ - protected: - bool bChanged = false; - Module* pMod = nullptr; - - void processBasicBlock(BasicBlock* pBB, std::vector& selects) - { - for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { - SelectInst* pSelect = llvm::dyn_cast(i); - if (pSelect) { - selects.push_back(pSelect); - } - } - } - - void processFunction(Function* pFunc, std::vector& selects) - { - for (auto i = pFunc->begin(), e = pFunc->end(); i != 
e; i++) { - BasicBlock* pBB = llvm::cast(i); - processBasicBlock(pBB, selects); - } - } - - std::vector gatherSelects() - { - std::vector selects; - for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { - Function* pFunc = llvm::cast(i); - - processFunction(pFunc, selects); - } - - return selects; - } - - Instruction* getAllocaInsertPoint(SelectInst* pSelect) - { - BasicBlock* entryBlock = llvm::cast(pSelect->getParent()->getParent()->begin()); - for (auto i = entryBlock->begin(), e = entryBlock->end(); i != e; i++) { - Instruction* pInst = llvm::cast(i); - if (not llvm::isa(pInst)) { - return pInst; - } - } - - assert(0 && "MALFORMED BASIC BLOCK"); - return nullptr; - } - - void replaceSelectInst(SelectInst* pSelect) - { - - Instruction* insertBefore = getAllocaInsertPoint(pSelect); - AllocaInst* pAlloca = new AllocaInst(pSelect->getType(), - pMod->getDataLayout().getProgramAddressSpace(), - "ClamBCRemoveSelectInst", insertBefore); - - BasicBlock* pBB = llvm::cast(pSelect->getParent()); - - BasicBlock* pSplit = pBB->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - new StoreInst(pSelect->getFalseValue(), pAlloca, pBB->getTerminator()); - - new StoreInst(pSelect->getTrueValue(), pAlloca, pSelect); - - BasicBlock* pSplit2 = pSplit->splitBasicBlock(pSelect, "ClamBCRemoveSelectInst"); - BranchInst::Create(pSplit, pSplit2, pSelect->getCondition(), pBB->getTerminator()); - - LoadInst* pLoad = new LoadInst(pAlloca->getType()->getPointerElementType(), pAlloca, "ClamBCRemoveSelectInst", pSelect); - pSelect->replaceAllUsesWith(pLoad); - - pBB->getTerminator()->eraseFromParent(); - pSelect->eraseFromParent(); - } - - public: - static char ID; - RemoveSelectInsts() - : ModulePass(ID) {} - - virtual bool runOnModule(Module& m) override - { - pMod = &m; - - std::vector selects = gatherSelects(); - for (size_t i = 0; i < selects.size(); i++) { - SelectInst* pSelect = selects[i]; - - replaceSelectInst(pSelect); - } - - return bChanged; - } -}; // end of struct 
RemoveSelectInsts -} // end of anonymous namespace - -char RemoveSelectInsts::ID = 0; -static RegisterPass X("remove-selects", "RemoveSelectInsts Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCRemoveUSUB.cpp b/libclambcc/ClamBCRemoveUSUB.cpp new file mode 100644 index 00000000000..5620863929f --- /dev/null +++ b/libclambcc/ClamBCRemoveUSUB.cpp @@ -0,0 +1,139 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Remove usub intrinsic because it's not supported by our runtime. 
+ */ +struct ClamBCRemoveUSUB : public PassInfoMixin { + protected: + Module *pMod = nullptr; + const char *const USUB_NAME = ".usub"; + + FunctionType *usubType = nullptr; + + virtual llvm::FunctionType *getUSUBFunctionType(Type *functionArgType) + { + return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false); + } + + virtual llvm::Function *addUSUB(Type *functionArgType) + { + uint32_t addressSpace = pMod->getDataLayout().getProgramAddressSpace(); + + FunctionType *ft = getUSUBFunctionType(functionArgType); + + llvm::Function *usub = Function::Create(ft, GlobalValue::InternalLinkage, USUB_NAME, *pMod); + Value *pLeft = usub->getArg(0); + Value *pRight = usub->getArg(1); + BasicBlock *pEntry = BasicBlock::Create(pMod->getContext(), "entry", usub); + BasicBlock *pLHS = BasicBlock::Create(pMod->getContext(), "left", usub); + BasicBlock *pRHS = BasicBlock::Create(pMod->getContext(), "right", usub); + BasicBlock *pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", usub); + + // entry block + AllocaInst *retVar = new AllocaInst(functionArgType, addressSpace, "ret", pEntry); + ICmpInst *cmp = new ICmpInst(*pEntry, CmpInst::ICMP_UGT, pLeft, pRight, "icmp"); + BranchInst::Create(pLHS, pRHS, cmp, pEntry); + + // left > right + new StoreInst(BinaryOperator::Create(Instruction::Sub, pLeft, pRight, "ClamBCRemoveUSUB_", pLHS), retVar, pLHS); + BranchInst::Create(pRetBlock, pLHS); + + // right >= left + new StoreInst(ConstantInt::get(functionArgType, 0), retVar, pRHS); + BranchInst::Create(pRetBlock, pRHS); + + LoadInst *pli = new LoadInst(functionArgType, retVar, "load", pRetBlock); + ReturnInst::Create(pMod->getContext(), pli, pRetBlock); + return usub; + } + + virtual bool replaceCalls(const char *const intrinsicName, Type *functionArgType) + { + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()) { + Function *usub = addUSUB(functionArgType); + replaceAllCalls(getUSUBFunctionType(functionArgType), 
usub, calls, "ClamBCRemoveUSUB_"); + + return true; + } + return false; + } + + public: + virtual ~ClamBCRemoveUSUB() {} + + /*TODO: Add detection of these instructions to the validator.*/ + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.usub.sat.i32", Type::getInt32Ty(pMod->getContext())); + // bRet |= replaceCalls("llvm.usub.i16", Type::getInt16Ty(pMod->getContext())); + + if (bRet) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + +}; // end of struct ClamBCRemoveUSUB + +} // end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUSUB", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-usub") { + FPM.addPass(ClamBCRemoveUSUB()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp b/libclambcc/ClamBCRemoveUndefs.cpp similarity index 79% rename from libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp rename to libclambcc/ClamBCRemoveUndefs.cpp index 2151b1142fa..7ec7c9a4d6c 100644 --- a/libclambcc/ClamBCRemoveUndefs/ClamBCRemoveUndefs.cpp +++ b/libclambcc/ClamBCRemoveUndefs.cpp @@ -1,26 +1,30 @@ +#include "clambc.h" +#include "ClamBCUtilities.h" + #include -#include "llvm/IR/Module.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/Instructions.h" -#include "llvm/Support/raw_ostream.h" +#include +#include +#include +#include +#include +#include #include -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" using namespace llvm; +/* THIS APPEARS TO NO LONGER BE NEEDED. 
LEAVING IN PLACE DURING THE RC PHASE, JUST IN CASE. */ + namespace { /* - * This pass requires -mem2reg before it (TEMPORARILY) - * and must be run before -O3. + * This pass requires -mem2reg before it (TEMPORARILY) + * and must be run before -O3. * - * This will remove storing parameters in stack variables and loading from there. + * This will remove storing parameters in stack variables and loading from there. * * ; Function Attrs: noinline nounwind uwtable define dso_local i32 @decrypt_config(i32 %config_location, %struct._state* %state, i32 %sizeof_state) #0 { @@ -32,8 +36,7 @@ namespace store %struct._state* %state, %struct._state** %state.addr, align 8 store i32 %sizeof_state, i32* %sizeof_state.addr, align 4 */ -class ClamBCRemoveUndefs : public ModulePass -{ +struct ClamBCRemoveUndefs : public PassInfoMixin { protected: llvm::Module *pMod = nullptr; std::map aborts; @@ -56,9 +59,8 @@ class ClamBCRemoveUndefs : public ModulePass FunctionType *rterrTy = FunctionType::get( Type::getInt32Ty(BB->getContext()), {Type::getInt32Ty(BB->getContext())}, false); - Constant *func_abort = - BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); - Constant *func_rterr = + FunctionCallee func_abort = BB->getParent()->getParent()->getOrInsertFunction("abort", abrtTy); + FunctionCallee func_rterr = BB->getParent()->getParent()->getOrInsertFunction("bytecode_rt_error", rterrTy); BasicBlock *abort = BasicBlock::Create(BB->getContext(), "rterr.trig", BB->getParent()); Constant *PN = ConstantInt::get(Type::getInt32Ty(BB->getContext()), 99); @@ -217,14 +219,15 @@ class ClamBCRemoveUndefs : public ModulePass } public: - static char ID; - ClamBCRemoveUndefs() - : ModulePass(ID) {} + ClamBCRemoveUndefs() {} virtual ~ClamBCRemoveUndefs() {} - bool runOnModule(Module &m) override + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) { + /*This no longer appears to be needed. 
Will keep it during the -rc phase and then remove.*/ + return PreservedAnalyses::all(); + pMod = &m; for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { @@ -240,13 +243,30 @@ class ClamBCRemoveUndefs : public ModulePass delLst[i]->eraseFromParent(); } - return bChanged; + if (bChanged) { + return PreservedAnalyses::none(); + } + return PreservedAnalyses::all(); } }; // end of struct ClamBCRemoveUndefs } // end of anonymous namespace -char ClamBCRemoveUndefs::ID = 0; -static RegisterPass X("clambc-remove-undefs", "Remove Undefs", - false /* Only looks at CFG */, - false /* Analysis Pass */); +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUndefs", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-undefs") { + FPM.addPass(ClamBCRemoveUndefs()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics.cpp b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics.cpp new file mode 100644 index 00000000000..b7b5576fed8 --- /dev/null +++ b/libclambcc/ClamBCRemoveUnsupportedICMPIntrinsics.cpp @@ -0,0 +1,151 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2020-2023 Sourcefire, Inc. + * + * Authors: Andy Ragusa + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include + +#include +#include + +#include + +using namespace llvm; +using namespace std; + +namespace +{ +/* + * Remove smin intrinsic because it's not supported by our runtime. + */ +struct ClamBCRemoveUnsupportedICMPIntrinsics : public PassInfoMixin { + protected: + Module *pMod = nullptr; + // const char * const UnsupportedICMPIntrinsics_NAME = ".smin"; + + FunctionType *sminType = nullptr; + + virtual llvm::FunctionType *getUnsupportedICMPIntrinsicsFunctionType(Type *functionArgType) + { + return FunctionType::get(functionArgType, {functionArgType, functionArgType}, false); + } + + virtual llvm::Function *addFunction(Type *functionArgType, + const char *const newName, + llvm::CmpInst::Predicate predicate) + { + + uint32_t addressSpace = pMod->getDataLayout().getProgramAddressSpace(); + + FunctionType *ft = getUnsupportedICMPIntrinsicsFunctionType(functionArgType); + + llvm::Function *smin = Function::Create(ft, GlobalValue::InternalLinkage, newName, *pMod); + Value *pLeft = smin->getArg(0); + Value *pRight = smin->getArg(1); + BasicBlock *pEntry = BasicBlock::Create(pMod->getContext(), "entry", smin); + BasicBlock *pLHS = BasicBlock::Create(pMod->getContext(), "left", smin); + BasicBlock *pRHS = BasicBlock::Create(pMod->getContext(), "right", smin); + BasicBlock *pRetBlock = BasicBlock::Create(pMod->getContext(), "ret", smin); + + // entry block + AllocaInst *retVar = new AllocaInst(functionArgType, addressSpace, "ret", pEntry); + ICmpInst *cmp = new ICmpInst(*pEntry, predicate, pLeft, pRight, "icmp"); + BranchInst::Create(pLHS, pRHS, cmp, pEntry); + + // left > right + new StoreInst(pLeft, retVar, pLHS); + BranchInst::Create(pRetBlock, pLHS); + + // right >= 
left + new StoreInst(pRight, retVar, pRHS); + BranchInst::Create(pRetBlock, pRHS); + + LoadInst *pli = new LoadInst(functionArgType, retVar, "load", pRetBlock); + ReturnInst::Create(pMod->getContext(), pli, pRetBlock); + return smin; + } + + virtual bool replaceCalls(const char *const intrinsicName, + const char *newName, + llvm::CmpInst::Predicate predicate, + Type *functionArgType) + { + std::vector calls; + gatherCallsToIntrinsic(pMod, intrinsicName, calls); + if (calls.size()) { + Function *smin = addFunction(functionArgType, newName, predicate); + replaceAllCalls(getUnsupportedICMPIntrinsicsFunctionType(functionArgType), smin, calls, "ClamBCRemoveUnsupportedICMPIntrinsics_"); + + return true; + } + return false; + } + + public: + virtual ~ClamBCRemoveUnsupportedICMPIntrinsics() {} + + /*TODO: Add detection of these instructions to the validator.*/ + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM) + { + pMod = &m; + + bool bRet = replaceCalls("llvm.smin.i32", ".smin.32", CmpInst::ICMP_SLT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smin.i16", ".smin.16", CmpInst::ICMP_SLT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i16", ".umin.16", CmpInst::ICMP_ULT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umin.i32", ".umin.32", CmpInst::ICMP_ULT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i32", ".umax.32", CmpInst::ICMP_UGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.umax.i16", ".umax.16", CmpInst::ICMP_UGT, Type::getInt16Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i32", ".smax.32", CmpInst::ICMP_SGT, Type::getInt32Ty(pMod->getContext())); + bRet |= replaceCalls("llvm.smax.i16", ".smax.16", CmpInst::ICMP_SGT, Type::getInt16Ty(pMod->getContext())); + + if (bRet) { + return PreservedAnalyses::none(); + } + + return PreservedAnalyses::all(); + } + +}; // end of struct ClamBCRemoveUnsupportedICMPIntrinsics + +} // 
end of anonymous namespace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCRemoveUnsupportedICMPIntrinsics", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-remove-unsupported-icmp-intrinsics") { + FPM.addPass(ClamBCRemoveUnsupportedICMPIntrinsics()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCTrace/ClamBCTrace.cpp b/libclambcc/ClamBCTrace.cpp similarity index 79% rename from libclambcc/ClamBCTrace/ClamBCTrace.cpp rename to libclambcc/ClamBCTrace.cpp index 2943147ad21..91e9410689c 100644 --- a/libclambcc/ClamBCTrace/ClamBCTrace.cpp +++ b/libclambcc/ClamBCTrace.cpp @@ -20,7 +20,6 @@ * MA 02110-1301, USA. */ #include "clambc.h" -#include "ClamBCModule.h" #include "ClamBCCommon.h" #include "ClamBCUtilities.h" @@ -34,6 +33,8 @@ #include #include #include +#include +#include #include #include #include @@ -55,22 +56,19 @@ static cl::opt InsertTracing("clambc-trace", cl::Hidden, cl::init(false), cl::desc("Enable tracing of bytecode execution")); -namespace +namespace ClamBCTrace { -class ClamBCTrace : public ModulePass + +class ClamBCTrace : public PassInfoMixin { public: - static char ID; - ClamBCTrace() - : ModulePass(ID) {} + ClamBCTrace() {} virtual llvm::StringRef getPassName() const { return "ClamAV Bytecode Execution Tracing"; } - virtual bool runOnModule(Module &M); + PreservedAnalyses run(Module &m, ModuleAnalysisManager &MAM); }; -char ClamBCTrace::ID; -} // namespace /* declare i32 @trace_directory(i8*, i32) @@ -87,10 +85,11 @@ declare i32 @trace_ptr(i8*, i32) */ -bool ClamBCTrace::runOnModule(Module &M) +PreservedAnalyses ClamBCTrace::run(Module &M, ModuleAnalysisManager &MAM) { - if (!InsertTracing) - return false; + if (!InsertTracing) { + return PreservedAnalyses::all(); + } 
unsigned MDDbgKind = M.getContext().getMDKindID("dbg"); DenseMap scopeIDs; unsigned scopeid = 0; @@ -102,16 +101,16 @@ bool ClamBCTrace::runOnModule(Module &M) args.push_back(I32Ty); FunctionType *FTy = FunctionType::get(I32Ty, args, false); /* llvm 10 replaces this with FunctionCallee. */ - Constant *trace_directory = M.getOrInsertFunction("trace_directory", FTy); - Constant *trace_scope = M.getOrInsertFunction("trace_scope", FTy); - Constant *trace_source = M.getOrInsertFunction("trace_source", FTy); - Constant *trace_op = M.getOrInsertFunction("trace_op", FTy); - Constant *trace_value = M.getOrInsertFunction("trace_value", FTy); - Constant *trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); + FunctionCallee trace_directory = M.getOrInsertFunction("trace_directory", FTy); + FunctionCallee trace_scope = M.getOrInsertFunction("trace_scope", FTy); + FunctionCallee trace_source = M.getOrInsertFunction("trace_source", FTy); + FunctionCallee trace_op = M.getOrInsertFunction("trace_op", FTy); + FunctionCallee trace_value = M.getOrInsertFunction("trace_value", FTy); + FunctionCallee trace_ptr = M.getOrInsertFunction("trace_ptr", FTy); assert(trace_scope && trace_source && trace_op && trace_value && trace_directory && trace_ptr); - if (!trace_directory->use_empty() || !trace_scope->use_empty() || !trace_source->use_empty() || !trace_op->use_empty() || - !trace_value->use_empty() || !trace_ptr->use_empty()) { + if (!trace_directory.getCallee()->use_empty() || !trace_scope.getCallee()->use_empty() || !trace_source.getCallee()->use_empty() || !trace_op.getCallee()->use_empty() || + !trace_value.getCallee()->use_empty() || !trace_ptr.getCallee()->use_empty()) { ClamBCStop("Tracing API can only be used by compiler!\n", &M); } @@ -140,7 +139,7 @@ bool ClamBCTrace::runOnModule(Module &M) DILocation *Loc = II->getDebugLoc(); StringRef file = Loc->getFilename(); Value *File = builder.CreateGlobalStringPtr(file.str().c_str()); - /*just getting this to compile, so i can iterate 
the MDNode's in the Instruction, + /*just getting this to compile, so i can iterate the MDNode's in the Instruction, * and see which one i want. */ MDNode *NewScope = nullptr; @@ -155,20 +154,19 @@ bool ClamBCTrace::runOnModule(Module &M) DIScope *scope = Loc->getScope(); while (llvm::isa(scope)) { DILexicalBlock *lex = llvm::cast(scope); - //scope = lex->getContext(); - /*aragusa: I have no idea if this is the right thing to do here.*/ + // scope = lex->getContext(); scope = lex->getScope(); } Value *Scope = 0; if (llvm::isa(scope)) { DISubprogram *sub = llvm::cast(scope); - //StringRef name = sub->getDisplayName(); - //if (name.empty()) name = sub->getName(); + // StringRef name = sub->getDisplayName(); + // if (name.empty()) name = sub->getName(); StringRef name = sub->getName(); Scope = builder.CreateGlobalStringPtr(name.str().c_str()); } else { - //assert(scope->isCompileUnit()); + // assert(scope->isCompileUnit()); assert(llvm::isa(scope) && "Not a DICompileUnit"); DICompileUnit *unit = llvm::cast(scope); Scope = @@ -197,14 +195,9 @@ bool ClamBCTrace::runOnModule(Module &M) for (Function::arg_iterator AI = I->arg_begin(), AE = I->arg_end(); AI != AE; ++AI) { if (isa(AI->getType())) { -#if 0 - Value *V = builder.CreateIntCast(AI, Type::getInt32Ty(M.getContext()), false); - Value *ValueName = builder.CreateGlobalStringPtr(AI->getName().data()); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(AI->getType())) { Value *V = builder.CreatePointerCast(AI, - PointerType::getUnqual(Type::getInt8Ty(M.getContext()))); + PointerType::getUnqual(Type::getInt8Ty(M.getContext()))); std::vector args = { V, ConstantInt::get(Type::getInt32Ty(M.getContext()), 0)}; builder.CreateCall(trace_ptr, args, "ClamBCTrace_trace_ptr"); @@ -218,13 +211,7 @@ bool ClamBCTrace::runOnModule(Module &M) std::vector args = { Op, ConstantInt::get(Type::getInt32Ty(M.getContext()), Loc->getColumn())}; builder.CreateCall(trace_op, args, "ClamBCTrace_trace_op"); - //Value 
*ValueName = builder.CreateGlobalStringPtr(II->getName().data()); if (isa(II->getType())) { -#if 0 - builder.SetInsertPoint(&*J, BBIt); - Value *V = builder.CreateIntCast(II, Type::getInt32Ty(M.getContext()), false); - builder.CreateCall2(trace_value, ValueName, V); -#endif } else if (isa(II->getType())) { builder.SetInsertPoint(&*J, BBIt); Value *V = builder.CreatePointerCast(II, @@ -237,10 +224,26 @@ bool ClamBCTrace::runOnModule(Module &M) } } } - return true; + return PreservedAnalyses::none(); } -llvm::ModulePass *createClamBCTrace() +} // namespace ClamBCTrace + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCTrace(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCTrace", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-trace") { + FPM.addPass(ClamBCTrace::ClamBCTrace()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/ClamBCUtilities.cpp b/libclambcc/ClamBCUtilities.cpp new file mode 100644 index 00000000000..8f9f7b58ec6 --- /dev/null +++ b/libclambcc/ClamBCUtilities.cpp @@ -0,0 +1,329 @@ + +#include "ClamBCUtilities.h" +#include "ClamBCDiagnostics.h" +#include "clambc.h" + +#include +#include +#include + +using namespace llvm; + +void ClamBCStop(const Twine &Msg, const Module *M) +{ + printDiagnostic(Msg, M); + exit(42); +} + +void ClamBCStop(const Twine &Msg, const Function *F) +{ + printDiagnostic(Msg, F); + exit(42); +} + +void ClamBCStop(const Twine &Msg, const Instruction *I) +{ + printDiagnostic(Msg, I); + exit(42); +} + +bool functionRecurses(Function *pFunc, Function *orig, std::vector &visited) +{ + if (visited.end() != std::find(visited.begin(), visited.end(), pFunc)) { + return false; + } + visited.push_back(pFunc); + + for (auto funcIter = pFunc->begin(), funcEnd = pFunc->end(); funcIter != funcEnd; funcIter++) 
{ + BasicBlock *bb = llvm::cast(funcIter); + + for (auto blockIter = bb->begin(), blockEnd = bb->end(); blockIter != blockEnd; blockIter++) { + Instruction *inst = llvm::cast(blockIter); + if (CallInst *ci = llvm::dyn_cast(inst)) { + Value *calledValue = ci->getCalledFunction(); + if (nullptr == calledValue) { + ClamBCStop("Calls to function pointers not allowed", ci); + } + if (calledValue == orig) { + return true; + } else if (Function *callee = dyn_cast(calledValue)) { + if (functionRecurses(callee, orig, visited)) { + return true; + } + } + } + } + } + return false; +} + +bool functionRecurses(Function *pFunc) +{ + std::vector visited; + return functionRecurses(pFunc, pFunc, visited); +} + +void getDependentValues(llvm::Value *pv, std::set &insts, + std::set &globs, std::set &ces, + std::set &visited) +{ + if (visited.end() != std::find(visited.begin(), visited.end(), pv)) { + return; + } + + bool first = (0 == visited.size()); + visited.insert(pv); + + if (not first) { + if (llvm::isa(pv)) { + Instruction *inst = llvm::cast(pv); + insts.insert(inst); + } else if (llvm::isa(pv)) { + GlobalVariable *gv = llvm::cast(pv); + globs.insert(gv); + } else if (llvm::isa(pv)) { + GEPOperator *tmp = llvm::cast(pv); + assert(llvm::isa(pv) && "Not a ConstantExpr"); + getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); + } else if (llvm::isa(pv)) { + BitCastOperator *tmp = llvm::cast(pv); + assert(llvm::isa(pv) && "Not a ConstantExpr"); + getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); + } else if (llvm::isa(pv)) { + PtrToIntOperator *tmp = llvm::cast(pv); + assert(llvm::isa(pv) && "Not a ConstantExpr"); + getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); + } else if (llvm::isa(pv)) { + ZExtOperator *tmp = llvm::cast(pv); + assert(llvm::isa(pv) && "Not a ConstantExpr"); + getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); + } + + if (llvm::isa(pv)) { + ConstantExpr *ce = llvm::cast(pv); + ces.insert(ce); 
+ getDependentValues(ce->getOperand(0), insts, globs, ces, visited); + } + } + + for (auto i = pv->user_begin(), e = pv->user_end(); i != e; i++) { + Value *val = llvm::cast(*i); + getDependentValues(val, insts, globs, ces, visited); + } +} + +void getDependentValues(llvm::Value *pv, std::set &insts, + std::set &globs) +{ + std::set ces; + std::set visited; + getDependentValues(pv, insts, globs, ces, visited); +} + +void getDependentValues(llvm::Value *pv, std::set &insts, + std::set &globs, std::set &ces) +{ + std::set visited; + getDependentValues(pv, insts, globs, ces, visited); +} + +bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo) +{ + for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { + BasicBlock *pBB = llvm::cast(i); + if (nullptr != loopInfo.getLoopFor(pBB)) { + return true; + } + } + return false; +} + +llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock) +{ + return llvm::cast(pBlock->getParent()->begin()); +} + +int64_t getTypeSize(llvm::Module *pMod, llvm::Type *pt) +{ + + int64_t size = pt->getScalarSizeInBits(); + if (size) { + return size; + } + + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + size = pat->getNumElements() * (getTypeSize(pMod, pat->getElementType())); + if (size) { + return size; + } + } + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + const StructLayout *psl = pMod->getDataLayout().getStructLayout(pst); + return psl->getSizeInBits(); + } + + assert(0 && "Size has not been computed"); + return -1; +} + +int64_t getTypeSizeInBytes(llvm::Module *pMod, Type *pt) +{ + return getTypeSize(pMod, pt) / 8; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pt, uint64_t idx) +{ + + int64_t cnt = 0; + + assert((llvm::isa(pt) || llvm::isa(pt)) && "pt must be a complex type"); + + StructType *pst = llvm::dyn_cast(pt); + if (nullptr != pst) { + assert((idx <= pst->getNumElements()) && "Idx too high"); + + const StructLayout *psl = 
pMod->getDataLayout().getStructLayout(pst); + assert(psl && "Could not get layout"); + + cnt = psl->getElementOffsetInBits(idx) / 8; + + } else { + ArrayType *pat = llvm::dyn_cast(pt); + if (nullptr != pat) { + assert((idx <= pat->getNumElements()) && "Idx too high"); + cnt = idx * getTypeSizeInBytes(pMod, pat->getElementType()); + } + } + + return cnt; +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst, ConstantInt *pIdx) +{ + int64_t idx = pIdx->getLimitedValue(); + return computeOffsetInBytes(pMod, pst, idx); +} + +int64_t computeOffsetInBytes(llvm::Module *pMod, Type *pst) +{ + if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getStructNumElements()); + } else if (llvm::isa(pst)) { + return computeOffsetInBytes(pMod, pst, pst->getArrayNumElements()); + } else { + assert(0 && "pt must be a complex type"); + } + + return 0; +} + +Type *findTypeAtIndex(Type *pst, ConstantInt *ciIdx) +{ + Type *ret = nullptr; + StructType *st = llvm::dyn_cast(pst); + if (nullptr != st) { + uint64_t idx = ciIdx->getLimitedValue(); + + assert(idx < st->getNumElements() && "Something went wrong"); + return st->getTypeAtIndex(idx); + } + + ArrayType *at = llvm::dyn_cast(pst); + if (nullptr != at) { + return at->getArrayElementType(); + } + return ret; +} + +/*Only pass in either ConstantExpr or Instruction */ +Type *getResultType(Value *pVal) +{ + + Type *type = nullptr; + + if (llvm::isa(pVal)) { + ConstantExpr *pce = llvm::cast(pVal); + type = pce->getOperand(0)->getType(); + } else if (llvm::isa(pVal)) { + Instruction *pInst = llvm::cast(pVal); + type = pInst->getOperand(0)->getType(); + } else { + assert(0 && "This function must be called with either Instruction or a ConstantExpr"); + return nullptr; + } + + if (llvm::isa(type)) { + if (llvm::isa(pVal)) { + GEPOperator *pgep = llvm::cast(pVal); + type = pgep->getSourceElementType(); + + } else if (llvm::isa(pVal)) { + GetElementPtrInst *pInst = llvm::cast(pVal); + type = 
pInst->getSourceElementType(); + } else if (llvm::isa(pVal)) { + BitCastOperator *pbco = llvm::cast(pVal); + type = pbco->getDestTy(); + } else if (llvm::isa(pVal)) { + BitCastInst *pInst = llvm::cast(pVal); + type = pInst->getDestTy(); + } else { + llvm::errs() << "<" << __LINE__ << ">" + << "https://llvm.org/docs/OpaquePointers.html" + << "\n"; + llvm::errs() << "<" << __LINE__ << ">" << *pVal << "\n"; + assert(0 && "FIGURE OUT WHAT TO DO HERE"); + } + } + + return type; +} + +void gatherCallsToIntrinsic(Function *pFunc, const char *const functionName, std::vector &calls) +{ + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) { + BasicBlock *pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) { + if (CallInst *pci = llvm::dyn_cast(bi)) { + Function *pCalled = pci->getCalledFunction(); + if (pCalled && pCalled->isIntrinsic()) { + if (functionName == pCalled->getName()) { + calls.push_back(pci); + } + } + } + } + } +} + +void gatherCallsToIntrinsic(Module *pMod, const char *const functionName, std::vector &calls) +{ + for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { + Function *pFunc = llvm::cast(i); + if (pFunc->isDeclaration()) { + continue; + } + + gatherCallsToIntrinsic(pFunc, functionName, calls); + } +} + +void replaceAllCalls(FunctionType *pFuncType, Function *pFunc, + const std::vector &calls, const char *const namePrefix) +{ + + for (size_t i = 0; i < calls.size(); i++) { + CallInst *pci = calls[i]; + + std::vector args; + for (size_t i = 0; i < pci->arg_size(); i++) { + args.push_back(pci->getArgOperand(i)); + } + CallInst *pNew = CallInst::Create(pFuncType, pFunc, args, + namePrefix, pci); + pci->replaceAllUsesWith(pNew); + pci->eraseFromParent(); + } +} diff --git a/libclambcc/ClamBCUtilities.h b/libclambcc/ClamBCUtilities.h new file mode 100644 index 00000000000..ec3d882a47d --- /dev/null +++ b/libclambcc/ClamBCUtilities.h @@ -0,0 +1,58 @@ +#ifndef CLAMBC_UTILITIES_H_ +#define 
CLAMBC_UTILITIES_H_ + +#include "ClamBCDiagnostics.h" + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include + +/*These are a temporary replacement for ClamBCModule::stop. */ +void ClamBCStop(const llvm::Twine &Msg, const llvm::Module *M); + +void ClamBCStop(const llvm::Twine &Msg, const llvm::Function *F); + +void ClamBCStop(const llvm::Twine &Msg, const llvm::Instruction *I); + +bool functionRecurses(llvm::Function *pFunc); + +void getDependentValues(llvm::Value *pv, std::set &insts, std::set &globs); + +void getDependentValues(llvm::Value *pv, std::set &insts, std::set &globs, std::set &ces); + +bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo); + +llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock); + +int64_t getTypeSize(llvm::Module *pMod, llvm::Type *pt); + +int64_t getTypeSizeInBytes(llvm::Module *pMod, llvm::Type *pt); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pt, uint64_t idx); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pst, llvm::ConstantInt *pIdx); + +int64_t computeOffsetInBytes(llvm::Module *pMod, llvm::Type *pst); + +llvm::Type *findTypeAtIndex(llvm::Type *pst, llvm::ConstantInt *ciIdx); + +llvm::Type *getResultType(llvm::Value *pVal); + +void gatherCallsToIntrinsic(llvm::Function *pFunc, const char *const functionName, + std::vector &calls); + +void gatherCallsToIntrinsic(llvm::Module *pMod, const char *const functionName, + std::vector &calls); + +void replaceAllCalls(llvm::FunctionType *pFuncType, llvm::Function *pFunc, + const std::vector &calls, const char *const namePrefix); + +#endif // CLAMBC_UTILITIES_H_ diff --git a/libclambcc/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier.cpp new file mode 100644 index 00000000000..399466448be --- /dev/null +++ b/libclambcc/ClamBCVerifier.cpp @@ -0,0 +1,391 @@ +/* + * Compile LLVM bytecode to ClamAV bytecode. + * + * Copyright (C) 2009-2010 Sourcefire, Inc. 
+ * + * Authors: Török Edvin + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301, USA. + */ + +/* + * aragusa: + * I haven't looked into everything this pass does, but one thing I have found is that it inserts run-time bounds + * checking for pointers. What it does is look at the access to a pointer, and insert a check for if that will + * access too much memory. If it would, it jumps to an "AbortBB", a basic block that calls abort. One potential + * improvement, would be to look at all the calls ahead of time and only have a check for the highest access, not + * every access. Instruction combining doesn't do a great job of fixing those up. + * + * There are cases where the IR would look like the following pseudocode. + * + * if (idx < 67){ + * if (idx < 70) { + * do stuff ... 
+ * } else { + * call abort + * } + * } else { + * call abort + * } + */ + +#include "ClamBCDiagnostics.h" +#include "clambc.h" +#include "ClamBCUtilities.h" + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +#include + +using namespace llvm; + +namespace ClamBCVerifier +{ +class ClamBCVerifier : public PassInfoMixin, + public InstVisitor +{ + + bool Final; + llvm::Module *pMod = nullptr; + + friend class InstVisitor; + + bool visitUnreachableInst(UnreachableInst &I) + { + return true; + } + + bool visitAllocaInst(AllocaInst &I) + { + return true; + } + bool visitCastInst(CastInst &I) + { + return true; + } + bool visitSelectInst(SelectInst &I) + { + llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" + << "Selects need tobe removed, so this should be a false\n"; + return true; + } + bool visitBranchInst(BranchInst &BI) + { + return true; + } + bool visitSwitchInst(SwitchInst &I) + { + printDiagnostic("Need to lower switchInst's to branches", &I); + return false; + } + bool visitBinaryOperator(Instruction &I) + { + return true; + } + bool visitReturnInst(ReturnInst &I) + { + return true; + } + bool visitICmpInst(ICmpInst &I) + { + return true; + } + + /* + * FreezeInst's are used to guarantee a value being set to something fixed + * if it is undef or a poison value. They are a noop otherwise, so we will allow + * them in the verifier, and remove them in a pass to be run after the verifier. + * (a 'verifier' shouldn't be changing the IR). 
+ */ + bool visitFreezeInst(FreezeInst &I) + { + return true; + } + + bool visitInstruction(Instruction &I) + { + + DEBUG_VALUE(&I); +#define DEBUG_NODEREF(val) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << val << "\n"; + DEBUG_NODEREF(llvm::isa(&I)); + + printDiagnostic("Unhandled instruction in verifier", &I); + return false; + } + + Function *getCalledFunctionFromCallInst(CallInst *pci) + { + + Value *pCalledOperand = pci->getCalledOperand(); + Function *ret = llvm::dyn_cast(pCalledOperand); + if (nullptr == ret) { + if (BitCastOperator *bco = llvm::dyn_cast(pCalledOperand)) { + ret = llvm::dyn_cast(bco->getOperand(0)); + } + } + + if (nullptr == ret) { + ClamBCStop("Verifier unable to get called function from call instruction", pci); + } + + return ret; + } + + bool validateFunction(const llvm::Function *pFunc) + { + + if (pFunc->isVarArg()) { + if (!pFunc->getFunctionType()->getNumParams()) { + printDiagnostic(("Calling implicitly declared function '" + + pFunc->getName() + "' is not supported (did you forget to" + "implement it, or typoed the function name?)") + .str(), + pFunc); + } else { + printDiagnostic("Checking calls to vararg functions/functions without" + "a prototype is not supported!", + pFunc); + } + return false; + } + + return true; + } + + bool visitCallInst(CallInst &CI) + { + Function *F = getCalledFunctionFromCallInst(&CI); + if (!F) { + /*Determine if we want to allow indirect calls*/ + printDiagnostic("Indirect call checking not implemented!", &CI); + return false; + } + + if (F->getCallingConv() != CI.getCallingConv()) { + printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); + return false; + } + + return validateFunction(F); + } + + bool visitPHINode(PHINode &PN) + { + for (unsigned i = 0; i < PN.getNumIncomingValues(); i++) { + if (isa(PN.getIncomingValue(i))) { + const Module *M = PN.getParent()->getParent()->getParent(); + printDiagnosticValue("Undefined value in phi", M, 
&PN); + break; + } + } + return true; + } + + bool visitGetElementPtrInst(GetElementPtrInst &GEP) + { + return true; + } + + bool visitLoadInst(LoadInst &LI) + { + return true; + } + + bool visitStoreInst(StoreInst &SI) + { + return true; + } + + virtual bool isHandled(Instruction *pInst) + { + bool bRet = llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst) || llvm::isa(pInst); + + return bRet; + } + + virtual bool isUndefOrPoisonValue(Value *pv) + { + return llvm::isa(pv); + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce, std::set &visited) + { + if (visited.end() != std::find(visited.begin(), visited.end(), pce)) { + return false; + } + visited.insert(pce); + + for (size_t i = 0; i < pce->getNumOperands(); i++) { + Value *pv = pce->getOperand(i); + if (isUndefOrPoisonValue(pv)) { + return true; + } + if (ConstantExpr *ce = llvm::dyn_cast(pv)) { + if (hasUndefsOrPoisonValues(ce, visited)) { + return true; + } + } + } + + return false; + } + + virtual bool hasUndefsOrPoisonValues(ConstantExpr *pce) + { + std::set visited; + return hasUndefsOrPoisonValues(pce, visited); + } + + /*PoisonValue is derived from UndefValue, so we only have to check for that one.*/ + virtual bool hasUndefsOrPoisonValues(Instruction *pInst) + { + for (size_t i = 0; i < pInst->getNumOperands(); i++) { + Value *pVal = pInst->getOperand(i); + if (llvm::isa(pVal)) { + continue; + } + + if (isUndefOrPoisonValue(pVal)) { + return true; + } + + if (ConstantExpr *pce = llvm::dyn_cast(pVal)) { + if (hasUndefsOrPoisonValues(pce)) { + return true; + } + } + } + return false; + } + + virtual bool walk(Function *pFunc) + { + bool bRet = true; + for (auto fi = pFunc->begin(), fe = pFunc->end(); fi != fe; fi++) { + BasicBlock *pBB = llvm::cast(fi); + for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) { + Instruction 
*pInst = llvm::cast(bi); + if (hasUndefsOrPoisonValues(pInst)) { + printDiagnostic("Poison value or Undef value found in instruction.", pInst); + return false; + } + + if (PHINode *pn = llvm::dyn_cast(pInst)) { + bRet = visitPHINode(*pn); + } else if (CallInst *pci = llvm::dyn_cast(pInst)) { + bRet = visitCallInst(*pci); + } else if (SwitchInst *psi = llvm::dyn_cast(pInst)) { + bRet = visitSwitchInst(*psi); + } else { + bRet = isHandled(pInst); + } + + if (!bRet) { + break; + } + } + } + + return bRet; + } + + public: + explicit ClamBCVerifier() + : Final(false) {} + + virtual llvm::StringRef getPassName() const + { + return "ClamAV Bytecode Verifier"; + } + + PreservedAnalyses run(Function &F, FunctionAnalysisManager &fam) + { + pMod = F.getParent(); + bool OK = validateFunction(&F); + if (OK) { + OK = walk(&F); + } + + if (!OK) { + ClamBCStop("Verifier rejected bytecode function due to errors", + &F); + } + + return PreservedAnalyses::all(); + } + virtual void getAnalysisUsage(AnalysisUsage &AU) const + { + AU.addRequired(); + AU.addRequired(); + AU.setPreservesAll(); + } +}; +// char ClamBCVerifier::ID = 0; + +} // namespace ClamBCVerifier + +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() +{ + return { + LLVM_PLUGIN_API_VERSION, "ClamBCVerifier", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, FunctionPassManager &FPM, + ArrayRef) { + if (Name == "clambc-verifier") { + FPM.addPass(ClamBCVerifier::ClamBCVerifier()); + return true; + } + return false; + }); + }}; +} diff --git a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp deleted file mode 100644 index c6f1f290fc9..00000000000 --- a/libclambcc/ClamBCVerifier/ClamBCVerifier.cpp +++ /dev/null @@ -1,261 +0,0 @@ -/* - * Compile LLVM bytecode to ClamAV bytecode. - * - * Copyright (C) 2009-2010 Sourcefire, Inc. 
- * - * Authors: Török Edvin - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, - * MA 02110-1301, USA. - */ - -/* - * aragusa: - * I haven't looked into everything this pass does, but one thing I have found is that it inserts run-time bounds - * checking for pointers. What it does is look at the access to a pointer, and insert a check for if that will - * access too much memory. If it would, it jumps to an "AbortBB", a basic block that calls abort. One potential - * improvement, would be to look at all the calls ahead of time and only have a check for the highest access, not - * every access. Instruction combining doesn't do a great job of fixing those up. - * - * There are cases where the IR would look like the following pseudocode. - * - * if (idx < 67){ - * if (idx < 70) { - * do stuff ... 
- * } else { - * call abort - * } - * } else { - * call abort - * } - */ - -#include -#include "llvm/IR/Function.h" -#include "llvm/Support/raw_ostream.h" - -#include "llvm/IR/LegacyPassManager.h" -#include "llvm/Transforms/IPO/PassManagerBuilder.h" - -using namespace llvm; - -#include "ClamBCDiagnostics.h" -#include "ClamBCModule.h" -#include -#include -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/ScalarEvolution.h" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/ScalarEvolutionExpander.h" -#include -#include -#include -#include -#include -#include -#include "llvm/Support/CommandLine.h" -#include -#include -#include -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include - -#include "llvm/ADT/SmallSet.h" - -#include "Common/clambc.h" -#include "Common/ClamBCUtilities.h" - -static cl::opt - StopOnFirstError("clambc-stopfirst", cl::init(false), - cl::desc("Stop on first error in the verifier")); -namespace -{ -class ClamBCVerifier : public FunctionPass, - public InstVisitor -{ - - ScalarEvolution *SE; - DominatorTree *DT; - BasicBlock *AbrtBB; - bool Final; - llvm::Module *pMod = nullptr; - - friend class InstVisitor; - - bool visitUnreachableInst(UnreachableInst &I) - { - return true; - } - - bool visitAllocaInst(AllocaInst &I) - { - return true; - } - bool visitCastInst(CastInst &I) - { - return true; - } - bool visitSelectInst(SelectInst &I) - { - return true; - } - bool visitBranchInst(BranchInst &BI) - { - return true; - } - bool visitSwitchInst(SwitchInst &I) - { - printDiagnostic("Need to lower switchInst's to branches", &I); - return false; - } - bool visitBinaryOperator(Instruction &I) - { - return true; - } - bool visitReturnInst(ReturnInst &I) - { - return true; - } - bool visitICmpInst(ICmpInst &I) - { - return true; - } - - bool visitInstruction(Instruction &I) - { - printDiagnostic("Unhandled instruction in 
verifier", &I); - return false; - } - - Function *getCalledFunctionFromCallInst(CallInst *pci) - { - Function *ret = pci->getCalledFunction(); - if (nullptr == ret) { - Value *v = pci->getCalledValue(); - if (BitCastOperator *bco = llvm::dyn_cast(v)) { - ret = llvm::dyn_cast(bco->getOperand(0)); - } - } - - return ret; - } - bool visitCallInst(CallInst &CI) - { - Function *F = getCalledFunctionFromCallInst(&CI); - if (!F) { - printDiagnostic("Indirect call checking not implemented yet!", &CI); - return false; - } - - if (F->getCallingConv() != CI.getCallingConv()) { - printDiagnostic("For call to " + F->getName() + ", calling conventions don't match!", &CI); - return false; - } - if (F->isVarArg()) { - if (!F->getFunctionType()->getNumParams()) { - printDiagnostic(("Calling implicitly declared function '" + - F->getName() + "' is not supported (did you forget to" - "implement it, or typoed the function name?)") - .str(), - &CI); - } else { - printDiagnostic("Checking calls to vararg functions/functions without" - "a prototype is not supported!", - &CI); - } - return false; - } - - return true; - } - - bool visitPHINode(PHINode &PN) - { - for (unsigned i = 0; i < PN.getNumIncomingValues(); i++) { - if (isa(PN.getIncomingValue(i))) { - const Module *M = PN.getParent()->getParent()->getParent(); - printDiagnosticValue("Undefined value in phi", M, &PN); - break; - } - } - return true; - } - - bool visitGetElementPtrInst(GetElementPtrInst &GEP) - { - return true; - } - - bool visitLoadInst(LoadInst &LI) - { - return true; - } - - bool visitStoreInst(StoreInst &SI) - { - return true; - } - - public: - static char ID; - explicit ClamBCVerifier() - : FunctionPass(ID), Final(false) {} - - virtual llvm::StringRef getPassName() const - { - return "ClamAV Bytecode Verifier"; - } - - virtual bool runOnFunction(Function &F) - { - pMod = F.getParent(); - AbrtBB = 0; - SE = &getAnalysis().getSE(); - ; - DT = &getAnalysis().getDomTree(); - - bool OK = true; - std::vector insns; - 
// verifying can insert runtime checks, so be safe and create an initial - // list of instructions to process so we are not affected by transforms. - for (inst_iterator I = inst_begin(&F), E = inst_end(&F); I != E; ++I) { - insns.push_back(&*I); - } - for (std::vector::iterator I = insns.begin(), E = insns.end(); - I != E; ++I) { - OK &= visit(*I); - if (!OK && StopOnFirstError) - break; - } - if (!OK) - ClamBCStop("Verifier rejected bytecode function due to errors", - &F); - return false; - } - virtual void getAnalysisUsage(AnalysisUsage &AU) const - { - AU.addRequired(); - AU.addRequired(); - AU.setPreservesAll(); - } -}; -char ClamBCVerifier::ID = 0; - -} // namespace - -static RegisterPass X("clambc-verifier", "ClamBCVerifier Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); diff --git a/libclambcc/ClamBCWriter/ClamBCWriter.cpp b/libclambcc/ClamBCWriter.cpp similarity index 92% rename from libclambcc/ClamBCWriter/ClamBCWriter.cpp rename to libclambcc/ClamBCWriter.cpp index e1f60a4fba8..0a1f4d20d94 100644 --- a/libclambcc/ClamBCWriter/ClamBCWriter.cpp +++ b/libclambcc/ClamBCWriter.cpp @@ -19,11 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, * MA 02110-1301, USA. */ -#include "../Common/bytecode_api.h" +#include "bytecode_api.h" #include "clambc.h" #include "ClamBCModule.h" -#include "ClamBCAnalyzer/ClamBCAnalyzer.h" -#include "Common/ClamBCUtilities.h" +#include "ClamBCUtilities.h" + +#include "ClamBCAnalyzer.h" +#include "ClamBCRegAlloc.h" #include #include @@ -45,6 +47,8 @@ #include #include #include +#include +#include #include #include #include @@ -60,8 +64,8 @@ extern "C" const char *clambc_getversion(void); -//There were some things that were in the previous Module, that may or may not be needed at this time. There -//are ways to share data between passes, will do that if it is necessary. +// There were some things that were in the previous Module, that may or may not be needed at this time. 
There +// are ways to share data between passes, will do that if it is necessary. using namespace llvm; @@ -72,10 +76,6 @@ static cl::opt DumpDI("clambc-dumpdi", cl::Hidden, cl::init(false), cl::desc("Dump LLVM IR with debug info to standard output")); -cl::opt - WriteDI("clambc-dbg", cl::Hidden, cl::init(false), - cl::desc("Write debug information into output bytecode")); - static cl::opt outFile("clambc-sigfile", cl::desc("Name of output file"), cl::value_desc("Name of output file"), cl::init("")); @@ -105,7 +105,7 @@ class ClamBCOutputWriter public: static ClamBCOutputWriter *createClamBCOutputWriter(llvm::StringRef srFileName, llvm::Module *pMod, - ClamBCAnalyzer *pAnalyzer) + ClamBCAnalysis *pAnalyzer) { std::error_code ec; raw_fd_ostream *rfo = new raw_fd_ostream(srFileName, ec); @@ -113,17 +113,17 @@ class ClamBCOutputWriter if (nullptr == fro) { assert(0 && "FIGURE OUT THE CORRECT WAY TO DIE"); - //ClamBCStop(); + // ClamBCStop(); } ClamBCOutputWriter *ret = new ClamBCOutputWriter(*fro, pMod, pAnalyzer); if (nullptr == ret) { assert(0 && "FIGURE OUT THE CORRECT WAY TO DIE"); - //ClamBCStop(); + // ClamBCStop(); } return ret; } - ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + ClamBCOutputWriter(llvm::formatted_raw_ostream &outStream, llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) : Out(lineBuffer), OutReal(outStream), maxLineLength(0), lastLinePos(0), pMod(pMod), pAnalyzer(pAnalyzer) { printGlobals(pMod, pAnalyzer); @@ -162,7 +162,7 @@ class ClamBCOutputWriter printFixedNumber(Out, n, fixed); } - void printModuleHeader(Module &M, ClamBCAnalyzer *pAnalyzer, unsigned maxLine) + void printModuleHeader(Module &M, ClamBCAnalysis *pAnalyzer, unsigned maxLine) { NamedMDNode *MinFunc = M.getNamedMetadata("clambc.funcmin"); NamedMDNode *MaxFunc = M.getNamedMetadata("clambc.funcmax"); @@ -206,7 +206,7 @@ class ClamBCOutputWriter // Bytecode compile timestamp time_t now = 
std::chrono::system_clock::to_time_t(std::chrono::system_clock::now()); - //printNumber(now, false); //IT APPEARS THAT I NEED THIS??? + // printNumber(now, false); //IT APPEARS THAT I NEED THIS??? printNumber(OutReal, now, false); const char *user = getenv("SIGNDUSER"); @@ -251,7 +251,7 @@ class ClamBCOutputWriter assert((OutReal.tell() < 8192) && "OutReal too big"); } - void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalyzer *pAnalyzer) + void describeType(llvm::raw_ostream &Out, const Type *Ty, Module *M, ClamBCAnalysis *pAnalyzer) { if (const FunctionType *FTy = dyn_cast(Ty)) { printFixedNumber(Out, 1, 1); @@ -272,7 +272,7 @@ class ClamBCOutputWriter Type *Ty = STy->getTypeAtIndex(i); if (isa(Ty)) { - //WriteTypeSymbolic(errs(), STy, M); + // WriteTypeSymbolic(errs(), STy, M); assert(0 && "Find replacement for WriteTypeSymbolic"); STy->dump(); @@ -310,7 +310,7 @@ class ClamBCOutputWriter if (const PointerType *PTy = dyn_cast(Ty)) { printFixedNumber(Out, 5, 1); - const Type *ETy = PTy->getElementType(); + const Type *ETy = PTy->getPointerElementType(); // pointers to opaque types are treated as i8* int id = -1; if (llvm::isa(ETy)) { @@ -378,7 +378,7 @@ class ClamBCOutputWriter } return; } - //TODO: better diagnostics here + // TODO: better diagnostics here if (isa(C)) { ClamBCStop("Floating point constants are not supported!", &M); } @@ -402,7 +402,7 @@ class ClamBCOutputWriter ClamBCStop("Unsupported constant type", &M); } - void printGlobals(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void printGlobals(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { const std::string &ls = pAnalyzer->getLogicalSignature(); if (ls.empty()) { @@ -416,7 +416,7 @@ class ClamBCOutputWriter unsigned tid = pAnalyzer->getStartTID(); const std::vector &extraTypes = pAnalyzer->getExtraTypes(); for (auto I = extraTypes.begin(), E = extraTypes.end(); I != E; ++I) { - //assert(typeIDs[*I] == tid && "internal type ID mismatch"); + // assert(typeIDs[*I] == 
tid && "internal type ID mismatch"); assert(pAnalyzer->getTypeID(*I) == tid && "internal type ID mismatch"); describeType(Out, *I, pMod, pAnalyzer); tid++; @@ -441,7 +441,7 @@ class ClamBCOutputWriter // function prototype printNumber(Out, pAnalyzer->getTypeID(F->getFunctionType()), false); // function name - std::string Name = F->getName(); + std::string Name(F->getName()); printConstData(Out, (const unsigned char *)Name.c_str(), Name.size() + 1); } @@ -533,14 +533,14 @@ class ClamBCOutputWriter } } - void finished(llvm::Module *pMod, ClamBCAnalyzer *pAnalyzer) + void finished(llvm::Module *pMod, ClamBCAnalysis *pAnalyzer) { - //maxline+1, 1 more for \0 + // maxline+1, 1 more for \0 printModuleHeader(*pMod, pAnalyzer, maxLineLength + 1); OutReal << Out.str(); - //MemoryBuffer *MB = nullptr; + // MemoryBuffer *MB = nullptr; const char *start = NULL; std::string copyright = pAnalyzer->getCopyright(); if (copyright.length()) { @@ -551,17 +551,17 @@ class ClamBCOutputWriter SrcFile = pMod->getSourceFileName(); } if (!SrcFile.empty()) { - //std::string ErrStr; - //MB = MemoryBuffer::getFile(SrcFile, &ErrStr); + // std::string ErrStr; + // MB = MemoryBuffer::getFile(SrcFile, &ErrStr); ErrorOr> mbOrErr = MemoryBuffer::getFile(SrcFile); if (std::error_code ec = mbOrErr.getError()) { ClamBCStop("Unable to (re)open input file: " + SrcFile, pMod); } - //MB = mbOrErr.get(); + // MB = mbOrErr.get(); LLVMMemoryBufferRef mbr = wrap(mbOrErr.get().release()); // mapped file is \0 terminated by getFile() start = unwrap(mbr)->getBufferStart(); - //start = MB->getBufferStart(); + // start = MB->getBufferStart(); } } if (!start) { @@ -576,7 +576,7 @@ class ClamBCOutputWriter c = *start++; } while (c == ' ' || c == '\t'); while (c != '\n' && c) { - //char b[3] = {0x60 | (c & 0xf), 0x60 | ((c >> 4) & 0xf), '\0'}; + // char b[3] = {0x60 | (c & 0xf), 0x60 | ((c >> 4) & 0xf), '\0'}; char b[3]; b[0] = 0x60 | (c & 0xf); b[1] = 0x60 | ((c >> 4) & 0xf); @@ -617,7 +617,7 @@ class 
ClamBCOutputWriter int maxLineLength = 0; int lastLinePos = 0; llvm::Module *pMod = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + ClamBCAnalysis *pAnalyzer = nullptr; void printFixedNumber(raw_ostream &Out, unsigned n, unsigned fixed) { @@ -635,7 +635,7 @@ class ClamBCOutputWriter static void printNumber(raw_ostream &Out, uint64_t n, bool constant) { - //llvm::errs() << "printNumber" << "::" << n << "::" << constant << "::"; + // llvm::errs() << "printNumber" << "::" << n << "::" << constant << "::"; char number[32]; unsigned i = 0; while (n > 0) { @@ -648,7 +648,7 @@ class ClamBCOutputWriter number[0] = 0x40 | i; } number[++i] = '\0'; - //llvm::errs() << number << "\n"; + // llvm::errs() << number << "\n"; Out << number; } @@ -674,7 +674,7 @@ class ClamBCOutputWriter Out << "|"; printNumber(Out, len, false); for (i = 0; i < len; i++) { - //char b[3] = {0x60 | (s[i] & 0xf), 0x60 | ((s[i] >> 4) & 0xf), '\0'}; + // char b[3] = {0x60 | (s[i] & 0xf), 0x60 | ((s[i] >> 4) & 0xf), '\0'}; char b[3]; b[0] = 0x60 | (s[i] & 0xf); b[1] = 0x60 | ((s[i] >> 4) & 0xf); @@ -684,31 +684,31 @@ class ClamBCOutputWriter } }; -class ClamBCWriter : public ModulePass, public InstVisitor +class ClamBCWriter : public PassInfoMixin, public InstVisitor { typedef DenseMap BBIDMap; BBIDMap BBMap; const Module *TheModule = nullptr; unsigned opcodecvt[Instruction::OtherOpsEnd]; - raw_ostream *MapOut = nullptr; - FunctionPass *Dumper = nullptr; - ClamBCRegAlloc *RA = nullptr; + raw_ostream *MapOut = nullptr; + FunctionPass *Dumper = nullptr; + ClamBCRegAllocAnalysis *RA = nullptr; unsigned fid, minflvl; MetadataContext *TheMetadata = nullptr; unsigned MDDbgKind; std::vector dbgInfo; bool anyDbg; - llvm::Module *pMod = nullptr; - ClamBCOutputWriter *pOutputWriter = nullptr; - ClamBCAnalyzer *pAnalyzer = nullptr; + llvm::Module *pMod = nullptr; + ClamBCOutputWriter *pOutputWriter = nullptr; + ClamBCAnalysis *pAnalyzer = nullptr; + ModuleAnalysisManager *pModuleAnalysisManager = nullptr; public: 
static char ID; explicit ClamBCWriter() - : ModulePass(ID), - TheModule(0), MapOut(0), Dumper(0) + : TheModule(0), MapOut(0), Dumper(0) { if (!MapFile.empty()) { std::error_code ec; @@ -735,18 +735,20 @@ class ClamBCWriter : public ModulePass, public InstVisitor void getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); - AU.addRequired(); AU.setPreservesAll(); } virtual bool doInitialization(Module &M); - bool runOnModule(Module &m) + PreservedAnalyses run(Module &m, ModuleAnalysisManager &mam) { + doInitialization(m); + pMod = &m; + pModuleAnalysisManager = &mam; - pMod = &m; - pAnalyzer = &getAnalysis(); - pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer); + ClamBCAnalysis &analysis = mam.getResult(m); + pAnalyzer = &analysis; + pOutputWriter = ClamBCOutputWriter::createClamBCOutputWriter(outFile, pMod, pAnalyzer); for (auto i = pMod->begin(), e = pMod->end(); i != e; i++) { if (llvm::isa(i)) { @@ -757,7 +759,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor } } - return false; + doFinalization(m); + return PreservedAnalyses::all(); } void gatherGEPs(BasicBlock *pBB, std::vector &geps) @@ -821,7 +824,11 @@ class ClamBCWriter : public ModulePass, public InstVisitor GetElementPtrInst *pNew = nullptr; if (pGep->isInBounds()) { - pNew = GetElementPtrInst::Create(nullptr, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); + Type *pt = ci->getType(); + if (llvm::isa(pt)) { + pt = pt->getPointerElementType(); + } + pNew = GetElementPtrInst::Create(pt, ci, newIndex, "ClamBCWriter_fixGEPs", pGep); } else { assert(0 && "DON'T THINK THIS CAN HAPPEN"); } @@ -837,12 +844,8 @@ class ClamBCWriter : public ModulePass, public InstVisitor bool runOnFunction(Function &F) { - //TODO: Move this to another pass once the Analyzer no longer - //makes changes to the code. fixGEPs(&F); - //Don't think I need this anymore. - //If anything, move it to a verifier. 
if ("" == F.getName()) { assert(0 && "Function created by ClamBCRebuild is not being deleted"); } @@ -857,10 +860,13 @@ class ClamBCWriter : public ModulePass, public InstVisitor return false; } fid++; - //Removed, see note about getFunctionID at the top of the file. - assert(pAnalyzer->getFunctionID(&F) == fid); - RA = &getAnalysis(F); + // Removed, see note about getFunctionID at the top of the file. + assert(pAnalyzer->getFunctionID(&F) == fid && "Function IDs don't match"); + + FunctionAnalysisManager &fam = pModuleAnalysisManager->getResult(*pMod).getManager(); + + RA = &fam.getResult(F); printFunction(F); if (Dumper) { Dumper->runOnFunction(F); @@ -958,7 +964,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor DEBUGERR << *(GEP.getPointerOperand()) << "\n"; DEBUGERR << *(GEP.getPointerOperand()->getType()) << "\n"; DEBUGERR << iid << "\n"; - //stop("gep1 with type > 65 won't work on interpreter", &GEP); + // stop("gep1 with type > 65 won't work on interpreter", &GEP); assert(0 && "gep1 with type > 65 won't work on interpreter"); } } @@ -976,7 +982,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor if (ConstantInt *CI = dyn_cast(GEP.getOperand(1))) { if (!CI->isZero()) { const PointerType *Ty = cast(GEP.getPointerOperand()->getType()); - const ArrayType *ATy = dyn_cast(Ty->getElementType()); + const ArrayType *ATy = dyn_cast(Ty->getPointerElementType()); if (ATy) { ClamBCStop("ATy", &GEP); } @@ -1150,7 +1156,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor assert(!isa(I.getType())); if (I.getOpcode() == Instruction::Sub) { // sub ptrtoint, ptrtoint - //TODO: push ptrtoinst through phi nodes! + // TODO: push ptrtoinst through phi nodes! 
LLVMContext &C = I.getContext(); Instruction *LI = dyn_cast(I.getOperand(0)); Instruction *RI = dyn_cast(I.getOperand(1)); @@ -1266,7 +1272,7 @@ class ClamBCWriter : public ModulePass, public InstVisitor break; default: stop("Unsupported icmp predicate", &I); - return; //Removes uninitialized opc warning. + return; // Removes uninitialized opc warning. } printFixedNumber(opc, 2); printType(I.getOperand(0)->getType()); @@ -1387,10 +1393,6 @@ class ClamBCWriter : public ModulePass, public InstVisitor stop("ClamAV bytecode backend does not know about ", &I); } }; -char ClamBCWriter::ID = 0; -static RegisterPass X("clambc-writer", "ClamBCWriter Pass", - false /* Only looks at CFG */, - false /* Analysis Pass */); bool ClamBCWriter::doInitialization(Module &M) { @@ -1420,11 +1422,10 @@ bool ClamBCWriter::doInitialization(Module &M) TheModule = &M; if (DumpDI) { - //TODO: Get debug info working. - //Dumper = createDbgInfoPrinterPass(); + // TODO: Get debug info working. + // Dumper = createDbgInfoPrinterPass(); } - fid = 0; - //OModule->writeGlobalMap(MapOut); + fid = 0; MDDbgKind = M.getContext().getMDKindID("dbg"); return false; @@ -1608,7 +1609,21 @@ void ClamBCWriter::printBasicBlock(BasicBlock *BB) } } -llvm::ModulePass *createClamBCWriter() +// This part is the new way of registering your pass +extern "C" ::llvm::PassPluginLibraryInfo LLVM_ATTRIBUTE_WEAK +llvmGetPassPluginInfo() { - return new ClamBCWriter(); + return { + LLVM_PLUGIN_API_VERSION, "ClamBCWriter", "v0.1", + [](PassBuilder &PB) { + PB.registerPipelineParsingCallback( + [](StringRef Name, ModulePassManager &FPM, + ArrayRef) { + if (Name == "clambc-writer") { + FPM.addPass(ClamBCWriter()); + return true; + } + return false; + }); + }}; } diff --git a/libclambcc/Common/ClamBCUtilities.cpp b/libclambcc/Common/ClamBCUtilities.cpp deleted file mode 100644 index 04bfaadfcc1..00000000000 --- a/libclambcc/Common/ClamBCUtilities.cpp +++ /dev/null @@ -1,141 +0,0 @@ - -#include -#include -#include - -#include 
"ClamBCUtilities.h" -#include "ClamBCDiagnostics.h" -#include "clambc.h" - -using namespace llvm; - -void ClamBCStop(const Twine &Msg, const Module *M) -{ - printDiagnostic(Msg, M); - exit(42); -} - -void ClamBCStop(const Twine &Msg, const Function *F) -{ - printDiagnostic(Msg, F); - exit(42); -} - -void ClamBCStop(const Twine &Msg, const Instruction *I) -{ - printDiagnostic(Msg, I); - exit(42); -} - -bool functionRecurses(Function *pFunc, Function *orig, std::vector &visited) -{ - if (visited.end() != std::find(visited.begin(), visited.end(), pFunc)) { - return false; - } - visited.push_back(pFunc); - - for (auto funcIter = pFunc->begin(), funcEnd = pFunc->end(); funcIter != funcEnd; funcIter++) { - BasicBlock *bb = llvm::cast(funcIter); - - for (auto blockIter = bb->begin(), blockEnd = bb->end(); blockIter != blockEnd; blockIter++) { - Instruction *inst = llvm::cast(blockIter); - if (CallInst *ci = llvm::dyn_cast(inst)) { - Value *calledValue = ci->getCalledValue(); - if (calledValue == orig) { - return true; - } else if (Function *callee = dyn_cast(calledValue)) { - if (functionRecurses(callee, orig, visited)) { - return true; - } - } - } - } - } - return false; -} - -bool functionRecurses(Function *pFunc) -{ - std::vector visited; - return functionRecurses(pFunc, pFunc, visited); -} - -void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs, std::set &ces, - std::set &visited) -{ - if (visited.end() != std::find(visited.begin(), visited.end(), pv)) { - return; - } - - bool first = (0 == visited.size()); - visited.insert(pv); - - if (not first) { - if (llvm::isa(pv)) { - Instruction *inst = llvm::cast(pv); - insts.insert(inst); - } else if (llvm::isa(pv)) { - GlobalVariable *gv = llvm::cast(pv); - globs.insert(gv); - } else if (llvm::isa(pv)) { - GEPOperator *tmp = llvm::cast(pv); - assert(llvm::isa(pv) && "Not a ConstantExpr"); - getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); - } else if (llvm::isa(pv)) { - 
BitCastOperator *tmp = llvm::cast(pv); - assert(llvm::isa(pv) && "Not a ConstantExpr"); - getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); - } else if (llvm::isa(pv)) { - PtrToIntOperator *tmp = llvm::cast(pv); - assert(llvm::isa(pv) && "Not a ConstantExpr"); - getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); - } else if (llvm::isa(pv)) { - ZExtOperator *tmp = llvm::cast(pv); - assert(llvm::isa(pv) && "Not a ConstantExpr"); - getDependentValues(tmp->getOperand(0), insts, globs, ces, visited); - } - - if (llvm::isa(pv)) { - ConstantExpr *ce = llvm::cast(pv); - ces.insert(ce); - getDependentValues(ce->getOperand(0), insts, globs, ces, visited); - } - } - - for (auto i = pv->user_begin(), e = pv->user_end(); i != e; i++) { - Value *val = llvm::cast(*i); - getDependentValues(val, insts, globs, ces, visited); - } -} - -void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs) -{ - std::set ces; - std::set visited; - getDependentValues(pv, insts, globs, ces, visited); -} - -void getDependentValues(llvm::Value *pv, std::set &insts, - std::set &globs, std::set &ces) -{ - std::set visited; - getDependentValues(pv, insts, globs, ces, visited); -} - -bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo) -{ - for (auto i = pFunc->begin(), e = pFunc->end(); i != e; i++) { - BasicBlock *pBB = llvm::cast(i); - if (nullptr != loopInfo.getLoopFor(pBB)) { - return true; - } - } - return false; -} - -llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock) -{ - return llvm::cast(pBlock->getParent()->begin()); -} diff --git a/libclambcc/Common/ClamBCUtilities.h b/libclambcc/Common/ClamBCUtilities.h deleted file mode 100644 index a010840aa00..00000000000 --- a/libclambcc/Common/ClamBCUtilities.h +++ /dev/null @@ -1,29 +0,0 @@ -#ifndef CLAMBC_UTILITIES_H_ -#define CLAMBC_UTILITIES_H_ - -#include -#include -#include - -#include - -#include "ClamBCDiagnostics.h" - -/*These are a temporary replacement for 
ClamBCModule::stop. */ -void ClamBCStop(const llvm::Twine &Msg, const llvm::Module *M); - -void ClamBCStop(const llvm::Twine &Msg, const llvm::Function *F); - -void ClamBCStop(const llvm::Twine &Msg, const llvm::Instruction *I); - -bool functionRecurses(llvm::Function *pFunc); - -void getDependentValues(llvm::Value *pv, std::set &insts, std::set &globs); - -void getDependentValues(llvm::Value *pv, std::set &insts, std::set &globs, std::set &ces); - -bool functionHasLoop(llvm::Function *pFunc, llvm::LoopInfo &loopInfo); - -llvm::BasicBlock *getEntryBlock(llvm::BasicBlock *pBlock); - -#endif // CLAMBC_UTILITIES_H_ diff --git a/libclambcc/Common/bytecode_api.h b/libclambcc/bytecode_api.h similarity index 98% rename from libclambcc/Common/bytecode_api.h rename to libclambcc/bytecode_api.h index 9d565abe549..0f521eda41c 100644 --- a/libclambcc/Common/bytecode_api.h +++ b/libclambcc/bytecode_api.h @@ -73,10 +73,10 @@ enum BytecodeKind { /** specifies a PDF hook, executes at a predetermined point of PDF parsing for PDF files */ BC_PDF, /** specifies a PE hook, executes at a predetermined point in PE parsing for PE files, - * both packed and unpacked files */ + * both packed and unpacked files */ BC_PE_ALL, /** specifies a PRECLASS hook, executes at the end of file property collection and - * operates on the original file targeted for property collection */ + * operates on the original file targeted for property collection */ BC_PRECLASS, /** specifies an ELF unpacker, executed on ELF files on a logical trigger */ BC_ELF_UNPACKER, @@ -90,10 +90,10 @@ enum BytecodeKind { * LibClamAV functionality level constants */ enum FunctionalityLevels { - FUNC_LEVEL_096 = 51, /**< LibClamAV release 0.96.0: bytecode engine released */ - FUNC_LEVEL_096_dev = 52, - FUNC_LEVEL_096_1 = 53, /**< LibClamAV release 0.96.1: logical signature use of VI/macros - * requires this minimum functionality level */ + FUNC_LEVEL_096 = 51, /**< LibClamAV release 0.96.0: bytecode engine released */ + 
FUNC_LEVEL_096_dev = 52, + FUNC_LEVEL_096_1 = 53, /**< LibClamAV release 0.96.1: logical signature use of VI/macros + * requires this minimum functionality level */ FUNC_LEVEL_096_1_dev = 54, FUNC_LEVEL_096_2 = 54, /**< LibClamAV release 0.96.2: PDF Hooks require this minimum level */ FUNC_LEVEL_096_2_dev = 55, @@ -310,7 +310,7 @@ const uint16_t __clambc_kind; * @param[in] a 0xf00dbeef * @param[in] b 0xbeeff00d * @return 0x12345678 if parameters match, 0x55 otherwise -*/ + */ uint32_t test1(uint32_t a, uint32_t b); /** @@ -588,7 +588,7 @@ uint32_t buffer_pipe_read_avail(int32_t id); * @return pointer to buffer, or NULL if buffer has less than * specified amount */ -//uint8_t *buffer_pipe_read_get(int32_t id, uint32_t amount); +// uint8_t *buffer_pipe_read_get(int32_t id, uint32_t amount); const uint8_t* buffer_pipe_read_get(int32_t id, uint32_t amount); /** @@ -1101,7 +1101,7 @@ uint32_t pdf_getobjsize(int32_t objidx); * @param[in] amount - size returned by pdf_getobjsize (or smaller) * @return NULL - invalid objidx/amount * @return pointer - pointer to original object */ -//uint8_t *pdf_getobj(int32_t objidx, uint32_t amount); +// uint8_t *pdf_getobj(int32_t objidx, uint32_t amount); const uint8_t* pdf_getobj(int32_t objidx, uint32_t amount); /** @@ -1282,9 +1282,9 @@ int32_t json_get_boolean(int32_t objid); */ int32_t json_get_int(int32_t objid); -//int64_t json_get_int64(int32_t objid); +// int64_t json_get_int64(int32_t objid); /* bytecode does not support double type */ -//double json_get_double(int32_t objid); +// double json_get_double(int32_t objid); /* ----------------- END 0.98.4 APIs ---------------------------------- */ /* ----------------- BEGIN 0.101.0 APIs ------------------------------- */ diff --git a/libclambcc/Common/clambc.h b/libclambcc/clambc.h similarity index 87% rename from libclambcc/Common/clambc.h rename to libclambcc/clambc.h index 3d790a1f9e5..f38b9de10ac 100644 --- a/libclambcc/Common/clambc.h +++ b/libclambcc/clambc.h @@ -134,7 
+134,19 @@ enum bc_global { #ifndef DEBUGERR #define DEBUGERR llvm::errs() << "<" << __FILE__ << "::" << __FUNCTION__ << "::" << __LINE__ << ">" -#endif //DEBUGERR +#endif // DEBUGERR + +#ifndef DEBUG_WHERE +#define DEBUG_WHERE llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">\n" +#endif + +#ifndef DEBUG_VALUE +#define DEBUG_VALUE(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << *__value__ << "\n"; +#endif + +#ifndef DEBUG_NONPOINTER +#define DEBUG_NONPOINTER(__value__) llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">" << __value__ << "\n"; +#endif #define BC_START_TID 69 #endif diff --git a/libclambcc/Common/version.c b/libclambcc/version.c similarity index 100% rename from libclambcc/Common/version.c rename to libclambcc/version.c diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 9729b791a1c..17b1901dc08 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -14,7 +14,7 @@ if(WIN32) file(TO_NATIVE_PATH ${sigtool_EXECUTABLE} SIGTOOL) file(TO_NATIVE_PATH ${clambc_headers_DIRECTORY} HEADERS) else() - set(LD_LIBRARY_PATH $:$ENV{LD_LIBRARY_PATH}) + set(LD_LIBRARY_PATH $:$ENV{LD_LIBRARY_PATH}) set(SOURCE ${CMAKE_SOURCE_DIR}) set(BUILD ${CMAKE_BINARY_DIR})