From 2beadb7719412d5c9a29f2da8635cf449e584bc7 Mon Sep 17 00:00:00 2001 From: Andy Ragusa Date: Fri, 5 Apr 2024 09:10:37 -0700 Subject: [PATCH] Fixed error message, as well as bug fixes found in regression --- NEWS.md | 3 +- clambcc/clambc-compiler.py | 8 ++- examples/PassManager/CMakeLists.txt | 2 +- headers/bcfeatures.h | 2 +- headers/bytecode_api.h | 2 +- headers/bytecode_detect.h | 2 +- libclambcc/CMakeLists.txt | 30 +++++----- libclambcc/ClamBCAnalyzer.h | 5 -- libclambcc/ClamBCChangeMallocArgSize.cpp | 4 +- libclambcc/ClamBCLogicalCompiler.cpp | 10 +--- libclambcc/ClamBCLowering.cpp | 1 - libclambcc/ClamBCRebuild.cpp | 33 +++++++++++ libclambcc/ClamBCRemovePointerPHIs.cpp | 74 ++++++++++++++++++++++-- libclambcc/ClamBCUtilities.cpp | 2 +- libclambcc/ClamBCVerifier.cpp | 16 ++--- 15 files changed, 139 insertions(+), 55 deletions(-) diff --git a/NEWS.md b/NEWS.md index 41c80b3990..387f611562 100644 --- a/NEWS.md +++ b/NEWS.md @@ -26,7 +26,8 @@ For example: - The bytecode compiler project now builds multiple shared object files, instead of just one with all of the passes. This is due to running with the "new" pass manager, instead of running with the legacy pass manager, - as before. + as before. See https://llvm.org/docs/NewPassManager.html and + https://blog.llvm.org/posts/2021-03-26-the-new-pass-manager/ for more details. - The bytecode compiler currently uses (deprecated) non-opaque pointers. Updating to all opaque pointers will be required for the next release. See https://llvm.org/docs/OpaquePointers.html for more information. diff --git a/clambcc/clambc-compiler.py b/clambcc/clambc-compiler.py index 164e2fac8b..cb8b304b55 100755 --- a/clambcc/clambc-compiler.py +++ b/clambcc/clambc-compiler.py @@ -254,7 +254,7 @@ def getInputSourceFileName(outputFileName: str) -> str: def getOptimizedTmpFileName(linkedFile: str) -> str: idx = linkedFile.find(LINKED_BYTECODE_FILE_EXTENSION) if -1 == idx: - die("getLinkedFileName called with invalid input", 2) + die("getOptimizedTmpFileName called with invalid input", 2) return f"{linkedFile[0:idx]}{OPTIMIZED_TMP_BYTECODE_FILE_EXTENSION}" @@ -548,6 +548,8 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: , 'globalopt' , 'clambc-preserve-abis' #remove fake function calls because O3 has already run , 'verify' +# , 'clambc-remove-pointer-phis' +# , 'verify' , 'clambc-remove-unsupported-icmp-intrinsics' , 'verify' , 'clambc-remove-usub' @@ -574,6 +576,8 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: , 'verify' , 'clambc-rebuild' , 'verify' + , 'clambc-remove-pointer-phis' + , 'verify' , 'clambc-trace' , 'verify' , 'clambc-outline-endianness-calls' @@ -603,7 +607,7 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options: , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUnsupportedICMPIntrinsics.so" , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveUSUB.so" , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveFSHL.so" -# , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemovePointerPHIs.so" #Not needed, since clambc-remove-pointer-phis is not being used. + , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemovePointerPHIs.so" #Not needed, since clambc-remove-pointer-phis is not being used. , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCLoweringNF.so" , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCRemoveICMPSLE.so" , f"--load-pass-plugin {SHARED_OBJ_DIR}/libClamBCVerifier.so" diff --git a/examples/PassManager/CMakeLists.txt b/examples/PassManager/CMakeLists.txt index 327f568880..3d8b83e666 100644 --- a/examples/PassManager/CMakeLists.txt +++ b/examples/PassManager/CMakeLists.txt @@ -1,3 +1,3 @@ -# Copyright (C) 2021-2014 Cisco Systems, Inc. and/or its affiliates. All rights reserved. +# Copyright (C) 2021-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. add_subdirectory(AnalysisPlugin) diff --git a/headers/bcfeatures.h b/headers/bcfeatures.h index 86b5879a6c..cf556b3891 100644 --- a/headers/bcfeatures.h +++ b/headers/bcfeatures.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * Authors: Török Edvin diff --git a/headers/bytecode_api.h b/headers/bytecode_api.h index 5f4f1b4e0b..04cdf7da2d 100644 --- a/headers/bytecode_api.h +++ b/headers/bytecode_api.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * Authors: Török Edvin, Kevin Lin diff --git a/headers/bytecode_detect.h b/headers/bytecode_detect.h index 1e2efc00f3..039446b61f 100644 --- a/headers/bytecode_detect.h +++ b/headers/bytecode_detect.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013-2023 Cisco Systems, Inc. and/or its affiliates. All rights reserved. + * Copyright (C) 2013-2024 Cisco Systems, Inc. and/or its affiliates. All rights reserved. * Copyright (C) 2009-2013 Sourcefire, Inc. * * Redistribution and use in source and binary forms, with or without diff --git a/libclambcc/CMakeLists.txt b/libclambcc/CMakeLists.txt index dd09b2560b..340caeda6c 100644 --- a/libclambcc/CMakeLists.txt +++ b/libclambcc/CMakeLists.txt @@ -277,21 +277,21 @@ target_link_directories(ClamBCRemoveICMPSLE PRIVATE ${LLVM_LIBRARY_DIRS}) target_link_libraries(ClamBCRemoveICMPSLE PUBLIC ${LLVM_LIBS}) install(TARGETS ClamBCRemoveICMPSLE DESTINATION ${CMAKE_INSTALL_LIBDIR}) -# # -# # The ClamBCRemovePointerPHIs shared library. -# # -# add_library(ClamBCRemovePointerPHIs SHARED -# ClamBCRemovePointerPHIs.cpp) -# target_include_directories(ClamBCRemovePointerPHIs PRIVATE -# ${LLVM_INCLUDE_DIRS}) -# set_target_properties(ClamBCRemovePointerPHIs PROPERTIES -# COMPILE_FLAGS "${WARNCXXFLAGS}" -# VERSION ${LIBCLAMBC_VERSION} -# SOVERSION ${LIBCLAMBC_SOVERSION}) -# #target_compile_definitions(ClamBCRemovePointerPHIs -DLOG_BEFORE_AFTER=1) # For testing -# target_link_directories(ClamBCRemovePointerPHIs PRIVATE ${LLVM_LIBRARY_DIRS}) -# target_link_libraries(ClamBCRemovePointerPHIs PUBLIC ${LLVM_LIBS}) -# install(TARGETS ClamBCRemovePointerPHIs DESTINATION ${CMAKE_INSTALL_LIBDIR}) +# +# The ClamBCRemovePointerPHIs shared library. +# +add_library(ClamBCRemovePointerPHIs SHARED +ClamBCRemovePointerPHIs.cpp) +target_include_directories(ClamBCRemovePointerPHIs PRIVATE +${LLVM_INCLUDE_DIRS}) +set_target_properties(ClamBCRemovePointerPHIs PROPERTIES +COMPILE_FLAGS "${WARNCXXFLAGS}" +VERSION ${LIBCLAMBC_VERSION} +SOVERSION ${LIBCLAMBC_SOVERSION}) +#target_compile_definitions(ClamBCRemovePointerPHIs -DLOG_BEFORE_AFTER=1) # For testing +target_link_directories(ClamBCRemovePointerPHIs PRIVATE ${LLVM_LIBRARY_DIRS}) +target_link_libraries(ClamBCRemovePointerPHIs PUBLIC ${LLVM_LIBS}) +install(TARGETS ClamBCRemovePointerPHIs DESTINATION ${CMAKE_INSTALL_LIBDIR}) # # # # The ClamBCRemoveUndefs shared library. diff --git a/libclambcc/ClamBCAnalyzer.h b/libclambcc/ClamBCAnalyzer.h index a9a7930769..325d61f25f 100644 --- a/libclambcc/ClamBCAnalyzer.h +++ b/libclambcc/ClamBCAnalyzer.h @@ -118,11 +118,6 @@ class ClamBCAnalysis virtual uint32_t getTypeID(const llvm::Type *const t) { TypeMapTy::iterator I = typeIDs.find(t); - if (I == typeIDs.end()) { - DEBUG_NONPOINTER("BAD VALUE"); - DEBUG_VALUE(t); - } - assert((I != typeIDs.end()) && "Type ID requested for unknown type"); return I->second; } diff --git a/libclambcc/ClamBCChangeMallocArgSize.cpp b/libclambcc/ClamBCChangeMallocArgSize.cpp index 7b3dcb868f..c8e9da5972 100644 --- a/libclambcc/ClamBCChangeMallocArgSize.cpp +++ b/libclambcc/ClamBCChangeMallocArgSize.cpp @@ -145,9 +145,7 @@ class ChangeMallocArgSize : public PassInfoMixin virtual PreservedAnalyses run(Module& m, ModuleAnalysisManager& MAM) { - pMod = &m; - DEBUGERR << "TODO: Evaluate whether or not we still need this." - << "\n"; + pMod = &m; dstType = Type::getInt64Ty(pMod->getContext()); findSizes(); diff --git a/libclambcc/ClamBCLogicalCompiler.cpp b/libclambcc/ClamBCLogicalCompiler.cpp index 013dff694b..f5b2a19ab2 100644 --- a/libclambcc/ClamBCLogicalCompiler.cpp +++ b/libclambcc/ClamBCLogicalCompiler.cpp @@ -206,8 +206,6 @@ class LogicalNode : public FoldingSetNode if (Node->kind == LOG_ADD) { ConstantRange Cmp(APInt(32, value)); // a + c < b -> a+c in [0, b) -> a in [0-c, b-c) - /*TODO: Determine if makeSatisfyingICmpRegin is better than makeAllowedICmpRegion, - * If this is changed, check the rest.*/ ConstantRange ltRange = ConstantRange::makeSatisfyingICmpRegion(CmpInst::ICMP_ULT, Cmp); ltRange = ltRange.subtract(APInt(32, Node->op0)); @@ -327,10 +325,7 @@ class LogicalNode : public FoldingSetNode return getNode(M); } - /* - * aragusa: All this is doing is checking for duplicates in whatever collection begin and end reference. - * Why are we putting them in another local? - * */ + /*Test for duplicates*/ bool checkUniq() { LogicalSet nodes; @@ -1731,8 +1726,6 @@ bool ClamBCLogicalCompiler::compileVirusNames(Module &M, unsigned kind) if (F != pCallInst->getCalledFunction()) { - llvm::errs() << "<" << __FUNCTION__ << "::" << __LINE__ << ">NOT SURE HOW THIS IS POSSIBLE\n"; - /*Not sure how this is possible, either*/ printDiagnostic("setvirusname can only be directly called", pCallInst); @@ -1811,7 +1804,6 @@ PreservedAnalyses ClamBCLogicalCompiler::run(Module &M, ModuleAnalysisManager &M } if (!compileVirusNames(M, kind)) { if (!kind || kind == BC_STARTUP) { - // return true; return PreservedAnalyses::all(); } Valid = false; diff --git a/libclambcc/ClamBCLowering.cpp b/libclambcc/ClamBCLowering.cpp index 2df1cbebb2..9d968ddd95 100644 --- a/libclambcc/ClamBCLowering.cpp +++ b/libclambcc/ClamBCLowering.cpp @@ -125,7 +125,6 @@ void ClamBCLowering::lowerIntrinsics(IntrinsicLowering *IL, Function &F) GetElementPtrInst *GEP = dyn_cast(PI->getOperand(0)); if (GEP && GEP->getNumOperands() == 2) { Value *V1 = GEP->getOperand(1); - // if (GEP->getType()->getElementType() == Type::getInt8Ty(F.getContext())) { if (GEP->getSourceElementType() == Type::getInt8Ty(F.getContext())) { Value *P0 = Builder.CreatePtrToInt(GEP->getOperand(0), V1->getType()); diff --git a/libclambcc/ClamBCRebuild.cpp b/libclambcc/ClamBCRebuild.cpp index e0f1ed0692..93f3968783 100644 --- a/libclambcc/ClamBCRebuild.cpp +++ b/libclambcc/ClamBCRebuild.cpp @@ -177,6 +177,7 @@ class ClamBCRebuild : public PassInfoMixin, public InstVisitor, public InstVisitor(v)) { + if (bci->getSrcTy() == t) { + return bci->getOperand(0); + } + + return findDuplicateType(bci->getOperand(0), t); + } + return nullptr; + } + Value *makeCast(Value *V, Type *Ty) { + + Value *v = findDuplicateType(V, Ty); + if (v) { + return v; + } + if (V->getType() == Ty) { return V; } @@ -513,6 +540,12 @@ class ClamBCRebuild : public PassInfoMixin, public InstVisitorisIntegerTy()) { V = Builder->CreateBitCast(V, Ty, "ClamBCRebuild_cast"); } else if (Ty->isPointerTy()) { // A CompositeType + + /*This appears to be necessary for 0.103 on windows.*/ + if (Ty != i8pTy) { + V = Builder->CreatePointerCast(V, i8pTy, "ClamBCRebuild"); + } + V = Builder->CreatePointerCast(V, Ty, "ClamBCRebuild"); } else { stop("Type conversion unhandled in ClamAV Bytecode Backend Rebuilder", &I); diff --git a/libclambcc/ClamBCRemovePointerPHIs.cpp b/libclambcc/ClamBCRemovePointerPHIs.cpp index b84e7bf4ed..5c00299e1b 100644 --- a/libclambcc/ClamBCRemovePointerPHIs.cpp +++ b/libclambcc/ClamBCRemovePointerPHIs.cpp @@ -19,6 +19,10 @@ using namespace llvm; #include +/* + * This Pass is only needed for 0.103 on Windows, so when we no longer need to support 0.103, it can be removed. + */ + namespace { class ClamBCRemovePointerPHIs : public PassInfoMixin @@ -226,6 +230,10 @@ class ClamBCRemovePointerPHIs : public PassInfoMixin std::vector newInsts; Instruction *insPt = findFirstNonPHI(pn->getParent()); + if (pBasePtr->getType() != pn->getType()) { + pBasePtr = CastInst::CreatePointerCast(pBasePtr, pn->getType(), "ClamBCRemovePointerPHIs_cast_", insPt); + } + PointerType *pt = llvm::dyn_cast(pBasePtr->getType()); if (nullptr == pt) { assert(0 && "This pass is only for pointer phis, how did we get here???"); @@ -248,6 +256,10 @@ class ClamBCRemovePointerPHIs : public PassInfoMixin Value *incVal = pgepi->getOperand(1); + if (incVal->getType() != pType) { + incVal = CastInst::CreateIntegerCast(incVal, pType, false, "ClamBCRemovePointerPHIs_cast_", pgepi); + } + Instruction *add = BinaryOperator::Create(Instruction::Add, idxNode, incVal, "ClamBCRemovePointerPHIs_add_", pgepi); BasicBlock *pred = findPredecessor(idxNode->getParent(), pgepi->getParent(), omitNodes); assert(pred && "Could not find predecessor"); @@ -288,17 +300,67 @@ class ClamBCRemovePointerPHIs : public PassInfoMixin return true; } + /*The idea here is that we get the insertion point for where our calculated value + * will be saved. pInst is the value we want to save, so it will have to be*/ + Instruction *getInsertionPoint(Instruction *pInst) + { + BasicBlock *pBB = pInst->getParent(); + bool canBreak = false; + Instruction *pRet = nullptr; + for (auto i = pBB->begin(), e = pBB->end(); i != e; i++) { + pRet = llvm::cast(i); + if (canBreak && (not llvm::isa(pRet))) { + break; + } + if (pRet == pInst) { + canBreak = true; + } + } + return pRet; + } + + /* Load the value from our AllocaInst, and + * replace the PHINode everywhere it is used.*/ + virtual void replaceUses(PHINode *pn, AllocaInst *pai) + { + std::vector users; + for (auto i = pn->user_begin(), e = pn->user_end(); i != e; i++) { + if (Instruction *pUser = llvm::dyn_cast(*i)) { + users.push_back(pUser); + } + } + + for (size_t i = 0; i < users.size(); i++) { + Instruction *pUser = users[i]; + Instruction *insPt = nullptr; + + if (PHINode *pnUser = llvm::dyn_cast(pUser)) { + for (size_t j = 0; j < pnUser->getNumIncomingValues(); j++) { + if (pn == pnUser->getIncomingValue(j)) { + insPt = pnUser->getIncomingBlock(j)->getTerminator(); + break; + } + } + } else { + insPt = pUser; + } + + LoadInst *pli = new LoadInst(pn->getType(), pai, "ClamBCRemovePointerPHIs_load_", insPt); + for (size_t j = 0; j < pUser->getNumOperands(); j++) { + if (pn == pUser->getOperand(j)) { + pUser->setOperand(j, pli); + break; + } + } + } + } + public: ClamBCRemovePointerPHIs() {} + /*This is only necessary for 0.103 on windows.*/ virtual PreservedAnalyses run(Module &m, ModuleAnalysisManager &mam) { - - /*Currently unused. Will remove after the RC phase.*/ - DEBUGERR << "TODO: EVALUATE WHETHER OR NOT I NEED THIS" - << "\n"; - return PreservedAnalyses::all(); - pMod = &m; bool ret = false; diff --git a/libclambcc/ClamBCUtilities.cpp b/libclambcc/ClamBCUtilities.cpp index f94c5c5810..8f9f7b58ec 100644 --- a/libclambcc/ClamBCUtilities.cpp +++ b/libclambcc/ClamBCUtilities.cpp @@ -288,7 +288,7 @@ void gatherCallsToIntrinsic(Function *pFunc, const char *const functionName, std for (auto bi = pBB->begin(), be = pBB->end(); bi != be; bi++) { if (CallInst *pci = llvm::dyn_cast(bi)) { Function *pCalled = pci->getCalledFunction(); - if (pCalled->isIntrinsic()) { + if (pCalled && pCalled->isIntrinsic()) { if (functionName == pCalled->getName()) { calls.push_back(pci); } diff --git a/libclambcc/ClamBCVerifier.cpp b/libclambcc/ClamBCVerifier.cpp index 1727df921b..399466448b 100644 --- a/libclambcc/ClamBCVerifier.cpp +++ b/libclambcc/ClamBCVerifier.cpp @@ -162,19 +162,19 @@ class ClamBCVerifier : public PassInfoMixin, Function *getCalledFunctionFromCallInst(CallInst *pci) { - Function *ret = pci->getCalledFunction(); + + Value *pCalledOperand = pci->getCalledOperand(); + Function *ret = llvm::dyn_cast(pCalledOperand); if (nullptr == ret) { - Value *v = pci->getOperand(0); /*This is the called operand.*/ - if (nullptr == v) { - llvm::errs() << "<" << __LINE__ << ">" << *pci << "\n"; - llvm::errs() << "<" << __LINE__ << ">" << *(pci->getOperand(0)) << "\n"; - assert(0 && "How do I handle function pointers?"); - } - if (BitCastOperator *bco = llvm::dyn_cast(v)) { + if (BitCastOperator *bco = llvm::dyn_cast(pCalledOperand)) { ret = llvm::dyn_cast(bco->getOperand(0)); } } + if (nullptr == ret) { + ClamBCStop("Verifier unable to get called function from call instruction", pci); + } + return ret; }