Skip to content

Commit

Permalink
Upgraded bytecode compiler to work with Clang/LLVM 16
Browse files Browse the repository at this point in the history
  • Loading branch information
ragusaa committed Dec 18, 2023
1 parent d8eddfb commit afac373
Show file tree
Hide file tree
Showing 68 changed files with 5,729 additions and 1,534 deletions.
32 changes: 19 additions & 13 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,14 +24,14 @@ project( ClamBCC
DESCRIPTION "ClamAV Bytecode Compiler." )

set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
include(Version)
#include(Version)

set(PACKAGE_NAME "${PROJECT_NAME}")
set(PACKAGE_VERSION "${PROJECT_VERSION}")
set(PACKAGE_STRING "${PROJECT_NAME} ${PROJECT_VERSION}${VERSION_SUFFIX}")
set(PACKAGE_BUGREPORT "https://github.com/Cisco-Talos/clamav-bytecode-compiler/issues")
set(PACKAGE_URL "https://www.clamav.net/")
HexVersion(PACKAGE_VERSION_NUM ${PROJECT_VERSION_MAJOR} ${PROJECT_VERSION_MINOR} ${PROJECT_VERSION_PATCH})
#HexVersion(PACKAGE_VERSION_NUM ${PROJECT_VERSION_MAJOR} ${PROJECT_VERSION_MINOR} ${PROJECT_VERSION_PATCH})

# libtool library versioning rules: http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
set(LIBCLAMBC_CURRENT 1)
Expand All @@ -40,7 +40,7 @@ set(LIBCLAMBC_AGE 0)

math(EXPR LIBCLAMBC_SOVERSION "${LIBCLAMBC_CURRENT} - ${LIBCLAMBC_AGE}")
set(LIBCLAMBC_VERSION "${LIBCLAMBC_SOVERSION}.${LIBCLAMBC_AGE}.${LIBCLAMBC_REVISION}")
HexVersion(LIBCLAMBC_VERSION_NUM ${LIBCLAMBC_CURRENT} ${LIBCLAMBC_REVISION} ${LIBCLAMBC_AGE})
#HexVersion(LIBCLAMBC_VERSION_NUM ${LIBCLAMBC_CURRENT} ${LIBCLAMBC_REVISION} ${LIBCLAMBC_AGE})

# Git optionally used to add commit info into build to differentiate in bug reports.
find_package(Git)
Expand Down Expand Up @@ -103,10 +103,10 @@ if(ENABLE_TESTS)
set(Python3_TEST_PACKAGE "pytest;-v")
endif()

find_package(ClamAV REQUIRED)
#find_package(ClamAV REQUIRED)
endif()

find_package(LLVM 8 REQUIRED)
find_package(LLVM 16 REQUIRED)

# Do not disable assertions based on CMAKE_BUILD_TYPE.
foreach(_build_type "Release" "MinSizeRel" "RelWithDebInfo")
Expand Down Expand Up @@ -187,10 +187,15 @@ configure_file(clambc-version.h.in clambc-version.h)
# Build targets!
#

include(AddLLVM)

# The bytecode compiler optimization passes
# This is the core of the bytecode compiler
add_subdirectory(libclambcc)

# Examples of plugins for the new and legacy pass managers.
add_subdirectory(examples)

# The bytecode compiler application
# This is really just a python script
add_subdirectory(clambcc)
Expand All @@ -212,17 +217,18 @@ add_subdirectory(headers)
# `pandoc -s file.tex -o file.md` mostly-works, but w/ the doxygen integration is insufficient.
# add_subdirectory(docs)

if(ENABLE_EXAMPLES)
# Example optimization passes; boilerplate to help compiler devs write new passes.
add_subdirectory( examples )
endif()
#if(ENABLE_EXAMPLES)
# # Example optimization passes; boilerplate to help compiler devs write new passes.
# add_subdirectory( examples )
#endif()

include(CTest)

add_custom_target(check COMMAND ${CMAKE_CTEST_COMMAND})
if(ENABLE_TESTS)
# Tests to verify compiler works as intended and that signatures behave as intended.
add_subdirectory( test )
endif()
#if(ENABLE_TESTS)
# # Tests to verify compiler works as intended and that signatures behave as intended.
# add_subdirectory( test )
#endif()

if(WIN32)
# Include the license(s) in the installation
Expand Down
208 changes: 176 additions & 32 deletions clambcc/clambc-compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,11 @@


#This is the list of supported versions
#consider changing this to start at 8 and go up to 99. That will cover us
#from having to update this when new versions come out.
CLANG_LLVM_KNOWN_VERSIONS = [8, 9, 10, 11, 12]
CLANG_LLVM_KNOWN_VERSIONS = [16]

#This is the min clang/llvm version this has been tested with.
MIN_CLANG_LLVM_VERSION = 8
PREFERRED_CLANG_LLVM_VERSION = 8
MIN_CLANG_LLVM_VERSION = 16
PREFERRED_CLANG_LLVM_VERSION = 16

CLANG_NAME = "clang"
LLVM_NAME = "opt"
Expand Down Expand Up @@ -52,7 +50,7 @@
FOUND_SHARED_OBJ = False

SHARED_OBJ_DIR = Path(__file__).parent / '..' / 'lib'
if (SHARED_OBJ_DIR / 'libclambcc.so').exists():
if (SHARED_OBJ_DIR / 'libclambccommon.so').exists():
SHARED_OBJ_FILE = SHARED_OBJ_DIR / 'libclambcc.so'
FOUND_SHARED_OBJ = True

Expand Down Expand Up @@ -112,7 +110,16 @@ def validate(self) -> bool:
def run(cmd: str) -> int:
    '''
    Run a shell command via os.system(), echoing it first when VERBOSE is set.

    If the command reports a non-zero status, the status and the command are
    written to stderr and the whole process exits with status 1, so a failing
    step aborts the compile instead of being silently ignored.

    Returns the status from os.system(); because failures exit, a value that
    is actually returned to the caller is always 0.
    '''
    if VERBOSE:
        print(cmd)

    ret = os.system(cmd)
    if ret:
        # Report on stderr so the diagnostic is not mixed into normal output.
        print(f"Command failed with status {ret}: {cmd}", file=sys.stderr)
        sys.exit(1)

    return ret



def die(msg: str, exitStatus: int) -> None:
Expand Down Expand Up @@ -180,17 +187,26 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC
for d in options.defines:
defines += f"-D{d} "

print ("TODO: Put clang options in a list")
cmd = f"{clangLLVM.getClang()} \
-S \
-fno-discard-value-names \
--language=c \
-emit-llvm \
-Werror=unused-command-line-argument \
-Xclang \
-disable-O0-optnone \
-o {outFile} \
{fileName} \
"
-S \
-fno-discard-value-names \
-Wno-implicit-function-declaration \
-fno-vectorize \
--language=c \
-emit-llvm \
-Werror=unused-command-line-argument \
-Xclang \
-disable-O0-optnone \
-Xclang -no-opaque-pointers \
{fileName} \
-o \
{outFile} \
-I \
{INCDIR} \
-include \
bytecode.h \
-D__CLAMBC__"

cmd += f" \
{includePaths} \
Expand All @@ -201,11 +217,12 @@ def compileFile(clangLLVM: ClangLLVM, fileName: str, debugBuild: bool, standardC
cmd += " -g \
"

if (not standardCompiler):
cmd += f" -I {INCDIR} \
-include bytecode.h \
-D__CLAMBC__ \
"
#TODO: Remove the 'standardCompiler' thing
# if (not standardCompiler):
# cmd += f" -I {INCDIR} \
# -include bytecode.h \
# -D__CLAMBC__ \
# "

if options.disableCommonWarnings:
cmd += COMMON_WARNING_OPTIONS
Expand Down Expand Up @@ -261,7 +278,8 @@ def linkIRFiles(clangLLVM: ClangLLVM, linkedFile: str, irFiles: list) -> int:
Returns the exit status code for the call to `llvm-link`.
'''
inFiles = " ".join(irFiles)
cmd = f"{clangLLVM.getLLVMLink()} -S -o {linkedFile} {inFiles}"
print ("TODO: Put llvm-link args in a list")
cmd = f"{clangLLVM.getLLVMLink()} -opaque-pointers=0 -S -o {linkedFile} {inFiles}"

return run(cmd)

Expand Down Expand Up @@ -444,13 +462,15 @@ def getOutputString(linked: IRFile, ignore: IRFile) -> str:
def createOptimizedTmpFile(clangLLVM: ClangLLVM, linkedFile: str) -> str:
name = getOptimizedTmpFileName(linkedFile)

cmd = f"{clangLLVM.getOpt()} \
cmd = f'{clangLLVM.getOpt()} \
-S \
{linkedFile} \
-o {name} \
-internalize -internalize-public-api-list=entrypoint \
-globalopt \
"
-internalize-public-api-list=entrypoint \
--passes="internalize,globalopt" \
'

print ("TODO: put this in an array")

ret = run(cmd)
if None == ret:
Expand Down Expand Up @@ -491,11 +511,122 @@ def createInputSourceFile(clangLLVM: ClangLLVM, name: str, args: list, options:
return res


# Symbol names handed to `opt` through -internalize-public-api-list (see
# OPTIMIZE_OPTIONS below).
INTERNALIZE_API_LIST = [
    "_Z10entrypointv",
    "entrypoint",
    "__clambc_kind",
    "__clambc_virusname_prefix",
    "__clambc_virusnames",
    "__clambc_filesize",
    "__clambc_match_counts",
    "__clambc_match_offsets",
    "__clambc_pedata",
    "__Copyright",
]

# Command-line options for the `opt` invocation built in optimize(), where
# they are joined with single spaces. Each option is listed exactly once and
# carries no padding of its own.
OPTIMIZE_OPTIONS = [
    "-S",
    "--disable-loop-unrolling",
    "--disable-i2p-p2i-opt",
    "--disable-promote-alloca-to-lds",
    "--disable-promote-alloca-to-vector",
    "--disable-simplify-libcalls",
    "--disable-tail-calls",
    "--vectorize-slp=false",
    "--vectorize-loops=false",
    # The embedded double quotes are consumed by the shell that runs the command.
    "-internalize-public-api-list=\"%s\"" % ",".join(INTERNALIZE_API_LIST),
    #TODO: Remove this when we properly handle opaque pointers.
    "-opaque-pointers=0",
]

# Ordered pass pipeline; optimize() joins the entries with commas and passes
# the result to `opt` as --passes="...". Most steps are followed by a
# 'verify' entry.
OPTIMIZE_PASSES = [
    "function(mem2reg)",
    "verify",
    "clambc-preserve-abis",
    "verify",
    "default<O3>",
    "globalopt",
    # Second run removes the fake function calls now that O3 has already run.
    "clambc-preserve-abis",
    "verify",
    "clambc-remove-unsupported-icmp-intrinsics",
    "verify",
    "clambc-remove-usub",
    "verify",
    "clambc-remove-fshl",
    "verify",
    # Lowering pass.
    "clambc-lowering-notfinal",
    "verify",
    "lowerswitch",
    "verify",
    "clambc-remove-icmp-sle",
    "verify",
    "function(clambc-verifier)",
    "verify",
    "clambc-remove-freeze-insts",
    "verify",
    # Lowering pass again.
    # NOTE(review): same pass name as the earlier lowering step; confirm a
    # different (final) lowering pass was not intended here.
    "clambc-lowering-notfinal",
    "verify",
    # Logical-trigger compilation (helper first, then the compiler itself);
    # the original comment on these two entries was cut off mid-sentence.
    "clambc-lcompiler-helper",
    "verify",
    "clambc-lcompiler",
    "verify",
    "internalize",
    "verify",
    "clambc-rebuild",
    "verify",
    "clambc-trace",
    "verify",
    "clambc-outline-endianness-calls",
    "verify",
    "clambc-change-malloc-arg-size",
    "verify",
    "clambc-extend-phis-to-64-bit",
    "verify",
    "clambc-convert-memsets-to-32Bit",
    "verify",
    "globalopt",
    "clambc-prepare-geps-for-writer",
    "verify",
    "clambc-writer",
    "verify",
]

# Library-loading arguments for `opt`, joined with spaces in optimize().
# libclambccommon.so is loaded with --load; every other shared object in
# SHARED_OBJ_DIR is loaded with --load-pass-plugin, in the order listed.
OPTIMIZE_LOADS = [f"--load {SHARED_OBJ_DIR}/libclambccommon.so"] + [
    f"--load-pass-plugin {SHARED_OBJ_DIR}/lib{plugin}.so"
    for plugin in (
        "clambcremoveundefs",
        "clambcpreserveabis",
        "clambcremoveunsupportedicmpintrinsics",
        "clambcremoveusub",
        "clambcremovefshl",
        "clambcremovepointerphis",
        "clambcloweringnf",
        "clambcremoveicmpsle",
        "clambcverifier",
        "clambcremovefreezeinsts",
        "clambcloweringf",
        "clambclogicalcompilerhelper",
        "clambclogicalcompiler",
        "clambcrebuild",
        "clambctrace",
        "clambcoutlineendiannesscalls",
        "clambcchangemallocargsize",
        "clambcextendphisto64bit",
        "clambcconvertmemsetsto32bit",
        "clambcpreparegepsforwriter",
        "clambcanalyzer",
        "clambcregalloc",
        "clambcwriter",
    )
]



def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inputSourceFile: str, standardCompiler: bool) -> int:

"""
internalizeAPIList = "_Z10entrypointv,entrypoint,__clambc_kind,__clambc_virusname_prefix,__clambc_virusnames,__clambc_filesize,__clambc_match_counts,__clambc_match_offsets,__clambc_pedata,__Copyright"
if standardCompiler:
internalizeAPIList += ",main"
# if standardCompiler:
# internalizeAPIList += ",main"
#TODO: Modify ClamBCRemoveUndefs to not require mem2reg to be run before it.
cmd = (f'{clangLLVM.getOpt()} '
Expand Down Expand Up @@ -574,16 +705,29 @@ def optimize(clangLLVM: ClangLLVM, inFile: str, outFile: str, sigFile: str, inpu
#otherwise the writer gets
#unhappy.
f' -globalopt'
f' -clambc-convert-intrinsics' #convert all memset intrinsics to
f' -clambc-convert-memsets-to-32Bit' #convert all memset intrinsics to
#the 32-bit instead of the 64-bit
#intrinsic
f' -clambc-writer' #write the bytecode
f' -clambc-writer-input-source={inputSourceFile}'
f' -clambc-sigfile={sigFile}'
)
"""



cmd = f"{clangLLVM.getOpt()} %s -o %s %s %s --passes=\"%s\" -clambc-writer-input-source=%s -clambc-sigfile=%s" % (
inFile
, outFile
, " ".join(OPTIMIZE_OPTIONS)
, " ".join(OPTIMIZE_LOADS)
, ",".join(OPTIMIZE_PASSES)
, inputSourceFile
, sigFile
)

if standardCompiler:
cmd += f" -clambc-standard-compiler"
# if standardCompiler:
# cmd += f" -clambc-standard-compiler"

return run(cmd)

Expand Down
Loading

0 comments on commit afac373

Please sign in to comment.