Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Changes and fixes to run dev14 successfully CI #156

Merged
merged 10 commits into from
Sep 16, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions .github/workflows/test-seadsa-docker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,11 @@ name: CI

# Controls when the action will run.
on:
# Triggers the workflow on push or pull request events but only for the dev10 branch
# Triggers the workflow on push or pull request events but only for the dev14 branch
push:
branches: dev10
branches: dev14
pull_request:
branches: dev10
branches: dev14

# A workflow run is made up of one or more jobs that can run sequentially or in parallel
jobs:
Expand All @@ -23,6 +23,6 @@ jobs:
- name: Check out repo
uses: actions/checkout@v2
with:
ref: dev10 # only checkout dev10
ref: dev14 # only checkout dev14
- name: Build seadsa and run tests
run: docker build -t seahorn/sea-dsa-builder:bionic-llvm10 -f docker/sea-dsa-builder.Dockerfile .
run: docker build -t seahorn/sea-dsa-builder:jammy-llvm14 -f docker/sea-dsa-builder.Dockerfile .
31 changes: 1 addition & 30 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,43 +53,14 @@ option (SEA_DSA_STATIC_EXE "Static executable." OFF)

## llvm
if (TOP_LEVEL)
include(ExternalProject)
# if top-level, offer to build llvm
ExternalProject_Add (llvm
SVN_REPOSITORY http://llvm.org/svn/llvm-project/llvm/tags/RELEASE_800/final/
SOURCE_DIR ${CMAKE_SOURCE_DIR}/ext/llvm
INSTALL_DIR ${CMAKE_BINARY_DIR}/run
CMAKE_ARGS
-DCMAKE_C_COMPILER=${CMAKE_C_COMPILER} -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
-DCMAKE_INSTALL_PREFIX:PATH=<INSTALL_DIR>
-DLLVM_TARGETS_TO_BUILD:STRING=X86 -DWITH_POLY:BOOL=OFF
-DLLVM_ENABLE_PEDANTIC=OFF
-DLLVM_ENABLE_LLD=${SEA_ENABLE_LLD}
-DLLVM_ENABLE_PIC=ON
-DLLVM_BUILD_LLVM_DYLIB:BOOL=${BUILD_SEA_DSA_LIBS_SHARED}
-DLLVM_INCLUDE_TESTS:BOOL=OFF
-DLLVM_INCLUDE_GO_TESTS=OFF
-DLLVM_INCLUDE_EXAMPLES=OFF
-DLLVM_INCLUDE_DOCS=OFF
-DLLVM_BINDINGS_LIST=" "
LOG_CONFIGURE 1
LOG_BUILD 1
LOG_INSTALL 1)

# There seems to be a bug in LLVM 11's Debian packages.
# See: https://github.com/banach-space/llvm-tutor/commit/72cb20d058b9b3f51717c7a17607f7a4c7398642.
# So 11.1 is used here instead of 11.
find_package (LLVM 14.0 CONFIG)
if (NOT LLVM_FOUND)
ExternalProject_Get_Property (llvm INSTALL_DIR)
set (LLVM_ROOT ${INSTALL_DIR})
set (LLVM_DIR ${LLVM_ROOT}/lib/cmake/llvm CACHE PATH
"Forced location of LLVM cmake config" FORCE)
message (WARNING "No llvm found. Run \n\tcmake --build . && cmake ${CMAKE_SOURCE_DIR}")
message (WARNING "No llvm found. Install LLVM 14.")
return()
else()
set_target_properties(llvm PROPERTIES EXCLUDE_FROM_ALL ON)
endif()

message(STATUS "Found LLVM ${LLVM_PACKAGE_VERSION}")
Expand Down
37 changes: 20 additions & 17 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# SeaDsa: A Points-to Analysis for Verification of Low-level C/C++ #

<a href="https://travis-ci.org/seahorn/sea-dsa">
<img src="https://travis-ci.org/seahorn/sea-dsa.svg?branch=master" title="Ubuntu 14.04 LTS 64bit, g++-5"/>
<img src="https://travis-ci.org/seahorn/sea-dsa.svg?branch=dev14" title="Ubuntu 22.04 LTS 64bit, clang-14"/>
</a>


Expand All @@ -18,14 +18,16 @@ used as a stand-alone tool or together with
the [SeaHorn](https://github.com/seahorn/seahorn)
verification framework and its analyses.

This branch supports LLVM 14.

## Requirements ##

`SeaDsa` is written in C++ and uses the Boost library. The main requirements
are:

- C++ compiler supporting c++14
- Boost >= 1.65
- LLVM 10
- LLVM 14

To run tests, install the following packages:

Expand Down Expand Up @@ -69,10 +71,10 @@ project's `CMakeLists.txt`:

### Standalone (for developers) ###

If you already installed `llvm-10` on your machine:
If you already installed `llvm-14` on your machine:

mkdir build && cd build
cmake -DCMAKE_INSTALL_PREFIX=run -DLLVM_DIR=__here_llvm-10__/share/llvm/cmake ..
cmake -DCMAKE_INSTALL_PREFIX=run -DLLVM_DIR=__here_llvm-14__/share/llvm/cmake ..
cmake --build . --target install

Otherwise:
Expand Down Expand Up @@ -143,24 +145,25 @@ directory `DIR` then add the option `-sea-dsa-dot-outdir=DIR`

![Example of a memory graph](https://github.com/seahorn/sea-dsa/blob/tea-dsa/tests/expected_graphs/simple.jpg?raw=true)

In our memory model, a pointer is represented by a __cell__ which is a
pair of a memory object and offset. Memory objects are represented as
nodes in the memory graph. Edges are between cells.
In our memory model, a pointer value is represented by a __cell__
which is a pair of a memory object and offset. Memory objects are
represented as nodes in the memory graph. Edges are between cells.

Each node field represents a cell (i.e., an offset in the node). For
instance, the node fields `<0,i32**>` and `<8,i32**>` pointed by `%6`
and `%15`, respectively are two different cells from the same memory
object. The field `<8,i32**>` represents the cell at offset 8 in the
corresponding memory object and its type is `i32**`. Since edges are
between cells, they are labeled with a number that represents the
offset in the destination node. Blue edges connect formal parameters
of the function with a cell. Purple edges connect LLVM pointer
variables with cells. Nodes can have markers such as `S` (stack
allocated memory), `H` (heap allocate memory), `M` (modified memory),
`R` (read memory), `E` (externally allocated memory), etc. If a node
is red then it means that the analysis lost field sensitivity for that
node. The label `{void}` is used to denote that the node has been
allocated but it has not been used by the program.
corresponding memory object and its type is `i32**`. Black edges
represent points-to relationships between cells. They are labeled with
a number that represents the offset in the destination node. Blue
edges connect formal parameters of the function with a cell. Purple
edges connect LLVM pointer variables with cells. Nodes can have
markers such as `S` (stack allocated memory), `H` (heap allocate
memory), `M` (modified memory), `R` (read memory), `E` (externally
allocated memory), etc. If a node is red then it means that the
analysis lost field sensitivity for that node. The label `{void}` is
used to denote that the node has been allocated but it has not been
used by the program.

`sea-dsa` can also resolve indirect calls. An _indirect call_ is a
call where the callee is not known statically. `sea-dsa` identifies
Expand Down
9 changes: 5 additions & 4 deletions docker/README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
# Docker build for SeaDsa
# Docker build for SeaDsa #

Based on SeaHorn docker. For details, see `docer/README.md` in the corresponding [SeaHorn](https://github.com/seahorn/seahorn) repo.
Based on SeaHorn docker. For details, see `docker/README.md` in the corresponding [SeaHorn](https://github.com/seahorn/seahorn) repo.

## Building instructions ##

## Building instructions
Use the following command

```
docker build --build-arg BUILD_TYPE=RelWithDebInfo -t seahorn/sea-dsa-builder:bionic-llvm10 -f docker/sea-dsa-builder.Dockerfile .
docker build --build-arg BUILD_TYPE=RelWithDebInfo -t seahorn/sea-dsa-builder:jammy-llvm14 -f docker/sea-dsa-builder.Dockerfile .
```

7 changes: 3 additions & 4 deletions docker/sea-dsa-builder.Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,7 @@
# Primarily used by the CI
# Arguments:
# - BUILD_TYPE: Debug, RelWithDebInfo, Coverage
FROM seahorn/buildpack-deps-seahorn:bionic-llvm10

FROM seahorn/buildpack-deps-seahorn:jammy-llvm14

# Assume that docker-build is ran in the top-level SeaHorn directory
COPY . /sea-dsa
Expand All @@ -19,8 +18,8 @@ ARG BUILD_TYPE=RelWithDebInfo
RUN cmake .. -GNinja \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCMAKE_INSTALL_PREFIX=run \
-DCMAKE_CXX_COMPILER=clang++-10 \
-DCMAKE_C_COMPILER=clang-10 \
-DCMAKE_CXX_COMPILER=clang++-14 \
-DCMAKE_C_COMPILER=clang-14 \
-DSEA_ENABLE_LLD=ON \
-DCPACK_GENERATOR="TGZ" \
-DCMAKE_EXPORT_COMPILE_COMMANDS=ON
Expand Down
141 changes: 56 additions & 85 deletions lib/seadsa/DsaCompleteCallGraph.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"

#include "seadsa/AllocWrapInfo.hh"
#include "seadsa/CallGraphUtils.hh"
Expand Down Expand Up @@ -64,60 +65,25 @@ static const Value *findUniqueReturnValue(const Function &F) {
return onlyRetVal;
}

static Value *stripBitCast(Value *V) {
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) {
if (CE->isCast()) {
return CE->getOperand(0);
}
}
if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
return BC->getOperand(0);
}
return V;
}

static bool isTypeCompatible(const Type *t1, const Type *t2) {
return (t1->isPointerTy() && t2->isPointerTy()) || (t1 == t2);
}

static bool isCalleeTypeCompatible(const CallBase &CB,
const Function &calleeF) {
unsigned csNArgs = CB.arg_size();
unsigned calleeNArgs = calleeF.arg_size();
FunctionType *calleeFTy = calleeF.getFunctionType();

if (calleeFTy->isVarArg()) {
// assert(csNArgs >= calleeNArgs)
if (csNArgs < calleeNArgs) { return false; }
} else {
// assert(csNArgs == calleeNArgs)
if (csNArgs != calleeNArgs) { return false; }
}

if (!isTypeCompatible(CB.getType(), calleeFTy->getReturnType())) {
return false;
}

// assert(csNArgs >= calleeNArgs)
for (unsigned i = 0; i < calleeNArgs; ++i) {
if (!isTypeCompatible(CB.getArgOperand(i)->getType(),
calleeFTy->getParamType(i))) {
return false;
}
}

return true;
}

static void resolveIndirectCallsThroughBitCast(Function &F, CallGraph &seaCg) {
// Resolve trivial indirect calls through bitcasts:
// call void (...) bitcast (void ()* @parse_dir_colors to void (...)*)()
// call i32 bitcast (i32 (...)* @nd_uint to i32 ()*)()
// call i32 (i32, i32)* bitcast (i32 (i32, i32)* (...)* @nd_binfptr to i32 (i32, i32)* ()*)()
//
// This is important because our top-down/bottom-up analyses
// traverse the call graph in a particular order (topological or
// reverse topological). If these edges are missing then the
// propagation can be done "too early" without analyzing the caller
// or callee yet.
auto stripBitCast = [](Value *V) {
if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
return BC->getOperand(0);
} else {
return V->stripPointerCasts();
}
};

for (auto &I : llvm::make_range(inst_begin(&F), inst_end(&F))) {
if (!(isa<CallInst>(I) || isa<InvokeInst>(I))) continue;
CallBase &CB = *dyn_cast<CallBase>(&I);
Expand All @@ -128,10 +94,10 @@ static void resolveIndirectCallsThroughBitCast(Function &F, CallGraph &seaCg) {
CallGraphNode *calleeCGN = seaCg[calleeF];
callerCGN->removeCallEdgeFor(CB);
callerCGN->addCalledFunction(&CB, calleeCGN);
LOG("dsa-callgraph-trivial", llvm::errs()
<< "Added edge from " << F.getName()
<< " to " << calleeF->getName()
<< " with callsite=" << CB << "\n";);
LOG("dsa-callgraph", llvm::errs()
<< "Added edge from " << F.getName()
<< " to " << calleeF->getName()
<< " with callsite=" << CB << "\n";);
}
}
}
Expand Down Expand Up @@ -244,7 +210,7 @@ void CompleteCallGraphAnalysis::cloneAndResolveArgumentsAndCallSites(
Cell &nc = callerG.mkCell(*CS.getInstruction(), Cell());

// Clone the return value directly, if we know that it corresponds to a
// single allocation site from a global
// single allocation site (e.g., return value of a malloc, a global, etc.)
const Value *onlyAllocSite = findUniqueReturnValue(callee);
if (NoBUFlowSensitiveOpt ||
(onlyAllocSite && !isa<GlobalValue>(onlyAllocSite))) {
Expand Down Expand Up @@ -567,9 +533,9 @@ bool CompleteCallGraphAnalysis::runOnModule(Module &M) {
const Function *caller = cs.getCaller();
CallGraphNode *CGNCaller = (*m_complete_cg)[caller];
assert(CGNCaller);
const CallBase &CGNCB = *dyn_cast<CallBase>(cs.getInstruction());
CallBase *cb = const_cast<CallBase *>(&CGNCB);

// We remove "const" because of isLegalToPromote
CallBase *cb = const_cast<CallBase *>(dyn_cast<CallBase>(cs.getInstruction()));
// At this point, we can try to resolve the indirect call
// Update the callgraph by adding a new edge to each
// resolved callee. However, the call site is not marked as
Expand All @@ -579,19 +545,19 @@ bool CompleteCallGraphAnalysis::runOnModule(Module &M) {
if (const Function *fn =
dyn_cast<Function>(v->stripPointerCastsAndAliases())) {

// If the callee is not type-compatible with the
// callsite then we ignore it. This ensures that the
// bottom-up phase only inlines a callee's graph if the
// callee's signature is type-compatible with the
// callsite.
if (!isCalleeTypeCompatible(*cb, *fn)) {
continue;
// Check that the callbase and the callee are type-compatible
if (!isLegalToPromote(*cb, const_cast<Function*>(fn))) {
continue;
}

foundAtLeastOneCallee = true;
CallGraphNode *CGNCallee = (*m_complete_cg)[fn];
assert(CGNCallee);
if (!hasEdge(CGNCaller, CGNCallee, CGNCB)) {
if (!hasEdge(CGNCaller, CGNCallee, *cb)) {
assert(cb);
LOG("dsa-callgraph",
llvm::errs() << "Added edge from " << caller->getName() << " to "
<< fn->getName() << " with callsite=" << *cb << "\n";);
CGNCaller->addCalledFunction(cb, CGNCallee);
m_callees[cs.getInstruction()].push_back(fn);
change = true;
Expand All @@ -608,14 +574,6 @@ bool CompleteCallGraphAnalysis::runOnModule(Module &M) {
// callee to resolve it)
// 3) it has no external allocation site.
m_resolved.insert(cs.getInstruction());
// LOG("dsa-callgraph-resolve",
// errs() << "Resolving indirect call by "
// << kv.first->getName() << " at "
// <<
// cs.getInstruction()->getParent()->getParent()->getName()
// << ":\n";
// cs.write(errs()); errs() << "\n";
// errs() << "Marked as complete.\n";);
}
}

Expand All @@ -642,23 +600,36 @@ bool CompleteCallGraphAnalysis::runOnModule(Module &M) {
for (auto &F : M) {
if (F.empty()) continue;

CallGraphNode *CGNF = (*m_complete_cg)[&F];
assert(CGNF);
if (!CGNF) continue;

// collect first callsites to avoid invalidating iterators
std::vector<CallBase*> toRemove;
for (auto &kv : llvm::make_range(CGNF->begin(), CGNF->end())) {
if (!kv.first) continue;
CallBase &CB = *dyn_cast<CallBase>(*kv.first);
if (CB.isIndirectCall() &&
!kv.second->getFunction() /* has no callee */) {
if (m_resolved.count(&CB) > 0) toRemove.push_back(&CB);
CallGraphNode *callerCG = (*m_complete_cg)[&F];
assert(callerCG);
if (!callerCG) continue;

// This loop removes safely edges while iterating
auto et = callerCG->end();
for (auto it = callerCG->begin();it!=et;) {
if (it->first) {
CallBase &CB = *dyn_cast<CallBase>(*(it->first));
if (CB.isIndirectCall() && !it->second->getFunction() /* has no callee */) {
if (m_resolved.count(&CB) > 0) {
LOG("dsa-callgraph",
llvm::errs() << "Removed edge from "
<< callerCG->getFunction()->getName() << " to "
<< it->second << " (external node) with callsite=" << CB << "\n";);

// we need to bail out if we are going to remove the last edge
bool wasLast = (it+1 == et);
// we don't increment it because removeCallEdge modifies *it
callerCG->removeCallEdge(it);
if (wasLast) {
break;
}
// we update et after deleting one of the edges
et = callerCG->end();
continue;
}
}
}
}
for (CallBase *CB : toRemove) {
assert(CB);
CGNF->removeCallEdgeFor(*CB);
++it;
}
}

Expand Down
Loading
Loading