Interpretation #175

Open: wants to merge 94 commits into base: development

Commits (94)
481f576
Add a keras_preprocessing package dependency.
ChrisCummins Aug 29, 2020
2977142
WIP: Rewrite LSTM in PyTorch.
ChrisCummins Aug 29, 2020
d2156d7
Init code for interpreting GGNN.
Jul 26, 2021
4b45633
Working version of IG over GGNN.
Jul 27, 2021
9decabc
Add arg for enabling IG.
Jul 27, 2021
46c851f
Add porting of annotated graphs.
Aug 8, 2021
62d4ea6
Add batch porting of IG attributions.
Aug 8, 2021
9a5729a
Upload some example attrs.
Aug 8, 2021
5dde21d
Test.
Aug 8, 2021
94aa157
Fix a minor bug.
Aug 8, 2021
348cbcc
Want to look at images.
Aug 8, 2021
411577f
.
Aug 8, 2021
8ec0401
Fix.
Aug 8, 2021
06ff0dd
.
Aug 9, 2021
323b245
.
Aug 9, 2021
1d143d2
Test.
Aug 9, 2021
2c13f02
.
Aug 9, 2021
848698b
.
Aug 9, 2021
8b0cf7b
.
Aug 9, 2021
18f1607
.
Aug 9, 2021
dbd5eb4
Code fixed.
Aug 9, 2021
56ced47
.
Aug 9, 2021
4e2a1d0
.
Aug 9, 2021
7ade3f4
.
Aug 9, 2021
c2ff928
.
Aug 9, 2021
e27beb5
.
Aug 9, 2021
4f4dad7
.
Aug 9, 2021
a7c7cff
.
Aug 9, 2021
8c8c17e
.
Aug 9, 2021
ee95949
.
Aug 9, 2021
edcd6d9
.
Aug 9, 2021
f9780da
.
Aug 9, 2021
474247e
.
Aug 9, 2021
57fba85
Fix coloring.
Aug 10, 2021
6dfd9b9
Add exception handling.
Aug 10, 2021
8faf2c9
.
Aug 10, 2021
5097d2c
All vis results for datadep graphs with 20 or fewer nodes/edges.
Aug 10, 2021
5c010df
First version of working dep-guided IG.
Aug 13, 2021
3aee6a3
Update submodule.
Aug 13, 2021
387e6bd
Add comparison of visualization results.
Aug 17, 2021
4208165
Test.
Aug 17, 2021
905589e
Test2.
Aug 17, 2021
7ed932f
Test.
Aug 19, 2021
50fcd0c
Sanity test.
Aug 19, 2021
ad59e5d
.
Aug 19, 2021
424d5b9
Add lots of attrs.
Aug 19, 2021
384ad9c
Add limit of max removed edges.
Aug 22, 2021
e238963
Fix a bug.
Aug 22, 2021
6a26f81
Fix yet another bug.
Aug 22, 2021
6abbfef
Some examples.
Aug 22, 2021
2301fb6
More examples.
Aug 22, 2021
9e898c2
More.
Aug 23, 2021
011eb4f
Cleanup.
Aug 23, 2021
814409d
Fix a bug.
Aug 23, 2021
9e760ea
New examples.
Aug 23, 2021
8bc0c0c
.
Aug 23, 2021
01cead9
Complete examples.
Aug 23, 2021
a876569
Update submodule.
Aug 24, 2021
4119066
Working IG variant.
Aug 24, 2021
0d0d005
New examples of IG variant.
Aug 24, 2021
ccbe9c8
Brand-new examples.
Aug 25, 2021
52eeb78
Fix a bunch of bugs.
Aug 27, 2021
1b44e2a
Data.
Aug 27, 2021
d3bfd69
Adjust something.
Aug 30, 2021
3d8b113
Enable concurrency (naive).
Aug 30, 2021
7d6dbe5
Fix a bug.
Aug 30, 2021
437265d
Add cleanup script.
Aug 30, 2021
4582948
Fix order issues in attr acc calculations.
Aug 31, 2021
12f14d0
Fix bugs.
Sep 1, 2021
1fde90f
More fixes.
Sep 1, 2021
5c1977c
Add nonuniform guided IG.
Sep 2, 2021
95b33a2
Update submodule.
Sep 2, 2021
ce479fd
Fix.
Sep 2, 2021
9e289df
Fix.
Sep 2, 2021
e70173c
Fix.
Sep 2, 2021
b2920da
Fix things.
Sep 3, 2021
693225d
.
Sep 3, 2021
0213622
Fix again.
Sep 3, 2021
5b1d8ab
Fix again.
Sep 3, 2021
eeb92c4
Fix the aggregation script.
Sep 4, 2021
f87798e
Add percentage numbers.
Sep 4, 2021
75d6ff3
Another bugfix.
Sep 4, 2021
591c910
Add averged attributions.
Sep 10, 2021
73235aa
Fix a bug.
Sep 10, 2021
71d0bc8
Fix targets/predictions bug.
Sep 10, 2021
f26252e
Fix bugs.
Sep 10, 2021
0a398b9
Add deletion and retention games for faithfulness tests.
Sep 20, 2021
9e1f23e
Fix plotting code.
Sep 20, 2021
0940814
Add fig title.
Sep 21, 2021
0ebb376
Add softmax?
Sep 24, 2021
1dabfaf
Add support of edge removal.
Sep 29, 2021
3cbd90d
Add option to remove edges or not in evaluations.
Sep 30, 2021
1ef7497
Checkpoint.
Oct 4, 2021
8818e3f
1st working version for edge removal?
Oct 4, 2021
3 changes: 3 additions & 0 deletions .gitignore
@@ -4,3 +4,6 @@
/env

__pycache__/

/dataset/dataflow/*/*
/evaluation/log/*
3 changes: 3 additions & 0 deletions .gitmodules
@@ -0,0 +1,3 @@
[submodule "captum"]
path = captum
url = [email protected]:shitongzhu/captum.git
1 change: 1 addition & 0 deletions captum
Submodule captum added at caa7f2
205 changes: 205 additions & 0 deletions evaluation/script/aggregate_exp_res.py
@@ -0,0 +1,205 @@
import os
import argparse
import matplotlib
matplotlib.use('Agg')  # to avoid using Xserver
import matplotlib.pyplot as plt


COLOR_LIST = ["blue", "red", "yellow", "orange"]


def parse_log_file(fpath):
    global log_info

    with open(fpath, 'r') as fin:
        data = fin.readlines()

    header_str = '\t'.join(data[1].strip().split(' | ')[1].split('\t')[1:])
    variants = header_str.split('\t')
    for variant in variants:
        if variant not in log_info:
            if "DELETION" in variant or "RETENTION" in variant:
                log_info[variant] = [[] for _ in range(10)]
            else:
                log_info[variant] = []

    del data[:2]
    for row in data:
        row = row.strip()
        result_str = '\t'.join(row.split(' | ')[1].split('\t')[1:])

        scores = list(map(lambda x: float(x), result_str.split('\t')[:8]))
        for i in range(8):
            log_info[variants[i]].append(scores[i])

        prob_delta = list(map(lambda x: eval(x), result_str.split('\t')[8:]))
        for i in range(8, 16):
            for j in range(len(prob_delta[i - 8])):
                log_info[variants[i]][j].append(prob_delta[i - 8][j])


parser = argparse.ArgumentParser(description='Aggregate attr accu logs.')
parser.add_argument(
    "--task", type=str, help="specify the task that log aggregation should be applied to.")
parser.add_argument("--dir", type=str, help="specify log directory.")
args = parser.parse_args()

log_info = {}

log_filenames = os.listdir(args.dir)
for fname in log_filenames:
    if args.task in fname:
        parse_log_file(args.dir + '/' + fname)

print("Analyzing task name: %s..." % args.task)
print("In directory: %s" % args.dir)

print("====== Mean Attribution Score ======")
color_choice_deletion = 0
color_choice_retention = 0
for variant, scores in log_info.items():
    if "DELETION" in variant:
        if color_choice_deletion == 0:
            save_img_path = args.dir + "/viz/DELETION_comparison.png"
        x_list, y_list = [], []
        for i in range(len(scores)):
            print("[%s] Step #%d (mean) --> %f" %
                  (variant, i, sum(scores[i]) / len(scores[i])))
            x_list.append(i)
            y_list.append(sum(scores[i]) / len(scores[i]))
        plt.plot(x_list, y_list, color=COLOR_LIST[color_choice_deletion], label=variant.replace("DELETION_RES_", ''))
        plt.legend()
        color_choice_deletion += 1
        if color_choice_deletion == 4:
            plt.xlabel("number of steps")
            plt.ylabel("predicted class probability")
            plt.title("Deletion Game Results")
            plt.show()
            plt.savefig(save_img_path, format="PNG")
            plt.clf()
    elif "RETENTION" in variant:
        if color_choice_retention == 0:
            save_img_path = args.dir + "/viz/RETENTION_comparison.png"
        x_list, y_list = [], []
        for i in range(len(scores)):
            print("[%s] Step #%d (mean) --> %f" %
                  (variant, i, sum(scores[i]) / len(scores[i])))
            x_list.append(i)
            y_list.append(sum(scores[i]) / len(scores[i]))
        plt.plot(x_list, y_list, color=COLOR_LIST[color_choice_retention], label=variant.replace("RETENTION_RES_", ''))
        plt.legend()
        color_choice_retention += 1
        if color_choice_retention == 4:
            plt.xlabel("number of steps")
            plt.ylabel("predicted class probability")
            plt.title("Retention Game Results")
            plt.show()
            plt.savefig(save_img_path, format="PNG")
            plt.clf()
    else:
        mean_score = sum(scores) / len(scores)
        if variant in {"ASCENDING_DEPENDENCY_GUIDED_IG", "UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG", "DESCENDING_DEPENDENCY_GUIDED_IG"}:
            std_mean_score = sum(log_info["STANDARD_IG"]) / \
                len(log_info["STANDARD_IG"])
            if std_mean_score == 0.0:
                continue
            margin = (mean_score - std_mean_score) / std_mean_score
            print("[ATTR_ACC_SCORE] Variant: %s | # Samples: %d | Mean score: %f (%s)" %
                  (variant, len(scores), mean_score, "{:.2f}".format(margin * 100) + "%"))
        elif variant == "STANDARD_IG":
            print("[ATTR_ACC_SCORE] Variant: %s | # Samples: %d | Mean score: %f" %
                  (variant, len(scores), mean_score))

        if variant in {"FAITH_ASCENDING_DEPENDENCY_GUIDED_IG", "FAITH_UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG", "FAITH_DESCENDING_DEPENDENCY_GUIDED_IG"}:
            std_mean_score = sum(log_info["FAITH_STANDARD_IG"]) / \
                len(log_info["FAITH_STANDARD_IG"])
            if std_mean_score == 0.0:
                continue
            margin = (mean_score - std_mean_score) / std_mean_score
            print("[FAITH_SCORE] Variant: %s | # Samples: %d | Mean score: %f (%s)" %
                  (variant, len(scores), mean_score, "{:.2f}".format(margin * 100) + "%"))
        elif variant == "FAITH_STANDARD_IG":
            print("[FAITH_SCORE] Variant: %s | # Samples: %d | Mean score: %f" %
                  (variant, len(scores), mean_score))

running_ranks = {}
for variant, _ in log_info.items():
    running_ranks[variant] = []

for i in range(len(log_info["STANDARD_IG"])):
    attr_acc_std_ig = log_info["STANDARD_IG"][i]
    attr_acc_dep_guided_ig = log_info["ASCENDING_DEPENDENCY_GUIDED_IG"][i]
    attr_acc_dep_guided_ig_unaccumulated = log_info["UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG"][i]
    attr_acc_reverse_dep_guided_ig = log_info["DESCENDING_DEPENDENCY_GUIDED_IG"][i]
    faith_score_std_ig = log_info["FAITH_STANDARD_IG"][i]
    faith_score_dep_guided_ig = log_info["FAITH_ASCENDING_DEPENDENCY_GUIDED_IG"][i]
    faith_score_dep_guided_ig_unaccumulated = log_info[
        "FAITH_UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG"][i]
    faith_score_reverse_dep_guided_ig = log_info["FAITH_DESCENDING_DEPENDENCY_GUIDED_IG"][i]

    sorted_acc_scores = sorted([
        attr_acc_std_ig,
        attr_acc_dep_guided_ig,
        attr_acc_dep_guided_ig_unaccumulated,
        attr_acc_reverse_dep_guided_ig
    ])
    variant_rank = list(map(lambda x: sorted_acc_scores.index(x), [
        attr_acc_std_ig,
        attr_acc_dep_guided_ig,
        attr_acc_dep_guided_ig_unaccumulated,
        attr_acc_reverse_dep_guided_ig,
    ]))

    sorted_faith_scores = sorted([faith_score_std_ig,
                                  faith_score_dep_guided_ig,
                                  faith_score_dep_guided_ig_unaccumulated,
                                  faith_score_reverse_dep_guided_ig
                                  ])
    variant_rank_faith = list(map(lambda x: sorted_faith_scores.index(x), [
        faith_score_std_ig,
        faith_score_dep_guided_ig,
        faith_score_dep_guided_ig_unaccumulated,
        faith_score_reverse_dep_guided_ig,
    ]))

    running_ranks["STANDARD_IG"].append(variant_rank[0])
    running_ranks["ASCENDING_DEPENDENCY_GUIDED_IG"].append(variant_rank[1])
    running_ranks["UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG"].append(
        variant_rank[2])
    running_ranks["DESCENDING_DEPENDENCY_GUIDED_IG"].append(variant_rank[3])
    running_ranks["FAITH_STANDARD_IG"].append(variant_rank_faith[0])
    running_ranks["FAITH_ASCENDING_DEPENDENCY_GUIDED_IG"].append(
        variant_rank_faith[1])
    running_ranks["FAITH_UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG"].append(
        variant_rank_faith[2])
    running_ranks["FAITH_DESCENDING_DEPENDENCY_GUIDED_IG"].append(
        variant_rank_faith[3])

print("====== Ranking For Variants ======")
for variant, ranks in running_ranks.items():
    if "DELETION" in variant or "RETENTION" in variant:
        continue
    mean_rank = sum(ranks) / len(ranks)
    if variant in {"ASCENDING_DEPENDENCY_GUIDED_IG", "UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG", "DESCENDING_DEPENDENCY_GUIDED_IG"}:
        std_mean_rank = sum(running_ranks["STANDARD_IG"]) / \
            len(running_ranks["STANDARD_IG"])
        if std_mean_rank == 0.0:
            continue
        margin = (mean_rank - std_mean_rank) / std_mean_rank
        print("[ATTR_ACC_SCORE] Variant: %s | # Samples: %d | Mean rank: %f (%s)" %
              (variant, len(ranks), mean_rank, "{:.2f}".format(margin * 100) + "%"))
    elif variant == "STANDARD_IG":
        print("[ATTR_ACC_SCORE] Variant: %s | # Samples: %d | Mean rank: %f" %
              (variant, len(ranks), mean_rank))

    if variant in {"FAITH_ASCENDING_DEPENDENCY_GUIDED_IG", "FAITH_UNACCUMULATED_ASCENDING_DEPENDENCY_GUIDED_IG", "FAITH_DESCENDING_DEPENDENCY_GUIDED_IG"}:
        std_mean_rank = sum(running_ranks["STANDARD_IG"]) / \
            len(running_ranks["STANDARD_IG"])
        if std_mean_rank == 0.0:
            continue
        margin = (mean_rank - std_mean_rank) / std_mean_rank
        print("[FAITH_SCORE] Variant: %s | # Samples: %d | Mean rank: %f (%s)" %
              (variant, len(ranks), mean_rank, "{:.2f}".format(margin * 100) + "%"))
    elif variant == "FAITH_STANDARD_IG":
        print("[FAITH_SCORE] Variant: %s | # Samples: %d | Mean rank: %f" %
              (variant, len(ranks), mean_rank))
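The variants compared by this script are all built on Integrated Gradients (IG). As a dependency-free reference, standard IG integrates the gradient along a straight-line path from a baseline to the input; the sketch below approximates that integral with a right Riemann sum. The gradient function is passed in explicitly and all names are illustrative, not the PR's implementation (which uses a captum fork).

```python
def integrated_gradients(f_grad, x, baseline, steps=100):
    """Approximate IG attributions: attr_i ~= (x_i - b_i) * mean over the path
    of dF/dx_i, sampled at `steps` points between `baseline` and `x`."""
    n = len(x)
    grad_sums = [0.0] * n
    for k in range(1, steps + 1):
        alpha = k / steps
        point = [baseline[i] + alpha * (x[i] - baseline[i]) for i in range(n)]
        g = f_grad(point)                     # gradient at this path point
        for i in range(n):
            grad_sums[i] += g[i]
    # Average the path gradients and scale by the input-baseline difference.
    return [(x[i] - baseline[i]) * grad_sums[i] / steps for i in range(n)]
```

A useful sanity check is the completeness axiom: the attributions should sum (approximately, for a finite number of steps) to F(x) - F(baseline).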
29 changes: 29 additions & 0 deletions evaluation/script/batch_ggnn_test.sh
@@ -0,0 +1,29 @@
#!/bin/bash

echo "Total number of instances to run: $1"
echo "Max number of graph nodes: $2"
echo "Max tolerable ratio of edge removal for removing cycles: $3"
echo "Task name: $4"
echo "Total test size: $5"
echo "Additional arg #1: $6"

per_instance_size=$(($5/$1))

echo "Per-instance test size: $per_instance_size"

if [ $4 = "domtree" ]; then
model_id="14"
else
model_id="15"
fi

for instance_id in `seq 1 $1`
do
echo "Starting instance with ID $instance_id..."
nohup bazel run --verbose_failures //programl/task/dataflow:ggnn_test \
-- --model=/logs/programl/$4/ddf_30/checkpoints/0$model_id.Checkpoint.pb \
--ig -dep_guided_ig --save_vis --only_pred_y --batch --random_test_size $per_instance_size \
--max_vis_graph_complexity $2 --max_removed_edges_ratio $3 --task $4 \
--filter_adjacant_nodes --instance_id $instance_id --num_instances $1 $6 \
> ../log/nohup_$4_exp_$2_$3_$1_$instance_id.log 2>&1 &
done
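The batch script splits the total test size evenly across instances with shell integer division (`$(($5/$1))`). The same arithmetic in Python, as an illustrative helper rather than part of the PR:

```python
def per_instance_size(total_test_size, num_instances):
    # Mirrors the shell's $(($5/$1)): integer division, remainder dropped.
    return total_test_size // num_instances
```

Because the remainder is dropped, any samples beyond `per_instance_size * num_instances` are never tested; choosing an instance count that divides the total evenly avoids this.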
7 changes: 7 additions & 0 deletions evaluation/script/clean_up_after_exp.sh
@@ -0,0 +1,7 @@
#!/bin/bash

echo "Cleaning up dirs..."

rm ../../dataset/dataflow/exp_log/*
rm ../../dataset/dataflow/vis_res/*
rm ../log/*
18 changes: 18 additions & 0 deletions evaluation/script/single_ggnn_test.sh
@@ -0,0 +1,18 @@
#!/bin/bash

echo "Max number of graph nodes: $1"
echo "Max tolerable ratio of edge removal for removing cycles: $2"
echo "Task name: $3"
echo "Additional arg #1: $4"

if [ $3 = "domtree" ]; then
model_id="14"
else
model_id="15"
fi

bazel run --verbose_failures //programl/task/dataflow:ggnn_test \
-- --model=/logs/programl/$3/ddf_30/checkpoints/0$model_id.Checkpoint.pb \
--ig -dep_guided_ig --save_vis --only_pred_y --batch --random_test_size 100 \
--max_vis_graph_complexity $1 --max_removed_edges_ratio $2 --task $3 \
--filter_adjacant_nodes --instance_id 1 --num_instances 1 --debug $4
79 changes: 79 additions & 0 deletions install
@@ -0,0 +1,79 @@
#!/usr/bin/env bash
help() {
cat <<EOF
Install the command line ProGraML tools.

Usage:

$ bazel run -c opt //:install [prefix]

Installs the command line tools to [prefix]/bin. [prefix] defaults to ~/.local/opt/programl.
EOF
}

# --- begin labm8 init ---
f=programl/external/labm8/labm8/sh/app.sh
# shellcheck disable=SC1090
source "${RUNFILES_DIR:-/dev/null}/$f" 2>/dev/null ||
source "$(grep -sm1 "^$f " "${RUNFILES_MANIFEST_FILE:-/dev/null}" | cut -f2- -d' ')" 2>/dev/null ||
source "$0.runfiles/$f" 2>/dev/null ||
source "$(grep -sm1 "^$f " "$0.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null ||
source "$(grep -sm1 "^$f " "$0.exe.runfiles_manifest" | cut -f2- -d' ')" 2>/dev/null ||
{
echo >&2 "ERROR: cannot find $f"
exit 1
}
f=
# --- end app init ---

set -euo pipefail

BINARIES=(
"$(DataPath programl/programl/cmd/analyze)"
"$(DataPath programl/programl/cmd/clang2graph)"
"$(DataPath programl/programl/cmd/graph2cdfg)"
"$(DataPath programl/programl/cmd/graph2dot)"
"$(DataPath programl/programl/cmd/graph2json)"
"$(DataPath programl/programl/cmd/llvm2graph)"
"$(DataPath programl/programl/cmd/pbq)"
"$(DataPath programl/programl/cmd/xla2graph)"
)

if [[ $(uname) == Darwin ]]; then
LLVM_LIBS="$(DataPath clang-llvm-10.0.0-x86_64-apple-darwin/lib)"
else
LLVM_LIBS="$(DataPath clang-llvm-10.0.0-x86_64-linux-gnu-ubuntu-18.04/lib)"
fi

main() {
set +u
if [[ "$1" == "--help" ]]; then
help
exit 1
fi
set -u

local prefix=${1:-~/.local/opt/programl}
mkdir -p "$prefix/bin" "$prefix/lib"

echo "Installing ProGraML command line tools ..."
echo
for bin in "${BINARIES[@]}"; do
dst="$prefix/bin/$(basename $bin)"
echo " $dst"
rm -f "$dst"
cp $bin "$dst"
done

echo
echo "Installing libraries to $prefix/lib ..."
rsync -ah --delete --exclude '*.a' "$LLVM_LIBS/" "$prefix/lib/"

echo
echo "===================================================="
echo "To use them, add the following to your ~/.$(basename $SHELL)rc:"
echo
echo "export PATH=$prefix/bin:\$PATH"
echo "export LD_LIBRARY_PATH=$prefix/lib:\$LD_LIBRARY_PATH"
}
main "$@"
27 changes: 27 additions & 0 deletions programl/BUILD
@@ -0,0 +1,27 @@
# Copyright 2019-2020 the ProGraML authors.
#
# Contact Chris Cummins <[email protected]>.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

py_library(
name = "serialize_ops",
srcs = [
"exceptions.py",
"serialize_ops.py",
],
visibility = ["//visibility:public"],
deps = [
"//programl/proto:program_graph_py",
],
)