
Commit 121992d

Add files via upload
1 parent d05efc8 commit 121992d


49 files changed: +15895 -0 lines changed

LICENSE

Lines changed: 673 additions & 0 deletions
Large diffs are not rendered by default.

README.md

Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
# drone_causality

All training, data processing, and analysis code used for the paper "Robust Visual Flight Navigation with Liquid Neural Networks". For code run onboard the drone, see [this repository](https://github.com/GoldenZephyr/rosetta_drone).

## Installation Instructions

For x86-based systems (most computers), set up your Python environment using the conda environment file in config/environment.yml:

~~~
cd drone_causality
conda env create -f config/environment.yml
conda activate causality
~~~

Another environment file is available for ppc64le (PowerPC) based architectures:
~~~
conda env create -f config/satori_environment.yml
conda activate causality
~~~

Alternatively, a Docker image containing all required packages can be found on Docker Hub at dolphonie1/causal_repo:0.1.17.

~~~
docker pull dolphonie1/causal_repo:0.1.17
docker run -it --net=host dolphonie1/causal_repo:0.1.17 /bin/bash
~~~
## Downloading Datasets/Existing Checkpoints
The original hand-collected training dataset can be found [here](http://knightridermit.myqnapcloud.com:8080/share.cgi?ssid=06lMJMN&fid=06lMJMN&path=%2F&filename=devens_snowy_fixed.zip&openfolder=forcedownload&ep=) (filename: devens_snowy_fixed, size: 33.2GB). Additionally, we have a subset of the full `devens_snowy_fixed` dataset that only contains runs with the chair [here](http://knightridermit.myqnapcloud.com:8080/share.cgi?ssid=06lMJMN&fid=06lMJMN&path=%2F&filename=devens_chair.zip&openfolder=forcedownload&ep=) (devens_chair, 2.3GB).

We have also included the exact synthetic datasets we used for our experiments. These datasets were created using the script at `preprocess/closed_loop_augmentation.py`, but with a random seed. We have both a [full dataset](http://knightridermit.myqnapcloud.com:8080/share.cgi?ssid=06lMJMN&fid=06lMJMN&path=%2F&filename=synthetic_small4.zip&openfolder=forcedownload&ep=) (synthetic_small4, 14.7GB) used to train the starting checkpoint and a [chair-only dataset](http://knightridermit.myqnapcloud.com:8080/share.cgi?ssid=06lMJMN&fid=06lMJMN&path=%2F&filename=synthetic_chair4.zip&openfolder=forcedownload&ep=) (synthetic_chair, 4.3GB) used to fine-tune the final models for testing.

To replicate the results of our experiments, first train on the entire `devens_snowy_fixed` dataset together with the full synthetic dataset `synthetic_small4`, or use the checkpoints in `checkpoints/chair4_long_balanced`.

Afterwards, fine-tune models starting from `checkpoints/chair4_long_balanced` on the `devens_chair` dataset with the synthetic dataset `synthetic_chair4`.

All training was done using the best hyperparameters found in the `old_db` folder.
## Training Models
### Training Once
The script tf_data_training.py executes one training run. It loads data and models, sets up the multi-GPU processing strategy, and runs training while checkpointing models. The script's default hyperparameters are static and are _not_ the best hyperparameters found during parameter tuning, so all hyperparameters need to be specified manually.

Example usage:
~~~
python3 tf_data_training.py --model ncp --data_dir /path/to/devens_snowy_fixed --extra_data_dir /path/to/synthetic_small4 --epochs 100 --seq_len 64 --data_stride 1 --data_shift 16
~~~

### Training Multiple Times
The convenience script train_multiple.py automatically manages multiple training runs, saving JSON log files to record the results of each run and determining how many runs have already been completed so that training can be resumed. The script also automatically loads hyperparameters from the best study when given a hyperparameter study database file.

Example usage:
~~~
python train_multiple.py ncp_objective /path/to/devens_snowy_short --n_trains 5 --batch_size 300 --storage_name sqlite:///old_db/ncp_objective.db --storage_type rdb --timeout 72000 --extra_data_dir /path/to/synthetic --hotstart_dir /path/to/chair4_long_balanced --study_name hyperparam_tuning_ --out_dir chair4_fine_targets
~~~

The `storage_name` argument specifies the database file (in the `old_db` folder) that the best hyperparameters should be read from. Unfortunately, because training was conducted on different machines, different objectives have different hyperparameter files. For each type of network, use the following `storage_name`:

- LSTM: sqlite:///old_db/lstm_objective.db
- CFC: sqlite:///old_db/cfc_objective.db
- NCP: sqlite:///old_db/ncp_objective.db
- GRUODE: sqlite:///old_db/hyperparam_tuning.db
- TCN: old_db/tcn_objective.json
- Wiredcfccell (Sparse-CfC): sqlite:///old_db/wiredcfccell_objective.db
- LTC: sqlite:///old_db/hyperparam_tuning.db
- CT-RNN: old_db/ctrnn_objective.json

Note that the `storage_type` argument should be set to `rdb` for sqlite URLs, `json` for JSON files, and `pkl` for PKL files.

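For example, a first-stage training run on the full dataset (the workflow described under Downloading Datasets/Existing Checkpoints) might look like the sketch below. The dataset paths and output directory name are placeholders, `--hotstart_dir` is omitted on the assumption that it is optional when training from scratch, and additional arguments such as `--study_name` may be needed depending on how the study database was created:

~~~
python train_multiple.py ncp_objective /path/to/devens_snowy_fixed --n_trains 5 --batch_size 300 --storage_name sqlite:///old_db/ncp_objective.db --storage_type rdb --timeout 72000 --extra_data_dir /path/to/synthetic_small4 --out_dir full_dataset_models
~~~
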
## Preprocessing Data
This section describes the methodology used to generate the dataset `devens_snowy_fixed`.

If using new data collected on the drone, use the script `preprocess/process_data.py` to format it correctly for the training scripts. Runs should have the red channel as the 0th channel (i.e., images appear unflipped when opened in an image viewer).

The runs that don't have an underscore in their name (e.g., 1628106140.64) are the original long runs that see all 5 targets. The runs with underscores (e.g., 1628106140.64_1) are generated using the script `preprocess/sequence_slice/slice_sequence.py`, which provides a GUI for specifying start and end points and automatically copies the images and control CSV.

To generate new synthetic datasets, use the script `preprocess/closed_loop_augmentation.py`. The directory `preprocess/aug_json` contains JSON files that list the images to be augmented and the pixel location of the target within each image (generated by `preprocess/select_targets.py`; a sketch of an entry is shown after the example below).

Example usage:
The dataset `synthetic_small4` was generated with the following invocation:
~~~
python closed_loop_augmentation.py aug_json/synthetic_full_small.json /path/to/out/dir/synthetic_small4 --num_aug 5 --balance_classes --balance_offsets -10 -70 0 0
~~~

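Judging from `helper_scripts/intersect_aug_json.py` in this commit, each entry of an aug_json file appears to be an `[image_path, target_pixel_coordinates]` pair; the sketch below is hypothetical (the path, file name, and pixel values are made up):

~~~
[
    ["/path/to/devens_snowy_fixed/1628106140.64/000123.png", [320, 180]]
]
~~~
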
## Tuning Hyperparams
The Optuna hyperparameter study database files in the `old_db` directory were generated using the script `hyperparam_tuning.py`. This script is responsible for sampling parameters using Bayesian optimization, running training multiple times using the objective functions in `utils/objective_functions.py`, and logging the results within the Optuna study object.

Example usage:
~~~
python hyperparam_tuning.py ncp_objective /path/to/dataset --n_trials 40 --timeout 64800 --batch_size 300 --extra_data_dir /path/to/synthetic_dataset
~~~

## Analyzing Results

### Stress Tests
The stress test figures used in the paper were generated with the script `analysis/perturb_trajectory.py`.

Example usage:
~~~
python analysis/perturb_trajectory.py dataset_jsons/chair_short_raw.json checkpoints/chair4_fine/train/params.json contrast_perturbation --distance_fxn final_distance --deltas 0.5 1.5 2 2.5 --skip_models ctrnn_mixedcfc --perturb_frac 0.2 --force_even_x
~~~

This file (and most other analysis files) consumes a dataset_json file in the following format:
~~~
{
    "name_of_dataset" : [
        "/path/to/dataset",
        [boolean of whether to flip color channels],
        "path/to/control_csv" or null if no csv desired,
    ], ...
}
~~~

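As a concrete (hypothetical) illustration, a single entry could look like the sketch below; the run name and paths are placeholders rather than files shipped with the repository:

~~~
{
    "devens_chair_run": [
        "/path/to/devens_chair/1628106140.64_1",
        false,
        null
    ]
}
~~~
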
You will most likely have to edit the files in `dataset_jsons` to match the runs you want to analyze on your computer.
### Useful Files

- visualization_runner.py: Used for generating videos of visual backprop, input gradient, SHAP, or other visualization techniques overlaid on the original video sequence, along with a visualization of the controls
- analysis/vis_grid.py: Used for generating multiple images of visual backprop alongside the original camera images. Used in the paper
- analysis/lipschitz_constant.py: Calculates the Lipschitz constant of the RNN hidden state components when seeing a given sequence of inputs (measures the maximum difference in the RNN hidden state across 2 consecutive timesteps)
- analysis/loss_graph.py: Plots training loss curves
- analysis/ssim.py: Calculates the structural similarity index of saliency maps when random noise is added to the image


Contact patrick[dot]d[dot]kao[at]gmail[dot]com with any questions

helper_scripts/flip_channels.py

Lines changed: 24 additions & 0 deletions
@@ -0,0 +1,24 @@
# Created by Patrick Kao at 5/3/22
import argparse
import os
from pathlib import Path

import cv2
import numpy as np

from keras_models import IMAGE_SHAPE
from utils.data_utils import load_image


def flip_channels(im_dir: str, out_dir: str):
    """Re-saves every image in im_dir to out_dir with its color channels reversed."""
    Path(out_dir).mkdir(parents=True, exist_ok=True)
    for im_path in os.listdir(im_dir):
        img = load_image(os.path.join(im_dir, im_path), IMAGE_SHAPE, reverse_channels=False)  # writing flips channels
        cv2.imwrite(os.path.join(out_dir, im_path), np.squeeze(img, axis=0))


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("im_dir")
    parser.add_argument("out_dir")
    args = parser.parse_args()
    flip_channels(args.im_dir, args.out_dir)

helper_scripts/flip_csv.py

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
import os
from typing import Sequence

import pandas as pd


def flip_csv(csv_file: str, columns: Sequence[str]):
    df = pd.read_csv(csv_file)
    for col in columns:
        df[col] = df[col].apply(lambda x: x * -1)

    df.to_csv(csv_file, index=False)


data = "/home/dolphonie/Desktop/mixed_aug_fixed"
for folder in os.listdir(data):
    flip_csv(os.path.join(data, folder, "data_out.csv"), ["vz", "omega_z"])
Lines changed: 121 additions & 0 deletions
@@ -0,0 +1,121 @@
import argparse
import json
import os.path
import re
import shutil
from collections import defaultdict
from json import JSONDecodeError
from pathlib import Path
from typing import List, Dict, Any

from utils.model_utils import get_readable_name

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


def get_checkpoint_props(checkpoint_path: str) -> Dict[str, Any]:
    """
    Given name of checkpoint path, extracts relevant properties from string

    :param checkpoint_path: Path or basename of model checkpoint to be analyzed
    :return: Dict of checkpoint properties: val loss, train loss, and epoch
    """
    props = {}

    val_index = checkpoint_path.index("val")
    val_loss = float(checkpoint_path[val_index + 9:val_index + 15])
    props["val_loss"] = val_loss

    try:
        train_index = checkpoint_path.index("train")
        train_loss = float(checkpoint_path[train_index + 11:train_index + 17])
        props["train_loss"] = train_loss
    except ValueError:
        props["train_loss"] = 999

    epoch_index = checkpoint_path.index("epoch")
    epoch = int(checkpoint_path[epoch_index + 6:epoch_index + 9])
    props["epoch"] = epoch

    # get checkpoint time string
    time_search = re.compile(r".*(\d\d\d\d:\d\d:\d\d:\d\d:\d\d:\d\d).hdf5")
    time_str = time_search.search(checkpoint_path).group(1)
    props["checkpoint_time_str"] = time_str

    # get model name
    name_search = re.compile("model-(.*)_seq-.*")
    model_name = name_search.search(checkpoint_path).group(1)
    props["model_name"] = model_name

    return props


def get_best_checkpoint(candidate_jsons: List[Dict[str, Any]], checkpoint_dir: str, criteria_key: str = "val"):
    assert criteria_key == "val" or criteria_key == "train", "only val and train supported"
    # find the training run with the lowest loss for the given criteria
    best_props = None
    best_cand_value = float("inf")
    for candidate in candidate_jsons:
        cand_value = candidate[f"best_{criteria_key}_loss"]
        if cand_value < best_cand_value:
            best_cand_value = cand_value  # track the current best so later candidates are compared against it
            best_props = {
                f"{criteria_key}_loss": round(cand_value, 4),
                "epoch": candidate[f"best_{criteria_key}_epoch"] + 1,  # checkpoints epoch 1-indexed, jsons 0-indexed
                "model_name": get_readable_name(candidate["model_params"])
            }
            if "checkpoint_time_str" in candidate:
                best_props["checkpoint_time_str"] = candidate["checkpoint_time_str"]

    # locate the checkpoint file whose name matches the best run's properties
    for checkpoint in os.listdir(checkpoint_dir):
        if ".hdf5" not in checkpoint:
            continue
        props = get_checkpoint_props(checkpoint)
        if best_props.items() <= props.items():
            return os.path.join(checkpoint_dir, checkpoint)

    raise ValueError(f"No checkpoint matching props in json {best_props} found")


def read_json(path):
    with open(path, "r") as f:
        return json.load(f)


def process_json_list(json_dir: str, checkpoint_dir: str, out_dir: str):
    json_map = defaultdict(list)
    # separate jsons by class
    re_match = re.compile(r"(?:hyperparam_tuning_)?(.*)_\d_train_results.json")
    for file in os.listdir(json_dir):
        match = re_match.search(file)
        if match is not None:
            model_type = match.group(1)
            # read json data and save
            json_path = os.path.join(json_dir, file)
            try:
                parsed = read_json(json_path)
                json_map[model_type].append(parsed)
            except JSONDecodeError:
                print(f"Could not parse json at {json_path}, skipping")
                continue

    for candidate in ["val", "train"]:
        params_map = {}
        # for each class, get best checkpoint
        dest = os.path.join(out_dir, candidate)
        Path(dest).mkdir(exist_ok=True, parents=True)
        for model_type, json_data in json_map.items():
            checkpoint_path = get_best_checkpoint(candidate_jsons=json_data, checkpoint_dir=checkpoint_dir,
                                                  criteria_key=candidate)
            shutil.copy(checkpoint_path, dest)
            params_map[os.path.basename(checkpoint_path)] = json_data[0]["model_params"]

        with open(os.path.join(dest, "params.json"), "w") as f:
            json.dump(params_map, f)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("json_dir", type=str)
    parser.add_argument("checkpoint_dir", type=str)
    parser.add_argument("--out_dir", type=str, default="out_models")
    args = parser.parse_args()
    process_json_list(args.json_dir, args.checkpoint_dir, args.out_dir)
Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
# Created by Patrick Kao at 3/11/22
import argparse
import json
import os
import shutil
from pathlib import Path

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))


def get_matching_checkpoints(checkpoint_dir: str, filter_str: str, params_str: str,
                             out_dir: str = "matching_checkpoints"):
    """
    Finds all checkpoints matching filter_str and creates a params.json file for them with the params_str given by
    params_str
    :return:
    """

    model_params = {}
    out_dir = os.path.join(SCRIPT_DIR, out_dir)
    Path(out_dir).mkdir(exist_ok=True, parents=True)
    for checkpoint in sorted(os.listdir(checkpoint_dir)):
        if filter_str in checkpoint and ".hdf5" in checkpoint:
            model_params[checkpoint] = params_str
            shutil.copy(os.path.join(checkpoint_dir, checkpoint), out_dir)

    with open(os.path.join(out_dir, "params.json"), "w") as f:
        json.dump(model_params, f)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("checkpoint_dir", type=str)
    parser.add_argument("filter_str", type=str)
    parser.add_argument("params_str", type=str)
    args = parser.parse_args()
    get_matching_checkpoints(args.checkpoint_dir, args.filter_str, args.params_str)

helper_scripts/intersect_aug_json.py

Lines changed: 31 additions & 0 deletions
@@ -0,0 +1,31 @@
# Created by Patrick Kao at 3/16/22
"""
Calculates the intersection of the img dirs of the images in a data processing json file and a data directory and
only saves the json with corresponding entries in the data directory
"""
import argparse
import json
import os.path
from typing import Any, List


def get_intersection_json(data_json: str, data_dir: str, out_path: str = "intersect.json") -> List[Any]:
    to_ret = []
    with open(data_json, "r") as f:
        synth_data = json.load(f)

    # keep only entries whose image directory also exists in data_dir
    for img_path, center_coords in synth_data:
        img_dir = os.path.basename(os.path.dirname(img_path))
        if os.path.exists(os.path.join(data_dir, img_dir)):
            to_ret.append([img_path, center_coords])

    with open(out_path, "w") as f:
        json.dump(to_ret, f)

    return to_ret


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("data_json", type=str)
    parser.add_argument("data_dir", type=str)
    args = parser.parse_args()
    get_intersection_json(args.data_json, args.data_dir)

helper_scripts/merge_model_dirs.py

Lines changed: 37 additions & 0 deletions
@@ -0,0 +1,37 @@
# Created by Patrick Kao at 4/18/22
import argparse
import json
import os
import shutil
from pathlib import Path
from typing import Sequence


def merge_model_dirs(merge_dirs: Sequence[str], out_dir: str):
    for model_type in ["train", "val"]:
        out_json = {}
        for model_dir in merge_dirs:
            dir_path = os.path.join(model_dir, model_type)
            type_out = os.path.join(out_dir, model_type)
            Path(type_out).mkdir(parents=True, exist_ok=True)
            contents = os.listdir(dir_path)
            model_names = [file for file in contents if ".hdf5" in file]
            for model in model_names:
                abs_path = os.path.join(dir_path, model)
                shutil.copy(abs_path, type_out)

            with open(os.path.join(dir_path, "params.json"), "r") as f:
                param_data = json.load(f)

            out_json.update(param_data)

        with open(os.path.join(type_out, "params.json"), "w") as f:
            json.dump(out_json, f)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("merge_dirs", nargs='+', default=[])
    parser.add_argument("--out_dir", default="merged_models")
    args = parser.parse_args()
    merge_model_dirs(args.merge_dirs, args.out_dir)
