Commit abf7b1b

added dropout to layers, didn't seem to help, might need more epochs
1 parent 3161198 commit abf7b1b

File tree

9 files changed (+83 / -69 lines)

ReadMe.md

Lines changed: 3 additions & 1 deletion
@@ -48,7 +48,9 @@ Right now there are 8908 images in the [files_trainable](https://github.com/comm
 It seems to perform ok after >20 epochs, but the fine detail seems to struggle.
 Training started at 4:53pm on March 13, 2022 and reached epoch 33 at 8:55pm (7 minutes per epoch) on a 1080Ti card.
 It would be interesting to perform evaluation only on "confident" network returns.
-Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs
+Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs.
+If dropout is used the average loss is 0.1060 on test and 0.0960 on training data after 100 epochs.
+
 
 Input picture (left), groundtruth (top right), and prediction (bottom right)
 ![](docs/example_pred.png)
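
The ReadMe above floats the idea of evaluating only on "confident" network returns. A minimal sketch of one way to do that in libtorch (not code from this repo; the helper name and the 0.9 threshold are assumptions): keep only the pixels whose top softmax probability clears a threshold and score just that subset.

#include <torch/torch.h>

// Hypothetical helper: pixel accuracy over only the "confident" predictions.
// `output` is the raw network output [N, classes, H, W]; `target` holds class ids [N, H, W].
double confident_pixel_accuracy(const torch::Tensor &output, const torch::Tensor &target, double threshold = 0.9) {
  auto probs = torch::softmax(output, 1);
  auto max_result = probs.max(1);            // (values, indices) over the class dimension
  auto max_probs = std::get<0>(max_result);  // [N, H, W] highest class probability per pixel
  auto pred = std::get<1>(max_result);       // [N, H, W] predicted class id per pixel
  auto mask = max_probs > threshold;         // confident pixels only
  auto n_conf = mask.sum().item<double>();
  if (n_conf == 0) return 0.0;
  auto n_correct = ((pred == target) & mask).sum().item<double>();
  return n_correct / n_conf;
}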

docs/example_pred.png

90.4 KB

docs/example_probs.png

44.8 KB

src/data/Comma10kDataset.cpp

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ Comma10kDataset::Comma10kDataset(std::string pathroot, ModeDataSplit mode, bool
 
   // Random order (ensure same random shuffle on both)
   // https://stackoverflow.com/a/16968342
-  if (randomize) {
+  if (mode == Comma10kDataset::ModeDataSplit::kTrain && randomize) {
     unsigned int seed = std::time(NULL);
     std::srand(seed);
     std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
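
The shuffle above relies on the trick from the linked StackOverflow answer: re-seed the generator with the same seed before shuffling each paired vector, so the rgb paths and their masks receive the identical permutation. A standalone sketch of the pattern (paths_label is an assumed name for the paired vector):

#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> paths_rgb = {"0001.png", "0002.png", "0003.png"};
  std::vector<std::string> paths_label = {"0001_mask.png", "0002_mask.png", "0003_mask.png"};

  // Seed once, then reset to the same seed before the second shuffle so both
  // vectors get the identical permutation (https://stackoverflow.com/a/16968342)
  unsigned int seed = std::time(NULL);
  std::srand(seed);
  std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
  std::srand(seed);
  std::random_shuffle(paths_label.begin(), paths_label.end());
  return 0;
}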

src/net_seg_test.cpp

Lines changed: 65 additions & 60 deletions
@@ -77,9 +77,12 @@ int main(int argc, char *argv[]) {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(1).workers(6));
+  auto sampler = torch::data::samplers::SequentialSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(1).workers(10);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Loop through our batches of training data
+  bool visualize = true;
   double loss_sum = 0.0;
   size_t loss_ct = 0;
   size_t batch_idx = 0;
@@ -108,66 +111,68 @@ int main(int argc, char *argv[]) {
     std::cout << items_curr << "/" << items_total << " | loss = " << loss.item<float>() << " | loss_avg = " << loss_avg << " (" << loss_ct
               << " samples)" << std::endl;
 
-    // Softmax the output to get our total class probabilities [N, classes, H, W]
-    // Thus across all classes, our probabilities should sum to 1
-    auto output_probs = torch::softmax(output, 1);
-
-    // Plot the first image, need to change to opencv format [H,W,C]
-    // Note that we arg max the softmax network output, then need to add an dimension
-    // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
-    torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
-    torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
-    torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
-
-    // Convert them all to 0..255 ranges
-    cv_input = cv_input.to(torch::kInt8);
-    cv_label = cv_label.to(torch::kInt8);
-    cv_output = cv_output.to(torch::kInt8);
-
-    // Point the cv::Mats to the transformed locations in memory
-    cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
-    cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
-    cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());
-
-    // Convert labeled images to color
-    cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
-    cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
-    // img_label = 255.0 / (double)n_classes * img_label;
-    // img_output = 255.0 / (double)n_classes * img_output;
-
-    // Change both to be colored like the comma10k
-    img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-    img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-
-    // Finally stack and display in a window
-    cv::Mat outimg1, outimg2, outimg3;
-    cv::hconcat(img_input, img_label, outimg1);
-    cv::hconcat(img_input, img_output, outimg2);
-    cv::vconcat(outimg1, outimg2, outimg3);
-    cv::imshow("prediction", outimg3);
-
-    // Next we will visualize our probability distributions [N, classes, H, W]
-    torch::Tensor cv_probs = output_probs[0].clone().cpu();
-    cv_probs = cv_probs.to(torch::kFloat32);
-    cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
-    assert((size_t)output_probs.size(0) == 1);
-    assert((size_t)cv_probs.size(0) == n_classes);
-    for (int n = 0; n < (int)n_classes; n++) {
-      cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
-      imgtmp = 255 * imgtmp;
-      imgtmp.convertTo(imgtmp, CV_8UC1);
-      cv::Mat imgtmp_color;
-      cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
-      imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+    // Visualize if we need to
+    if (visualize) {
+      // Softmax the output to get our total class probabilities [N, classes, H, W]
+      // Thus across all classes, our probabilities should sum to 1
+      auto output_probs = torch::softmax(output, 1);
+
+      // Plot the first image, need to change to opencv format [H,W,C]
+      // Note that we arg max the softmax network output, then need to add an dimension
+      // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
+      torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
+      torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
+      torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
+
+      // Convert them all to 0..255 ranges
+      cv_input = cv_input.to(torch::kInt8);
+      cv_label = cv_label.to(torch::kInt8);
+      cv_output = cv_output.to(torch::kInt8);
+
+      // Point the cv::Mats to the transformed locations in memory
+      cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
+      cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
+      cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());
+
+      // Convert labeled images to color
+      cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
+      cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
+      // img_label = 255.0 / (double)n_classes * img_label;
+      // img_output = 255.0 / (double)n_classes * img_output;
+
+      // Change both to be colored like the comma10k
+      img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+      img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+
+      // Finally stack and display in a window
+      cv::Mat outimg1, outimg2, outimg3;
+      cv::hconcat(img_input, img_label, outimg1);
+      cv::hconcat(img_input, img_output, outimg2);
+      cv::vconcat(outimg1, outimg2, outimg3);
+      cv::imshow("prediction", outimg3);
+
+      // Next we will visualize our probability distributions [N, classes, H, W]
+      torch::Tensor cv_probs = output_probs[0].clone().cpu();
+      cv_probs = cv_probs.to(torch::kFloat32);
+      cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
+      assert((size_t)output_probs.size(0) == 1);
+      assert((size_t)cv_probs.size(0) == n_classes);
+      for (int n = 0; n < (int)n_classes; n++) {
+        cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
+        imgtmp = 255 * imgtmp;
+        imgtmp.convertTo(imgtmp, CV_8UC1);
+        cv::Mat imgtmp_color;
+        cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
+        imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+      }
+      cv::imshow("uncertainties", outimg4);
+      cv::waitKey(100);
+
+      // Save to file for readme
+      // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
+      // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
+      // std::exit(EXIT_FAILURE);
     }
-    cv::imshow("uncertainties", outimg4);
-    cv::waitKey(100);
-
-    // Save to file for readme
-    // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
-    // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
-    // std::exit(EXIT_FAILURE);
-
     batch_idx++;
   }
 }

src/net_seg_train.cpp

Lines changed: 4 additions & 2 deletions
@@ -71,7 +71,9 @@ int main() {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(5).workers(30));
+  auto sampler = torch::data::samplers::RandomSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(false).batch_size(5).workers(30);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Create the optimizer
   // torch::optim::SGD optimizer(model->parameters(), torch::optim::SGDOptions(0.01).momentum(0.5));
@@ -111,7 +113,7 @@ int main() {
     optimizer.step();
 
     // Print our the loss every once in a while
-    if (batch_idx % 10 == 0) {
+    if (batch_idx % 100 == 0) {
 
       // Debug printout
       size_t items_curr = batch_idx * batch.data.size(0);
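
Both binaries now construct their dataloaders with an explicit sampler: the test tool pairs a SequentialSampler with enforce_ordering(true) so multi-worker batches come back in dataset order, while training (above) keeps a RandomSampler with enforce_ordering(false). A self-contained sketch of that libtorch API on a toy dataset (ToyDataset is invented for illustration):

#include <torch/torch.h>
#include <iostream>

// Tiny stand-in dataset just to exercise the sampler-aware make_data_loader overload
struct ToyDataset : torch::data::datasets::Dataset<ToyDataset> {
  torch::data::Example<> get(size_t index) override {
    return {torch::full({1}, (float)index), torch::full({1}, (float)index)};
  }
  torch::optional<size_t> size() const override { return 8; }
};

int main() {
  auto dataset = ToyDataset().map(torch::data::transforms::Stack<>());

  // Deterministic, in-order iteration (what the test binary wants)
  auto sampler = torch::data::samplers::SequentialSampler(8);
  auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(2).workers(2);
  auto data_loader = torch::data::make_data_loader(std::move(dataset), sampler, options);

  // Swapping in torch::data::samplers::RandomSampler(8) and enforce_ordering(false)
  // gives the shuffled, unordered behaviour used on the training side
  for (auto &batch : *data_loader) {
    std::cout << batch.data.squeeze() << std::endl;
  }
  return 0;
}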

src/network/blocks/UNetBlocks.h

Lines changed: 6 additions & 2 deletions
@@ -63,7 +63,10 @@ struct UNetDownwardsImpl : torch::nn::Module {
  }
 
  // Forward propagation
- torch::Tensor forward(torch::Tensor input) { return conv2(conv1(torch::max_pool2d(input, 2))); }
+ torch::Tensor forward(torch::Tensor input) {
+   auto output = conv2(conv1(torch::max_pool2d(input, 2)));
+   return torch::dropout(output, 0.5, this->is_training());
+ }
 
  // Parts of the network
  // NOTE: for submodules, we call the "empty holder" constructor
@@ -96,7 +99,8 @@ struct UNetUpwardsImpl : torch::nn::Module {
    input = torch::cat({input, bridge}, 1);
 
    // Finally do our convolutions and return
-   return conv2(conv1(input));
+   auto output = torch::dropout(input, 0.5, this->is_training());
+   return conv2(conv1(output));
  }
 
  // Parts of the network

src/network/models/UNetModel.h

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ struct UNetModelImpl : torch::nn::Module {
    // First do our starting two convolutions
    x1 = inconv1(input);
    x1 = inconv2(x1);
+   x1 = torch::dropout(x1, 0.25, this->is_training());
 
    // Downscale to the bottleneck
    x2 = down1(x1);
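
The dropout added in this commit is the functional torch::dropout call gated on is_training(), so it is active under model->train() and becomes a pass-through after model->eval(). A minimal standalone sketch of that behaviour (TinyBlock is an invented module, not one of the repo's U-Net blocks):

#include <torch/torch.h>
#include <iostream>

struct TinyBlockImpl : torch::nn::Module {
  TinyBlockImpl() {
    conv = register_module("conv", torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 8, 3).padding(1)));
  }
  torch::Tensor forward(torch::Tensor input) {
    auto output = torch::relu(conv(input));
    // Same pattern as the diff: p = 0.5, enabled only while the module is in training mode
    return torch::dropout(output, 0.5, this->is_training());
  }
  torch::nn::Conv2d conv{nullptr};
};
TORCH_MODULE(TinyBlock);

int main() {
  TinyBlock block;
  auto x = torch::rand({1, 3, 16, 16});

  block->train();
  auto y_train = block->forward(x);  // dropout zeroes roughly half the activations and rescales the rest

  block->eval();
  auto y_eval = block->forward(x);   // dropout is a pass-through here

  std::cout << "train nonzeros: " << (y_train != 0).sum().item<int64_t>()
            << ", eval nonzeros: " << (y_eval != 0).sum().item<int64_t>() << std::endl;
  return 0;
}

Dropping activations at p = 0.5 in every block cuts effective capacity considerably, which would be consistent with the commit message's guess that more epochs are needed before it pays off.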

src/utils/augmentations.h

Lines changed: 3 additions & 3 deletions
@@ -135,9 +135,9 @@ inline void random_camera_model(cv::Mat &cv_rgb, cv::Mat &cv_label) {
  cam.at<float>(2, 1) = 0.0f;
  cam.at<float>(2, 2) = 1.0f;
  cv::Mat dist(5, 1, cv::DataType<float>::type);
- dist.at<float>(0, 0) = 0.1 * unif_pn(rng);
- dist.at<float>(1, 0) = 0.05 * unif_pn(rng);
- dist.at<float>(2, 0) = 1e-3 * unif_pn(rng);
+ dist.at<float>(0, 0) = 0.20 * unif_pn(rng);
+ dist.at<float>(1, 0) = 0.10 * unif_pn(rng);
+ dist.at<float>(2, 0) = 1e-2 * unif_pn(rng);
  dist.at<float>(3, 0) = 1e-4 * unif_pn(rng);
  dist.at<float>(4, 0) = 1e-5 * unif_pn(rng);
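
The edit above roughly doubles the random radial and tangential distortion used by the augmentation. As a rough standalone illustration of what coefficients of this size do (not the repo's random_camera_model; the intrinsics, RNG setup, and the choice of cv::undistort to apply the warp are all assumptions):

#include <opencv2/opencv.hpp>
#include <random>

int main() {
  // Dummy input frame; in a real pipeline the same warp would also be applied to the
  // label mask, ideally with nearest-neighbour sampling so class ids stay intact
  cv::Mat cv_rgb(480, 640, CV_8UC3, cv::Scalar(50, 100, 150));

  std::mt19937 rng(std::random_device{}());
  std::uniform_real_distribution<float> unif_pn(-1.0f, 1.0f);  // assumed to mirror unif_pn in the repo

  // Simple pinhole intrinsics centred on the image (values are illustrative)
  cv::Mat cam = cv::Mat::eye(3, 3, CV_32F);
  cam.at<float>(0, 0) = 500.0f;
  cam.at<float>(1, 1) = 500.0f;
  cam.at<float>(0, 2) = cv_rgb.cols / 2.0f;
  cam.at<float>(1, 2) = cv_rgb.rows / 2.0f;

  // Random (k1, k2, p1, p2, k3) with the post-commit magnitudes
  cv::Mat dist(5, 1, CV_32F);
  dist.at<float>(0, 0) = 0.20f * unif_pn(rng);
  dist.at<float>(1, 0) = 0.10f * unif_pn(rng);
  dist.at<float>(2, 0) = 1e-2f * unif_pn(rng);
  dist.at<float>(3, 0) = 1e-4f * unif_pn(rng);
  dist.at<float>(4, 0) = 1e-5f * unif_pn(rng);

  // Warp the image with the randomized camera model
  cv::Mat warped;
  cv::undistort(cv_rgb, warped, cam, dist);
  cv::imwrite("warped.png", warped);
  return 0;
}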
