Commit abf7b1b

added dropout to layers, didn't seem to help, might need more epochs
1 parent 3161198 commit abf7b1b

File tree

9 files changed (+83 / -69 lines)

ReadMe.md

Lines changed: 3 additions & 1 deletion
@@ -48,7 +48,9 @@ Right now there are 8908 images in the [files_trainable](https://github.com/comm
 It seems to perform ok after >20 epochs, but the fine detail seems to struggle.
 Training started at 4:53pm on March 13, 2022 and reached epoch 33 at 8:55pm (7 minutes per epoch) on a 1080Ti card.
 It would be interesting to perform evaluation only on "confident" network returns.
-Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs
+Average loss of 0.0694 on test and 0.0549 on training data after 100 epochs.
+If dropout is used the average loss is 0.1060 on test and 0.0960 on training data after 100 epochs.
+
 
 Input picture (left), groundtruth (top right), and prediction (bottom right)
 ![](docs/example_pred.png)
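
The ReadMe above floats the idea of evaluating only on "confident" network returns. A minimal sketch of one way to do that in libtorch (not code from this repo; the helper name and the 0.9 threshold are assumptions): keep only the pixels whose top softmax probability clears a threshold and score just that subset.

#include <torch/torch.h>

// Hypothetical helper: pixel accuracy over only the "confident" predictions.
// `output` is the raw network output [N, classes, H, W]; `target` holds class ids [N, H, W].
double confident_pixel_accuracy(const torch::Tensor &output, const torch::Tensor &target, double threshold = 0.9) {
  auto probs = torch::softmax(output, 1);
  auto max_result = probs.max(1);            // (values, indices) over the class dimension
  auto max_probs = std::get<0>(max_result);  // [N, H, W] highest class probability per pixel
  auto pred = std::get<1>(max_result);       // [N, H, W] predicted class id per pixel
  auto mask = max_probs > threshold;         // confident pixels only
  auto n_conf = mask.sum().item<double>();
  if (n_conf == 0) return 0.0;
  auto n_correct = ((pred == target) & mask).sum().item<double>();
  return n_correct / n_conf;
}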

docs/example_pred.png

90.4 KB

docs/example_probs.png

44.8 KB

src/data/Comma10kDataset.cpp

Lines changed: 1 addition & 1 deletion
@@ -60,7 +60,7 @@ Comma10kDataset::Comma10kDataset(std::string pathroot, ModeDataSplit mode, bool
 
   // Random order (ensure same random shuffle on both)
   // https://stackoverflow.com/a/16968342
-  if (randomize) {
+  if (mode == Comma10kDataset::ModeDataSplit::kTrain && randomize) {
     unsigned int seed = std::time(NULL);
     std::srand(seed);
     std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
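
The shuffle above relies on the trick from the linked StackOverflow answer: re-seed the generator with the same seed before shuffling each paired vector, so the rgb paths and their masks receive the identical permutation. A standalone sketch of the pattern (paths_label is an assumed name for the paired vector):

#include <algorithm>
#include <cstdlib>
#include <ctime>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> paths_rgb = {"0001.png", "0002.png", "0003.png"};
  std::vector<std::string> paths_label = {"0001_mask.png", "0002_mask.png", "0003_mask.png"};

  // Seed once, then reset to the same seed before the second shuffle so both
  // vectors get the identical permutation (https://stackoverflow.com/a/16968342)
  unsigned int seed = std::time(NULL);
  std::srand(seed);
  std::random_shuffle(paths_rgb.begin(), paths_rgb.end());
  std::srand(seed);
  std::random_shuffle(paths_label.begin(), paths_label.end());
  return 0;
}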

src/net_seg_test.cpp

Lines changed: 65 additions & 60 deletions
@@ -77,9 +77,12 @@ int main(int argc, char *argv[]) {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(1).workers(6));
+  auto sampler = torch::data::samplers::SequentialSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(1).workers(10);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Loop through our batches of training data
+  bool visualize = true;
   double loss_sum = 0.0;
   size_t loss_ct = 0;
   size_t batch_idx = 0;
@@ -108,66 +111,68 @@ int main(int argc, char *argv[]) {
     std::cout << items_curr << "/" << items_total << " | loss = " << loss.item<float>() << " | loss_avg = " << loss_avg << " (" << loss_ct
               << " samples)" << std::endl;
 
-    // Softmax the output to get our total class probabilities [N, classes, H, W]
-    // Thus across all classes, our probabilities should sum to 1
-    auto output_probs = torch::softmax(output, 1);
-
-    // Plot the first image, need to change to opencv format [H,W,C]
-    // Note that we arg max the softmax network output, then need to add an dimension
-    // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
-    torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
-    torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
-    torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
-
-    // Convert them all to 0..255 ranges
-    cv_input = cv_input.to(torch::kInt8);
-    cv_label = cv_label.to(torch::kInt8);
-    cv_output = cv_output.to(torch::kInt8);
-
-    // Point the cv::Mats to the transformed locations in memory
-    cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
-    cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
-    cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());
-
-    // Convert labeled images to color
-    cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
-    cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
-    // img_label = 255.0 / (double)n_classes * img_label;
-    // img_output = 255.0 / (double)n_classes * img_output;
-
-    // Change both to be colored like the comma10k
-    img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-    img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
-
-    // Finally stack and display in a window
-    cv::Mat outimg1, outimg2, outimg3;
-    cv::hconcat(img_input, img_label, outimg1);
-    cv::hconcat(img_input, img_output, outimg2);
-    cv::vconcat(outimg1, outimg2, outimg3);
-    cv::imshow("prediction", outimg3);
-
-    // Next we will visualize our probability distributions [N, classes, H, W]
-    torch::Tensor cv_probs = output_probs[0].clone().cpu();
-    cv_probs = cv_probs.to(torch::kFloat32);
-    cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
-    assert((size_t)output_probs.size(0) == 1);
-    assert((size_t)cv_probs.size(0) == n_classes);
-    for (int n = 0; n < (int)n_classes; n++) {
-      cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
-      imgtmp = 255 * imgtmp;
-      imgtmp.convertTo(imgtmp, CV_8UC1);
-      cv::Mat imgtmp_color;
-      cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
-      imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+    // Visualize if we need to
+    if (visualize) {
+      // Softmax the output to get our total class probabilities [N, classes, H, W]
+      // Thus across all classes, our probabilities should sum to 1
+      auto output_probs = torch::softmax(output, 1);
+
+      // Plot the first image, need to change to opencv format [H,W,C]
+      // Note that we arg max the softmax network output, then need to add an dimension
+      // We scale up the 0..1 range back to the 0..255 that opencv expects (later cast to int)
+      torch::Tensor cv_input = 255.0 * batch.data[0].permute({1, 2, 0}).clone().cpu();
+      torch::Tensor cv_label = batch.target[0].permute({1, 2, 0}).clone().cpu();
+      torch::Tensor cv_output = torch::unsqueeze(output_probs[0].argmax(0), 0).permute({1, 2, 0}).clone().cpu();
+
+      // Convert them all to 0..255 ranges
+      cv_input = cv_input.to(torch::kInt8);
+      cv_label = cv_label.to(torch::kInt8);
+      cv_output = cv_output.to(torch::kInt8);
+
+      // Point the cv::Mats to the transformed locations in memory
+      cv::Mat img_input(cv::Size((int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv_input.data_ptr<int8_t>());
+      cv::Mat img_label(cv::Size((int)cv_label.size(1), (int)cv_label.size(0)), CV_8UC1, cv_label.data_ptr<int8_t>());
+      cv::Mat img_output(cv::Size((int)cv_output.size(1), (int)cv_output.size(0)), CV_8UC1, cv_output.data_ptr<int8_t>());
+
+      // Convert labeled images to color
+      cv::cvtColor(img_label, img_label, cv::COLOR_GRAY2BGR);
+      cv::cvtColor(img_output, img_output, cv::COLOR_GRAY2BGR);
+      // img_label = 255.0 / (double)n_classes * img_label;
+      // img_output = 255.0 / (double)n_classes * img_output;
+
+      // Change both to be colored like the comma10k
+      img_label.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+      img_output.forEach<cv::Vec3b>([&](cv::Vec3b &px, const int *pos) -> void { px = dataset.map_id2hex[(char)px[0]]; });
+
+      // Finally stack and display in a window
+      cv::Mat outimg1, outimg2, outimg3;
+      cv::hconcat(img_input, img_label, outimg1);
+      cv::hconcat(img_input, img_output, outimg2);
+      cv::vconcat(outimg1, outimg2, outimg3);
+      cv::imshow("prediction", outimg3);
+
+      // Next we will visualize our probability distributions [N, classes, H, W]
+      torch::Tensor cv_probs = output_probs[0].clone().cpu();
+      cv_probs = cv_probs.to(torch::kFloat32);
+      cv::Mat outimg4 = cv::Mat(cv::Size(n_classes * (int)cv_input.size(1), (int)cv_input.size(0)), CV_8UC3, cv::Scalar(0, 0, 0));
+      assert((size_t)output_probs.size(0) == 1);
+      assert((size_t)cv_probs.size(0) == n_classes);
+      for (int n = 0; n < (int)n_classes; n++) {
+        cv::Mat imgtmp(cv::Size((int)cv_probs.size(2), (int)cv_probs.size(1)), CV_32FC1, cv_probs[n].data_ptr<float>());
+        imgtmp = 255 * imgtmp;
+        imgtmp.convertTo(imgtmp, CV_8UC1);
+        cv::Mat imgtmp_color;
+        cv::applyColorMap(imgtmp, imgtmp_color, cv::COLORMAP_JET);
+        imgtmp_color.copyTo(outimg4(cv::Rect(n * (int)cv_input.size(1), 0, imgtmp.cols, imgtmp.rows)));
+      }
+      cv::imshow("uncertainties", outimg4);
+      cv::waitKey(100);
+
+      // Save to file for readme
+      // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
+      // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
+      // std::exit(EXIT_FAILURE);
     }
-    cv::imshow("uncertainties", outimg4);
-    cv::waitKey(100);
-
-    // Save to file for readme
-    // cv::imwrite("/home/patrick/github/segnet/docs/example_pred.png", outimg3);
-    // cv::imwrite("/home/patrick/github/segnet/docs/example_probs.png", outimg4);
-    // std::exit(EXIT_FAILURE);
-
     batch_idx++;
   }
 }

src/net_seg_train.cpp

Lines changed: 4 additions & 2 deletions
@@ -71,7 +71,9 @@ int main() {
 
   // Finally convert it to a unique pointer dataloader
   auto dataset_mapped = dataset.map(torch::data::transforms::Stack<>());
-  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), torch::data::DataLoaderOptions().batch_size(5).workers(30));
+  auto sampler = torch::data::samplers::RandomSampler(dataset.size().value());
+  auto options = torch::data::DataLoaderOptions().enforce_ordering(false).batch_size(5).workers(30);
+  auto data_loader = torch::data::make_data_loader(std::move(dataset_mapped), sampler, options);
 
   // Create the optimizer
   // torch::optim::SGD optimizer(model->parameters(), torch::optim::SGDOptions(0.01).momentum(0.5));
@@ -111,7 +113,7 @@ int main() {
     optimizer.step();
 
     // Print our the loss every once in a while
-    if (batch_idx % 10 == 0) {
+    if (batch_idx % 100 == 0) {
 
       // Debug printout
       size_t items_curr = batch_idx * batch.data.size(0);
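
Both binaries now construct their dataloaders with an explicit sampler: the test tool pairs a SequentialSampler with enforce_ordering(true) so multi-worker batches come back in dataset order, while training (above) keeps a RandomSampler with enforce_ordering(false). A self-contained sketch of that libtorch API on a toy dataset (ToyDataset is invented for illustration):

#include <torch/torch.h>
#include <iostream>

// Tiny stand-in dataset just to exercise the sampler-aware make_data_loader overload
struct ToyDataset : torch::data::datasets::Dataset<ToyDataset> {
  torch::data::Example<> get(size_t index) override {
    return {torch::full({1}, (float)index), torch::full({1}, (float)index)};
  }
  torch::optional<size_t> size() const override { return 8; }
};

int main() {
  auto dataset = ToyDataset().map(torch::data::transforms::Stack<>());

  // Deterministic, in-order iteration (what the test binary wants)
  auto sampler = torch::data::samplers::SequentialSampler(8);
  auto options = torch::data::DataLoaderOptions().enforce_ordering(true).batch_size(2).workers(2);
  auto data_loader = torch::data::make_data_loader(std::move(dataset), sampler, options);

  // Swapping in torch::data::samplers::RandomSampler(8) and enforce_ordering(false)
  // gives the shuffled, unordered behaviour used on the training side
  for (auto &batch : *data_loader) {
    std::cout << batch.data.squeeze() << std::endl;
  }
  return 0;
}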

src/network/blocks/UNetBlocks.h

Lines changed: 6 additions & 2 deletions
@@ -63,7 +63,10 @@ struct UNetDownwardsImpl : torch::nn::Module {
  }
 
  // Forward propagation
- torch::Tensor forward(torch::Tensor input) { return conv2(conv1(torch::max_pool2d(input, 2))); }
+ torch::Tensor forward(torch::Tensor input) {
+   auto output = conv2(conv1(torch::max_pool2d(input, 2)));
+   return torch::dropout(output, 0.5, this->is_training());
+ }
 
  // Parts of the network
  // NOTE: for submodules, we call the "empty holder" constructor
@@ -96,7 +99,8 @@ struct UNetUpwardsImpl : torch::nn::Module {
    input = torch::cat({input, bridge}, 1);
 
    // Finally do our convolutions and return
-   return conv2(conv1(input));
+   auto output = torch::dropout(input, 0.5, this->is_training());
+   return conv2(conv1(output));
  }
 
  // Parts of the network

src/network/models/UNetModel.h

Lines changed: 1 addition & 0 deletions
@@ -91,6 +91,7 @@ struct UNetModelImpl : torch::nn::Module {
    // First do our starting two convolutions
    x1 = inconv1(input);
    x1 = inconv2(x1);
+   x1 = torch::dropout(x1, 0.25, this->is_training());
 
    // Downscale to the bottleneck
    x2 = down1(x1);
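
The dropout added in this commit is the functional torch::dropout call gated on is_training(), so it is active under model->train() and becomes a pass-through after model->eval(). A minimal standalone sketch of that behaviour (TinyBlock is an invented module, not one of the repo's U-Net blocks):

#include <torch/torch.h>
#include <iostream>

struct TinyBlockImpl : torch::nn::Module {
  TinyBlockImpl() {
    conv = register_module("conv", torch::nn::Conv2d(torch::nn::Conv2dOptions(3, 8, 3).padding(1)));
  }
  torch::Tensor forward(torch::Tensor input) {
    auto output = torch::relu(conv(input));
    // Same pattern as the diff: p = 0.5, enabled only while the module is in training mode
    return torch::dropout(output, 0.5, this->is_training());
  }
  torch::nn::Conv2d conv{nullptr};
};
TORCH_MODULE(TinyBlock);

int main() {
  TinyBlock block;
  auto x = torch::rand({1, 3, 16, 16});

  block->train();
  auto y_train = block->forward(x);  // dropout zeroes roughly half the activations and rescales the rest

  block->eval();
  auto y_eval = block->forward(x);   // dropout is a pass-through here

  std::cout << "train nonzeros: " << (y_train != 0).sum().item<int64_t>()
            << ", eval nonzeros: " << (y_eval != 0).sum().item<int64_t>() << std::endl;
  return 0;
}

Dropping activations at p = 0.5 in every block cuts effective capacity considerably, which would be consistent with the commit message's guess that more epochs are needed before it pays off.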

src/utils/augmentations.h

Lines changed: 3 additions & 3 deletions
@@ -135,9 +135,9 @@ inline void random_camera_model(cv::Mat &cv_rgb, cv::Mat &cv_label) {
  cam.at<float>(2, 1) = 0.0f;
  cam.at<float>(2, 2) = 1.0f;
  cv::Mat dist(5, 1, cv::DataType<float>::type);
- dist.at<float>(0, 0) = 0.1 * unif_pn(rng);
- dist.at<float>(1, 0) = 0.05 * unif_pn(rng);
- dist.at<float>(2, 0) = 1e-3 * unif_pn(rng);
+ dist.at<float>(0, 0) = 0.20 * unif_pn(rng);
+ dist.at<float>(1, 0) = 0.10 * unif_pn(rng);
+ dist.at<float>(2, 0) = 1e-2 * unif_pn(rng);
  dist.at<float>(3, 0) = 1e-4 * unif_pn(rng);
  dist.at<float>(4, 0) = 1e-5 * unif_pn(rng);
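
The edit above roughly doubles the random radial and tangential distortion used by the augmentation. As a rough standalone illustration of what coefficients of this size do (not the repo's random_camera_model; the intrinsics, RNG setup, and the choice of cv::undistort to apply the warp are all assumptions):

#include <opencv2/opencv.hpp>
#include <random>

int main() {
  // Dummy input frame; in a real pipeline the same warp would also be applied to the
  // label mask, ideally with nearest-neighbour sampling so class ids stay intact
  cv::Mat cv_rgb(480, 640, CV_8UC3, cv::Scalar(50, 100, 150));

  std::mt19937 rng(std::random_device{}());
  std::uniform_real_distribution<float> unif_pn(-1.0f, 1.0f);  // assumed to mirror unif_pn in the repo

  // Simple pinhole intrinsics centred on the image (values are illustrative)
  cv::Mat cam = cv::Mat::eye(3, 3, CV_32F);
  cam.at<float>(0, 0) = 500.0f;
  cam.at<float>(1, 1) = 500.0f;
  cam.at<float>(0, 2) = cv_rgb.cols / 2.0f;
  cam.at<float>(1, 2) = cv_rgb.rows / 2.0f;

  // Random (k1, k2, p1, p2, k3) with the post-commit magnitudes
  cv::Mat dist(5, 1, CV_32F);
  dist.at<float>(0, 0) = 0.20f * unif_pn(rng);
  dist.at<float>(1, 0) = 0.10f * unif_pn(rng);
  dist.at<float>(2, 0) = 1e-2f * unif_pn(rng);
  dist.at<float>(3, 0) = 1e-4f * unif_pn(rng);
  dist.at<float>(4, 0) = 1e-5f * unif_pn(rng);

  // Warp the image with the randomized camera model
  cv::Mat warped;
  cv::undistort(cv_rgb, warped, cam, dist);
  cv::imwrite("warped.png", warped);
  return 0;
}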
