Skip to content

Commit 5c11701

Browse files
author
Eugenia Iofinova
committed
add a requirements.txt file and update preprocessing.py to be a standalone module for use in colab.
1 parent 5575e61 commit 5c11701

File tree

2 files changed

+124
-12
lines changed

2 files changed

+124
-12
lines changed

requirements.txt

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
numpy==1.15.4
2+
pathlib2==2.3.3
3+
scikit-image==0.14.1
4+
tensorflow==1.13.1
5+
tqdm==4.28.1

unet4nuclei/utils/preprocessing.py

Lines changed: 119 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,127 @@
11
import os
22

3-
def setup_working_directories(config_vars):
3+
import numpy as np
44

5-
## Expected raw data directories:
6-
config_vars["raw_images_dir"] = os.path.join(config_vars["root_directory"], 'raw_images/')
7-
config_vars["raw_annotations_dir"] = os.path.join(config_vars["root_directory"], 'raw_annotations/')
5+
import pathlib
6+
from tqdm import tqdm
87

9-
## Split files
10-
config_vars["path_files_training"] = os.path.join(config_vars["root_directory"], 'training.txt')
11-
config_vars["path_files_validation"] = os.path.join(config_vars["root_directory"], 'validation.txt')
12-
config_vars["path_files_test"] = os.path.join(config_vars["root_directory"], 'test.txt')
8+
import skimage.io
9+
import skimage.segmentation
10+
import tensorflow as tf
1311

14-
## Transformed data directories:
15-
config_vars["normalized_images_dir"] = os.path.join(config_vars["root_directory"], 'norm_images/')
16-
config_vars["boundary_labels_dir"] = os.path.join(config_vars["root_directory"], 'boundary_labels/')
12+
def preprocess_input_images(raw_images_dir, normalized_images_dir):
    """Normalize every raw image and save it as four 256x256 PNG tiles.

    Each image is clipped to its 0.1th-99.9th intensity percentile range,
    rescaled to [0, 1], converted to 8 bit and cut into the four quadrants
    of its top-left 512x512 region (pixels outside that region are dropped).
    The tiles are written as ``<stem>_1.png`` .. ``<stem>_4.png``.

    Args:
        raw_images_dir: Directory containing the raw input images.
        normalized_images_dir: Directory that receives the normalized tiles;
            it must already exist.
    """
    percentile = 99.9
    tile = 256
    # (row, column) origin of tiles 1..4, in the order the file suffixes use.
    offsets = ((0, 0), (tile, 0), (0, tile), (tile, tile))

    # Ignore hidden entries (e.g. .DS_Store) and sub-directories, which
    # cannot be read as images.
    filelist = sorted(
        name for name in os.listdir(raw_images_dir)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(raw_images_dir, name))
    )

    # run over all raw images
    for filename in tqdm(filelist):
        orig_img = skimage.io.imread(os.path.join(raw_images_dir, filename))

        # clip to the percentile range to suppress outlier pixels
        high = np.percentile(orig_img, percentile)
        low = np.percentile(orig_img, 100 - percentile)

        if high > low:
            img = np.minimum(high, orig_img)
            img = np.maximum(low, img)
            # gives float64 in [0, 1], cast to 8 bit below
            img = (img - low) / (high - low)
        else:
            # Constant image: the rescale would be 0/0, so emit all zeros.
            img = np.zeros(orig_img.shape, dtype=np.float64)

        img = skimage.img_as_ubyte(img)

        # splitext instead of filename[:-4] so any extension length works
        stem = os.path.splitext(filename)[0]
        for index, (row, col) in enumerate(offsets, start=1):
            out_path = os.path.join(
                normalized_images_dir, '{}_{}.png'.format(stem, index))
            skimage.io.imsave(out_path, img[row:row + tile, col:col + tile])
42+
43+
def preprocess_output_masks(raw_annotations_dir, boundary_labels_dir, boundary_size=2, min_nucleus_size=25):
    """Convert raw annotations into 3-channel boundary labels, tiled 256x256.

    For every annotation file the connected components are labelled, objects
    smaller than ``min_nucleus_size`` are removed and object boundaries are
    located. The result is an 8-bit, 3-channel image in which channel 0 marks
    background, channel 1 marks object interior and channel 2 marks
    boundaries. The four quadrants of its top-left 512x512 region are written
    as ``<stem>_1.png`` .. ``<stem>_4.png``. The total number of labelled
    objects (counted before small-object removal) is printed at the end.

    Args:
        raw_annotations_dir: Directory containing the raw annotation images.
        boundary_labels_dir: Directory that receives the label tiles; it must
            already exist.
        boundary_size: Boundaries are dilated once per step of
            ``range(2, boundary_size, 2)``, so the default of 2 applies no
            dilation.
        min_nucleus_size: Objects with fewer pixels than this are removed.
    """
    # The file only imports skimage.io and skimage.segmentation; import the
    # morphology submodule explicitly instead of relying on it being loaded
    # as a side effect.
    import skimage.morphology

    tile = 256
    # (row, column) origin of tiles 1..4, in the order the file suffixes use.
    offsets = ((0, 0), (tile, 0), (0, tile), (tile, tile))

    # Ignore hidden entries (e.g. .DS_Store) and sub-directories.
    filelist = sorted(
        name for name in os.listdir(raw_annotations_dir)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(raw_annotations_dir, name))
    )
    total_objects = 0

    # run over all raw annotations
    for filename in tqdm(filelist):
        annot = skimage.io.imread(os.path.join(raw_annotations_dir, filename))

        # keep only the first channel of multi-channel annotations
        if annot.ndim == 3:
            annot = annot[:, :, 0]

        # Label connected components to prepare for the size filter.
        # return_num counts the objects directly; len(unique) - 1 is off by
        # one when the image contains no background pixel.
        annot, num_objects = skimage.morphology.label(annot, return_num=True)
        total_objects += num_objects

        # filter small objects, e.g. micronuclei
        annot = skimage.morphology.remove_small_objects(annot, min_size=min_nucleus_size)

        # find boundaries and optionally thicken them
        boundaries = skimage.segmentation.find_boundaries(annot)
        for _ in range(2, boundary_size, 2):
            boundaries = skimage.morphology.binary_dilation(boundaries)

        # Build the label directly as uint8 (0 / 255) so that saving does not
        # depend on an implicit float -> 8-bit conversion.
        foreground = annot != 0
        on_boundary = boundaries != 0
        label_binary = np.zeros(annot.shape + (3,), dtype=np.uint8)
        label_binary[~foreground & ~on_boundary, 0] = 255
        label_binary[foreground & ~on_boundary, 1] = 255
        label_binary[on_boundary, 2] = 255

        # Split the image into four 256x256 squares, to make the UNet happy.
        # TODO(jen) - Pad the image to make a larger square instead?
        stem = os.path.splitext(filename)[0]
        for index, (row, col) in enumerate(offsets, start=1):
            out_path = os.path.join(
                boundary_labels_dir, '{}_{}.png'.format(stem, index))
            skimage.io.imsave(
                out_path, label_binary[row:row + tile, col:col + tile])

    print("Total objects: ",total_objects)
93+
94+
def create_tf_examples(normalized_images_dir, boundary_labels_dir, tf_examples_dir):
    """Pack each image/mask pair into its own TFRecord file.

    For every file in ``normalized_images_dir`` the raw bytes of that file
    and of the same-named file in ``boundary_labels_dir`` are stored as the
    ``'image'`` and ``'mask'`` bytes features of a single ``tf.train.Example``,
    which is written to ``<tf_examples_dir>/<stem>.tfrecord``.

    Args:
        normalized_images_dir: Directory containing the normalized image tiles.
        boundary_labels_dir: Directory containing the label tiles, using the
            same file names as ``normalized_images_dir``.
        tf_examples_dir: Directory that receives the ``.tfrecord`` files; it
            must already exist.
    """
    def _bytes_feature(value):
        # Wrap a single bytes value in a tf.train.Feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # Ignore hidden entries (e.g. .DS_Store) and sub-directories.
    filelist = sorted(
        name for name in os.listdir(normalized_images_dir)
        if not name.startswith('.')
        and os.path.isfile(os.path.join(normalized_images_dir, name))
    )

    # run over all normalized images
    for filename in tqdm(filelist):
        # Read both inputs first so that a missing mask fails before the
        # writer for this example is opened.
        # 'rb' is important: the encoded files are stored as raw bytes.
        with open(os.path.join(normalized_images_dir, filename), mode='rb') as image_file:
            image = image_file.read()
        with open(os.path.join(boundary_labels_dir, filename), mode='rb') as mask_file:
            mask = mask_file.read()

        example = tf.train.Example(features=tf.train.Features(feature={
            'image': _bytes_feature(image),
            'mask': _bytes_feature(mask),
        }))

        # splitext instead of filename[:-3] so any extension length works
        stem = os.path.splitext(filename)[0]
        record_path = os.path.join(tf_examples_dir, stem + '.tfrecord')
        with tf.python_io.TFRecordWriter(record_path) as writer:
            writer.write(example.SerializeToString())
115+
116+
if __name__ == "__main__":
    import sys

    # Data root: first command-line argument if given, otherwise the
    # original hard-coded default.
    if len(sys.argv) > 1:
        datadir = sys.argv[1]
    else:
        datadir = "/Users/eiofinova/Documents/unet4nuclei_old/data/"

    raw_images_dir = os.path.join(datadir, "raw_images/")
    normalized_images_dir = os.path.join(datadir, "norm_images/")
    raw_annotations_dir = os.path.join(datadir, "raw_annotations/")
    boundary_labels_dir = os.path.join(datadir, "boundary_labels/")
    tf_examples_dir = os.path.join(datadir, "tf_examples/")

    # The processing steps write into these directories, so make sure they
    # exist before starting.
    for out_dir in (normalized_images_dir, boundary_labels_dir, tf_examples_dir):
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)

    preprocess_input_images(raw_images_dir, normalized_images_dir)
    preprocess_output_masks(raw_annotations_dir, boundary_labels_dir)
    create_tf_examples(normalized_images_dir, boundary_labels_dir, tf_examples_dir)
20127

0 commit comments

Comments
 (0)