|
1 | 1 | import os
|
2 | 2 |
|
3 |
| -def setup_working_directories(config_vars): |
| 3 | +import numpy as np |
4 | 4 |
|
5 |
| - ## Expected raw data directories: |
6 |
| - config_vars["raw_images_dir"] = os.path.join(config_vars["root_directory"], 'raw_images/') |
7 |
| - config_vars["raw_annotations_dir"] = os.path.join(config_vars["root_directory"], 'raw_annotations/') |
| 5 | +import pathlib |
| 6 | +from tqdm import tqdm |
8 | 7 |
|
9 |
| - ## Split files |
10 |
| - config_vars["path_files_training"] = os.path.join(config_vars["root_directory"], 'training.txt') |
11 |
| - config_vars["path_files_validation"] = os.path.join(config_vars["root_directory"], 'validation.txt') |
12 |
| - config_vars["path_files_test"] = os.path.join(config_vars["root_directory"], 'test.txt') |
| 8 | +import skimage.io |
| 9 | +import skimage.segmentation |
| 10 | +import tensorflow as tf |
13 | 11 |
|
14 |
| - ## Transformed data directories: |
15 |
| - config_vars["normalized_images_dir"] = os.path.join(config_vars["root_directory"], 'norm_images/') |
16 |
| - config_vars["boundary_labels_dir"] = os.path.join(config_vars["root_directory"], 'boundary_labels/') |
def preprocess_input_images(raw_images_dir, normalized_images_dir, tile_size=256):
    """Normalize every raw image and save it as four 8-bit PNG tiles.

    Each file in ``raw_images_dir`` is read, clipped to its 0.1st-99.9th
    intensity percentiles, rescaled to [0, 1], converted to 8 bit and cut
    into a 2x2 grid of ``tile_size`` x ``tile_size`` tiles. The tiles are
    written to ``normalized_images_dir`` as ``<stem>_1.png`` through
    ``<stem>_4.png``, where ``<stem>`` is the file name without extension.

    Args:
        raw_images_dir: Directory containing the raw input images.
        normalized_images_dir: Directory that receives the tiles. This
            function does not create it.
        tile_size: Edge length of each tile in pixels. Only the top-left
            ``2 * tile_size`` square of every image is covered.
    """
    percentile = 99.9
    # (row, col) origin of tiles 1..4; the order fixes the file suffixes.
    tile_origins = (
        (0, 0),
        (tile_size, 0),
        (0, tile_size),
        (tile_size, tile_size),
    )

    for filename in tqdm(sorted(os.listdir(raw_images_dir))):
        orig_img = skimage.io.imread(os.path.join(raw_images_dir, filename))

        # Clip outliers so a few hot pixels do not compress the useful range.
        high = np.percentile(orig_img, percentile)
        low = np.percentile(orig_img, 100 - percentile)
        img = np.clip(orig_img.astype(np.float64), low, high)

        # Rescale to [0, 1]; a constant image has no range to stretch, so
        # map it to zeros instead of dividing by zero.
        span = high - low
        if span > 0:
            img = (img - low) / span
        else:
            img = np.zeros(img.shape)

        # The rescaled image is float64; store it as 8 bit.
        img = skimage.img_as_ubyte(img)

        stem = os.path.splitext(filename)[0]
        for index, (row, col) in enumerate(tile_origins, start=1):
            tile = img[row:row + tile_size, col:col + tile_size]
            tile_name = '{}_{}.png'.format(stem, index)
            skimage.io.imsave(os.path.join(normalized_images_dir, tile_name), tile)
| 42 | + |
def preprocess_output_masks(raw_annotations_dir, boundary_labels_dir, boundary_size=2, min_nucleus_size=25, tile_size=256):
    """Turn raw annotations into 3-channel boundary labels saved as PNG tiles.

    Each annotation in ``raw_annotations_dir`` is labelled, stripped of
    objects smaller than ``min_nucleus_size`` and converted into a label
    image with one channel per class: channel 0 is background, channel 1 is
    object interior and channel 2 is object boundary. The label image is cut
    into a 2x2 grid of ``tile_size`` x ``tile_size`` tiles written to
    ``boundary_labels_dir`` as ``<stem>_1.png`` through ``<stem>_4.png``.
    The number of labelled objects, counted before small objects are
    removed, is printed at the end.

    Args:
        raw_annotations_dir: Directory containing the raw annotation images.
        boundary_labels_dir: Directory that receives the label tiles. This
            function does not create it.
        boundary_size: Boundary thickness control; one extra dilation is
            applied for every even value in ``range(2, boundary_size, 2)``,
            so the default of 2 applies none.
        min_nucleus_size: Objects smaller than this are removed.
        tile_size: Edge length of each tile in pixels. Only the top-left
            ``2 * tile_size`` square of every annotation is covered.
    """
    # Imported here so the function does not depend on skimage.morphology
    # having been loaded as a side effect of the file's other skimage imports.
    import skimage.morphology

    # (row, col) origin of tiles 1..4; the order fixes the file suffixes.
    tile_origins = (
        (0, 0),
        (tile_size, 0),
        (0, tile_size),
        (tile_size, tile_size),
    )
    total_objects = 0

    for filename in tqdm(sorted(os.listdir(raw_annotations_dir))):
        annot = skimage.io.imread(os.path.join(raw_annotations_dir, filename))

        # Multi-channel annotations: keep only the first channel.
        if annot.ndim == 3:
            annot = annot[:, :, 0]

        # Relabel so the size filter below works on connected components.
        annot = skimage.morphology.label(annot)
        total_objects += len(np.unique(annot)) - 1

        # Filter small objects, e.g. micronuclei.
        annot = skimage.morphology.remove_small_objects(annot, min_size=min_nucleus_size)

        boundaries = skimage.segmentation.find_boundaries(annot)
        for _ in range(2, boundary_size, 2):
            boundaries = skimage.morphology.binary_dilation(boundaries)

        # One channel per class: background, interior, boundary.
        label_binary = np.zeros(annot.shape + (3,))
        label_binary[(annot == 0) & (boundaries == 0), 0] = 1
        label_binary[(annot != 0) & (boundaries == 0), 1] = 1
        label_binary[boundaries == 1, 2] = 1

        # Convert explicitly to 8 bit (0/255) rather than leaving the
        # float-to-PNG conversion to the image writer.
        label_binary = skimage.img_as_ubyte(label_binary)

        # Split the label image into four square tiles for the UNet.
        # TODO(jen) - Pad the image to make a larger square instead?
        stem = os.path.splitext(filename)[0]
        for index, (row, col) in enumerate(tile_origins, start=1):
            tile = label_binary[row:row + tile_size, col:col + tile_size]
            tile_name = '{}_{}.png'.format(stem, index)
            skimage.io.imsave(os.path.join(boundary_labels_dir, tile_name), tile)

    print("Total objects: ",total_objects)
| 93 | + |
def create_tf_examples(normalized_images_dir, boundary_labels_dir, tf_examples_dir):
    """Write one TFRecord file per normalized image tile.

    For every file in ``normalized_images_dir`` the file of the same name is
    read from ``boundary_labels_dir``; the raw bytes of both are stored in a
    ``tf.train.Example`` under the keys ``'image'`` and ``'mask'`` and
    written to ``<stem>.tfrecord`` in ``tf_examples_dir``.

    Args:
        normalized_images_dir: Directory with the normalized image files.
        boundary_labels_dir: Directory with label files named identically
            to the image files.
        tf_examples_dir: Directory that receives the ``.tfrecord`` files.
            This function does not create it.
    """
    def _bytes_feature(value):
        # Wrap a single bytes value in a tf.train.Feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    # tf.python_io is gone in TensorFlow 2; use tf.io when it is available
    # and fall back to the TensorFlow 1 location otherwise.
    writer_cls = getattr(getattr(tf, 'io', None), 'TFRecordWriter', None)
    if writer_cls is None:
        writer_cls = tf.python_io.TFRecordWriter

    for filename in tqdm(sorted(os.listdir(normalized_images_dir))):
        # Read both inputs before opening the writer so a missing mask does
        # not leave an empty .tfrecord file behind.
        with open(os.path.join(normalized_images_dir, filename), mode='rb') as image_file:
            image = image_file.read()
        with open(os.path.join(boundary_labels_dir, filename), mode='rb') as mask_file:
            mask = mask_file.read()

        example = tf.train.Example(features=tf.train.Features(
            feature={
                'image': _bytes_feature(image),
                'mask': _bytes_feature(mask),
            }))

        record_name = os.path.splitext(filename)[0] + '.tfrecord'
        with writer_cls(os.path.join(tf_examples_dir, record_name)) as writer:
            writer.write(example.SerializeToString())
| 115 | + |
if __name__ == "__main__":
    import sys

    # Data root: first command-line argument if given, otherwise the
    # original hard-coded location.
    if len(sys.argv) > 1:
        datadir = sys.argv[1]
    else:
        datadir = "/Users/eiofinova/Documents/unet4nuclei_old/data/"

    # Trailing slashes are kept so the directories also work with code that
    # builds file paths by plain string concatenation.
    raw_images_dir = os.path.join(datadir, "raw_images/")
    normalized_images_dir = os.path.join(datadir, "norm_images/")
    raw_annotations_dir = os.path.join(datadir, "raw_annotations/")
    boundary_labels_dir = os.path.join(datadir, "boundary_labels/")
    tf_examples_dir = os.path.join(datadir, "tf_examples/")

    # The processing steps only write into these directories; make sure
    # they exist before the first file is saved.
    for output_dir in (normalized_images_dir, boundary_labels_dir, tf_examples_dir):
        if not os.path.isdir(output_dir):
            os.makedirs(output_dir)

    preprocess_input_images(raw_images_dir, normalized_images_dir)
    preprocess_output_masks(raw_annotations_dir, boundary_labels_dir)
    create_tf_examples(normalized_images_dir, boundary_labels_dir, tf_examples_dir)
20 | 127 |
|
0 commit comments