Skip to content

Commit 9134342

Browse files
authored
Tile images for general preprocessing/super-resolution
1 parent 2a34a30 commit 9134342

File tree

1 file changed

+257
-0
lines changed

1 file changed

+257
-0
lines changed

tile_im.py

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
#!/usr/bin/env python2
2+
# -*- coding: utf-8 -*-
3+
"""
4+
Created on Thu Aug 23 15:55:35 2018
5+
6+
@author: avanetten
7+
"""
8+
9+
10+
from __future__ import print_function
11+
12+
import os
13+
import time
14+
import argparse
15+
import numpy as np
16+
import pandas as pd
17+
import cv2
18+
# cv2 can't load large files, so need to import skimage too
19+
import skimage.io
20+
21+
#import sys
22+
#path_basiss = os.path.dirname(os.path.realpath(__file__))
23+
#sys.path.append(path_basiss)
24+
#import basiss
25+
26+
###############################################################################
27+
def slice_ims(im_dir, out_dir, slice_x, slice_y,
              stride_x, stride_y,
              pos_columns = ['idx', 'name', 'name_full', 'xmin',
                             'ymin', 'slice_x',
                             'slice_y', 'im_x', 'im_y'],
              sep0='__', sep1='_', pad=0, ext='.tif', verbose=True):
    '''Slice every '.tif' image found in im_dir into windows and write the
    windows to out_dir.
    Adapted from basiss.py

    Windows are slice_x by slice_y pixels and are stepped by stride_x /
    stride_y.  Windows that would run past the right or bottom edge are
    shifted back so that they end on the edge; windows whose maximum pixel
    value is below 1 (entirely black) are skipped.

    Each tile is saved as
        <name><sep0><ymin><sep1><xmin><sep1><slice_y><sep1><slice_x><sep1><pad><sep1><w><sep1><h><ext>
    where <name> is the input file name without its extension and w, h are
    the width and height of the input image.  pad is only recorded in the
    file name; no padding is applied here.

    Arguments
    ---------
    im_dir : directory holding the input images (only '.tif' files are read)
    out_dir : directory the tiles are written to; it is not created here
    slice_x, slice_y : tile width and height in pixels
    stride_x, stride_y : step between consecutive tile origins in pixels
    pos_columns : column names of the returned DataFrame (nine entries,
        matching [idx, name, name_full, xmin, ymin, slice_x, slice_y, w, h])
    sep0, sep1 : separators used when building the tile file name
    pad : value embedded in the tile file name
    ext : extension (and therefore output format) of the tile files
    verbose : print progress information if True

    Returns
    -------
    df_pos : pandas DataFrame with one row per recorded tile
    '''

    if verbose:
        print("Slicing images in:", im_dir)

    t0 = time.time()
    count = 0
    pos_list = []

    # sorted so that the 'idx' column is reproducible between runs
    # (os.listdir returns entries in arbitrary order)
    im_roots = sorted(z for z in os.listdir(im_dir) if z.endswith('.tif'))

    for i, im_root in enumerate(im_roots):

        im_path = os.path.join(im_dir, im_root)
        if verbose:
            print(i, "/", len(im_roots), "im_path:", im_path)
        # splitext keeps dots inside the stem, so 'a.1.tif' and 'a.2.tif'
        # do not both collapse to 'a' and overwrite each other's tiles
        name = os.path.splitext(im_root)[0]

        # cv2 can't load large files.  Depending on the failure it either
        # raises or just returns None, so both cases fall back to skimage.
        use_skimage = False
        try:
            im = cv2.imread(im_path)
        except Exception:
            im = None
        if im is None:
            # load with skimage, (reversed order of bands)
            im = skimage.io.imread(im_path)
            use_skimage = True

        # only the first two axes are needed; this also accepts
        # single-band arrays that have no channel axis
        h, w = im.shape[:2]
        if verbose:
            print("im.shape:", im.shape)

        seen_coords = set()

        # dice it up: iterate through the image and bin it appropriately
        for x in range(0, w, stride_x):
            for y in range(0, h, stride_y):

                # shift windows that overrun the edge back inside the image.
                # When the slice fits in the image this clamp guarantees
                # xmin + slice_x <= w and ymin + slice_y <= h, so no
                # further bounds check is needed.
                xmin = max(0, min(x, w - slice_x))
                ymin = max(0, min(y, h - slice_y))
                coords = (xmin, ymin)

                # clamping can map several (x, y) onto the same window
                if coords in seen_coords:
                    continue
                seen_coords.add(coords)

                # get satellite image cutout
                im_cutout = im[ymin:ymin + slice_y,
                               xmin:xmin + slice_x]

                # skip if the whole thing is black
                if np.max(im_cutout) < 1.:
                    continue
                count += 1

                if verbose and (count % 50) == 0:
                    print("count:", count, "x:", x, "y:", y)

                # set slice name
                name_full = name + sep0 + sep1.join(
                    str(v) for v in (ymin, xmin, slice_y, slice_x,
                                     pad, w, h)) + ext

                pos_list.append([i, name, name_full, xmin, ymin,
                                 slice_x, slice_y, w, h])

                name_out = os.path.join(out_dir, name_full)

                if use_skimage and im_cutout.ndim == 3:
                    # if we read in with skimage, need to reverse colors
                    im_cutout = cv2.cvtColor(im_cutout, cv2.COLOR_RGB2BGR)
                # imwrite reports failure through its return value
                if not cv2.imwrite(name_out, im_cutout):
                    print("Warning: failed to write tile:", name_out)

    # create position dataframe
    df_pos = pd.DataFrame(pos_list, columns=pos_columns)
    df_pos.index = np.arange(len(df_pos))

    if verbose:
        print("  len df;", len(df_pos))
        print("  Time to slice arrays:", time.time() - t0, "seconds")

    return df_pos
150+
151+
###############################################################################
152+
def main():
    '''Command line entry point: tile the images in --im_dir into --out_dir
    and save the table of tile positions as a csv.

    The csv is written next to the output folder as '<out_dir>_df.csv'.
    '''

    # construct the argument parse and parse the arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--im_dir', type=str, default='/ims/to/tile/',
                        help="images location")
    parser.add_argument('--out_dir', type=str, default='/output/folder/for/tiles/',
                        help="output_images location")
    parser.add_argument('--slice_x', type=int, default=544)
    parser.add_argument('--slice_y', type=int, default=544)
    parser.add_argument('--stride_x', type=int, default=108)
    parser.add_argument('--stride_y', type=int, default=108)
    args = parser.parse_args()

    # makedirs (rather than mkdir) so nested output paths are created too
    if not os.path.isdir(args.out_dir):
        os.makedirs(args.out_dir)

    df_pos = slice_ims(args.im_dir, args.out_dir, args.slice_x, args.slice_y,
                       args.stride_x, args.stride_y,
                       pos_columns = ['idx', 'name', 'name_full', 'xmin',
                                      'ymin', 'slice_x',
                                      'slice_y', 'im_x', 'im_y'],
                       verbose=True)

    # normalise first: with a trailing separator basename() is empty and the
    # csv would be written to '<out_dir>/_df.csv' inside the tile folder
    out_dir = os.path.normpath(args.out_dir)
    path_tile_df_csv = os.path.join(os.path.dirname(out_dir),
                                    os.path.basename(out_dir) + '_df.csv')
    # save to file
    df_pos.to_csv(path_tile_df_csv)
    print("df saved to file:", path_tile_df_csv)
182+
183+
184+
# # use config file
185+
# # use config file?
186+
# from config import Config
187+
# import json
188+
# parser = argparse.ArgumentParser()
189+
# parser.add_argument('config_path')
190+
# args = parser.parse_args()
191+
# # get config
192+
# with open(args.config_path, 'r') as f:
193+
# cfg = json.load(f)
194+
# config = Config(**cfg)
195+
#
196+
# # get input dir
197+
# path_images_8bit = os.path.join(config.path_data_root, config.test_data_refined_dir)
198+
#
199+
# # make output dirs
200+
# # first, results dir
201+
# res_dir = os.path.join(config.path_results_root, config.test_results_dir)
202+
# os.makedirs(res_dir, exist_ok=True)
203+
# path_tile_df_csv = os.path.join(config.path_results_root, config.test_results_dir, config.tile_df_csv)
204+
# path_tile_df_csv2 = os.path.join(config.path_data_root, os.path.dirname(config.test_sliced_dir), config.tile_df_csv)
205+
#
206+
# # path for sliced data
207+
# path_sliced = os.path.join(config.path_data_root, config.test_sliced_dir)
208+
#
209+
# #if not os.path.exists(config.results_dir):
210+
# # os.mkdir(config.results_dir)
211+
# #if not os.path.exists(config.path_sliced):
212+
# # os.mkdir(config.path_sliced)
213+
#
214+
#     # only run if nonzero tile and sliced_dir
215+
# if (len(config.test_sliced_dir) > 0) and (config.slice_x > 0):
216+
# os.makedirs(path_sliced, exist_ok=True)
217+
#
218+
#
219+
# df_pos = slice_ims(path_images_8bit, path_sliced,
220+
# config.slice_x, config.slice_y,
221+
# config.stride_x, config.stride_y,
222+
# pos_columns = ['idx', 'name', 'name_full', 'xmin',
223+
# 'ymin', 'slice_x',
224+
# 'slice_y', 'im_x', 'im_y'],
225+
# verbose=True)
226+
# # save to file
227+
# df_pos.to_csv(path_tile_df_csv)
228+
# print ("df saved to file:", path_tile_df_csv)
229+
# # also csv save to data dir
230+
# df_pos.to_csv(path_tile_df_csv2)
231+
232+
233+
# # iterate through im_dir and gather files
234+
# im_arr = []
235+
# name_arr = []
236+
# mask_arr = []
237+
#    im_roots = [z for z in os.listdir(args.im_dir) if z.endswith('.tif')]
238+
# for i,im_root in enumerate(im_roots):
239+
# im_file_name = os.path.join(im_dir, im_root)
240+
# im = cv2.imread(im_file_name, 1)
241+
# name_arr.append(im_file_name)
242+
# im_arr.append(im)
243+
#
244+
# # slice
245+
# df_pos, name_out_arr, im_out_arr, mask_out_arr = \
246+
# basiss.slice_ims(im_arr, mask_arr, names_arr,
247+
# args.slice_x, args.slice_y,
248+
# args.stride_x, args.stride_y,
249+
# pos_columns = ['idx', 'name', 'xmin',
250+
# 'ymin', 'slice_x',
251+
# 'slice_y', 'im_x', 'im_y'],
252+
# verbose=True)
253+
254+
255+
###############################################################################
256+
# Run the tiling command line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == '__main__':
    main()

0 commit comments

Comments
 (0)