-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscene_retreiver.py
233 lines (201 loc) · 9.01 KB
/
scene_retreiver.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
import argparse
import os
from pathlib import Path
import platform
import shutil
import sys
import geopandas as gpd
from tqdm import tqdm
# from lib.db import Postgres, ids2sql
from lib.lib import get_config, linux2win, read_ids, write_gdf, \
get_platform_location, PlanetScene
# from shelve_scenes import shelve_scenes
from lib.logging_utils import create_logger
import lib.constants as constants
# TODO: Add ability to select by either ID (current method) or filename
logger = create_logger(__name__, 'sh', 'DEBUG')
# External modules
# Make the directory one level above this file's directory importable; the
# error message below states db_utils is expected to live there.
sys.path.append(str(Path(__file__).parent / '..'))
try:
    from db_utils.db import Postgres, ids2sql
except ImportError:
    logger.error('db_utils module not found. It should be adjacent to '
                 'the planet_tools directory. Path: \n{}'.format(sys.path))
    # Exit with a non-zero status so the failure is visible to the caller
    # (a bare sys.exit() reports success).
    sys.exit(1)
# Params
PLATFORM_LOCATION = 'platform_location'
REQUIRED_FIELDS = [constants.ID, constants.SHELVED_LOC]
# Transfer methods
TM_LINK = 'link'
TM_COPY = 'copy'
# Base location of shelved data as configured, wrapped in a Path.
shelved_base = get_config(constants.SHELVED_LOC)
# On Windows the configured location is converted with linux2win first.
shelved_base = Path(linux2win(shelved_base)
                    if platform.system() == constants.WINDOWS
                    else shelved_base)
def load_selection(scene_ids_path=None, footprint_path=None):
    """Load the selection of scenes to retrieve.

    If ``scene_ids_path`` is given, the IDs in that file are looked up in
    the onhand table (``constants.SCENES_ONHAND``). Otherwise the footprint
    at ``footprint_path`` is read directly. ``scene_ids_path`` takes
    precedence when both are provided.

    Args:
        scene_ids_path: Path to a file of scene IDs, read with ``read_ids``.
        footprint_path: Path to a footprint file, read with
            ``geopandas.read_file``.

    Returns:
        The selected records: the result of ``Postgres.sql2gdf`` (with
        duplicate IDs dropped) or of ``geopandas.read_file``.

    Raises:
        ValueError: If neither ``scene_ids_path`` nor ``footprint_path`` is
            provided.
    """
    # Fail clearly instead of reaching ``return gdf`` with gdf unbound.
    if not scene_ids_path and not footprint_path:
        raise ValueError('Either scene_ids_path or footprint_path must be '
                         'provided.')

    if scene_ids_path:
        # Look up locations of the requested IDs in the onhand table
        scene_ids = read_ids(scene_ids_path)
        logger.info('Total IDs found: {:,}'.format(len(scene_ids)))
        scene_ids = set(scene_ids)
        logger.info('Unique IDs found: {:,}'.format(len(scene_ids)))

        # NOTE(review): the query is assembled with str.format and the IDs
        # come from a user-supplied file -- confirm ids2sql quotes/escapes
        # its values.
        sql = """
        SELECT * FROM {}
        WHERE {} IN ({})""".format(constants.SCENES_ONHAND, constants.ID,
                                   ids2sql(scene_ids))
        logger.info('Loading shelved locations from onhand database: '
                    '{}'.format(constants.SCENES_ONHAND))
        with Postgres(host=constants.SANDWICH,
                      database=constants.PLANET) as db_src:
            gdf = db_src.sql2gdf(sql_str=sql)
            # TODO: Remove this once Postgres restriction on DUPS is
            #  implemented -> there should be no DUPs in scenes table
            gdf = gdf.drop_duplicates(subset=constants.ID)
            logger.info('IDs found in {}: {:,}'.format(constants.SCENES_ONHAND,
                                                       len(gdf)))
    else:
        # Use provided footprint
        gdf = gpd.read_file(footprint_path)
        # Make sure required fields are present
        for field in [constants.SHELVED_LOC, constants.ID]:
            if field not in gdf.columns:
                # .format() must be applied to the message itself, not to
                # the (None) return value of logger.error().
                logger.error('Selection footprint missing required field: '
                             '"{}"'.format(field))

    return gdf
def locate_scenes(selection, destination_path):
    """Create a PlanetScene for each unique shelved location in selection.

    Side effect: a ``PLATFORM_LOCATION`` column is added to ``selection``
    holding every ``constants.SHELVED_LOC`` value passed through
    ``get_platform_location``.

    Args:
        selection: Table of scenes with a ``constants.SHELVED_LOC`` column.
        destination_path: Not used by this function.

    Returns:
        List of PlanetScene objects, one per unique platform location.
    """
    logger.info('Locating scene files...')

    # Translate each shelved location to the current platform
    # (Windows/Linux) where needed
    shelved_locations = selection[constants.SHELVED_LOC]
    selection[PLATFORM_LOCATION] = shelved_locations.apply(
        get_platform_location)

    # One scene object per distinct location
    unique_locations = selection[PLATFORM_LOCATION].unique()
    return [PlanetScene(location, scene_file_source=True)
            for location in tqdm(unique_locations)]
# src_files = []
# for g in tqdm(scene_path_globs):
# for f in g:
# src_files.append(f)
#
# logger.info('Source files found: {:,}'.format(len(src_files)))
#
# return src_files
def copy_files(scenes, destination_path,
               use_shelved_struct=False,
               transfer_method=TM_COPY,
               dryrun=False):
    """Copy or hard-link the files of each scene into destination_path.

    Destinations that already exist are skipped.

    Args:
        scenes: Iterable of objects exposing ``scene_files``, an iterable of
            path objects (``.name`` and ``.relative_to`` are used).
        destination_path: Path object of the directory to write into.
        use_shelved_struct: If True, each file keeps its path relative to
            ``shelved_base`` underneath ``destination_path``; otherwise all
            files are placed directly in ``destination_path``.
        transfer_method: ``TM_LINK`` creates hard links with ``os.link``;
            any other value copies with ``shutil.copy2``.
        dryrun: If True, log each planned transfer without creating
            directories or files.
    """
    # Create destination folder structure
    # TODO: Option to build directory tree the same way we will index
    #  (and other options, --opf)
    logger.info('Creating destination paths...')
    src_dsts = []
    for scene in tqdm(scenes):
        for src in scene.scene_files:
            if use_shelved_struct:
                # Use same folder structure as shelved data
                dst = destination_path / src.relative_to(shelved_base)
            else:
                # Flat structure, just add the filename to the destination
                dst = destination_path / src.name
            src_dsts.append((src, dst))

    # Remove any destinations that already exist from copy list
    src_dsts = [(src, dst) for src, dst in src_dsts if not dst.exists()]

    logger.info('Copying files...')
    use_link = transfer_method == TM_LINK
    for src, dst in tqdm(src_dsts, desc='Copying...'):
        # Re-check at transfer time: with the flat layout two sources can
        # map to the same destination, so an earlier iteration may have
        # created it.
        if dst.exists():
            logger.debug('Destination file exists, skipping: '
                         '{}'.format(src.name))
            continue
        if dryrun:
            # Report the planned action instead of silently doing nothing
            logger.info('[dryrun] {} -> {}'.format(src, dst))
            continue
        # exist_ok avoids failing if the directory appears between a
        # separate existence check and its creation.
        dst.parent.mkdir(parents=True, exist_ok=True)
        if use_link:
            os.link(src, dst)
        else:
            shutil.copy2(src, dst)

    logger.info('File transfer complete.')
def scene_retreiver(scene_ids_path=None,
                    footprint_path=None,
                    destination_path=None,
                    use_shelved_struct=False,
                    transfer_method=TM_COPY,
                    out_footprint=None,
                    dryrun=False):
    """Load a selection of scenes, locate their files and transfer them.

    Args:
        scene_ids_path: Path to a file of scene IDs. Takes precedence over
            ``footprint_path`` when both are given.
        footprint_path: Path to a footprint of scenes to retrieve.
        destination_path: Directory to write the scene files to.
        use_shelved_struct: Passed to ``copy_files``; keep the shelved
            folder structure under the destination.
        transfer_method: Passed to ``copy_files`` (``TM_COPY`` or
            ``TM_LINK``).
        out_footprint: If given, the loaded selection is written to this
            path with ``write_gdf``.
        dryrun: Passed to ``copy_files``; do not perform the transfer.

    Raises:
        ValueError: If ``destination_path`` is missing, or if neither
            ``scene_ids_path`` nor ``footprint_path`` is provided.
    """
    # Validate up front so a bad call fails before any database query or
    # file lookup, rather than with an unrelated error later on.
    if not scene_ids_path and not footprint_path:
        raise ValueError('Either scene_ids_path or footprint_path must be '
                         'provided.')
    if not destination_path:
        raise ValueError('destination_path must be provided.')

    # Convert string paths to pathlib.Path
    if scene_ids_path:
        scene_ids_path = Path(scene_ids_path)
    if footprint_path:
        footprint_path = Path(footprint_path)
    destination_path = Path(destination_path)

    # Load selection
    selection = load_selection(scene_ids_path=scene_ids_path,
                               footprint_path=footprint_path)
    # Locate source files
    scenes = locate_scenes(selection=selection,
                           destination_path=destination_path)
    # Copy to destination
    copy_files(scenes=scenes, destination_path=destination_path,
               use_shelved_struct=use_shelved_struct,
               transfer_method=transfer_method, dryrun=dryrun)

    if out_footprint:
        # The selection only comes from the footprint when no IDs were
        # given (IDs take precedence), so only warn in that case.
        if footprint_path and not scene_ids_path:
            logger.warning('Footprint selection provided - output footprint '
                           'will be identical.')
        logger.info('Writing footprint to file: {}'.format(out_footprint))
        write_gdf(selection, out_footprint=out_footprint)
        logger.info('Footprint writing complete.')
if __name__ == '__main__':
    # Command-line entry point: parse arguments and run scene_retreiver.
    parser = argparse.ArgumentParser(
        description=('Scene retriever for Planet data. Input can either be '
                     'list of IDs or selection from onhand table.'))

    parser.add_argument('--ids', type=os.path.abspath,
                        help='Path to list of IDs to retrieve.')
    parser.add_argument('--footprint', type=os.path.abspath,
                        help='Path to footprint of scenes to retreive. Must '
                             'contain field: {}'.format(REQUIRED_FIELDS))
    # The destination is needed for every run, so require it here rather
    # than failing later during the transfer.
    parser.add_argument('-d', '--destination', type=os.path.abspath,
                        required=True,
                        help='Path to directory to write scenes to.')
    parser.add_argument('-uss', '--use_shelved_struct', action='store_true',
                        help='Use the same folder structure in destination '
                             'directory that is used for shelving.')
    parser.add_argument('--out_footprint', type=os.path.abspath,
                        help='Path to write footprint (only useful if '
                             'providing a list of IDs.)')
    # Default explicitly to copying instead of passing None through.
    parser.add_argument('-tm', '--transfer_method', type=str,
                        choices=[TM_COPY, TM_LINK], default=TM_COPY,
                        help='Transfer method to use.')
    parser.add_argument('--dryrun', action='store_true',
                        help='Print actions without performing copy.')

    args = parser.parse_args()

    # A selection source is mandatory. Both may still be given, in which
    # case the ID list is the one used.
    if not (args.ids or args.footprint):
        parser.error('One of --ids or --footprint is required.')

    scene_retreiver(scene_ids_path=args.ids,
                    footprint_path=args.footprint,
                    destination_path=args.destination,
                    use_shelved_struct=args.use_shelved_struct,
                    out_footprint=args.out_footprint,
                    transfer_method=args.transfer_method,
                    dryrun=args.dryrun)