planet_search_functions.py
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import numpy as np
import itertools
import json
import pandas as pd
from shapely.geometry import Polygon
import datetime
import subprocess
import os
from pathlib import Path
def check_overlap(features, aoi, min_overlap=99):
    """
    Check the overlap percentage between a list of scenes and a given reference polygon.

    Args:
        features (list): List of scenes.
        aoi (str): Path to the reference polygon file.
        min_overlap (int or float): Minimum overlap percentage (default: 99).

    Returns:
        list: List of scenes that satisfy the minimum overlap.
    """
    with open(aoi) as f:
        gj = json.load(f)
    aoi = Polygon([tuple(coords) for coords in gj["features"][0]["geometry"]["coordinates"][0]])
    keep = np.full(len(features), False, dtype=bool)
    for i, feat in enumerate(features):
        geom = feat["geometry"]["coordinates"][0]
        p = Polygon([tuple(coords) for coords in geom])
        intersection = aoi.intersection(p)
        overlap = intersection.area / aoi.area * 100
        if overlap >= min_overlap:
            keep[i] = True
    return list(itertools.compress(features, keep))
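# Hypothetical usage sketch -- the file names and the features list are assumptions,
# not inputs shipped with this module:
# features = [json.loads(line) for line in open("search.geojson")]
# fully_covering = check_overlap(features, "aoi.geojson", min_overlap=95)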
def get_orbit(features, direction="NE"):
    """
    Filter features by orbit direction.

    It is not possible to filter by orbit given the feature properties from Planet.
    Thus, the function naively infers the flight direction from the footprint corners
    and filters accordingly. Only relevant for PS2 (Dove-C) data.

    Args:
        features (list): List of features to filter.
        direction (str): Orbit direction ("NE" for Northeast, "NW" for Northwest) (default: "NE").

    Returns:
        list: Filtered list of features.
    """
    keep = np.full(len(features), False, dtype=bool)
    for i, feat in enumerate(features):
        geom = feat["geometry"]["coordinates"][0][0:-1]  # drop the last vertex, which repeats the first to close the polygon
        # Corners are unfortunately not always in the right order
        lon = [c[0] for c in geom]
        lat = [c[1] for c in geom]
        left = np.argsort(lon)[0:2]  # indices of the two westernmost corners
        leftlat = np.array(lat)[left]
        # Now find which of these two points has the lowest latitude
        if np.argmin(leftlat) == 0 and direction == "NE":  # NE case
            keep[i] = True
        elif np.argmin(leftlat) == 1 and direction == "NW":  # NW case
            keep[i] = True
    return list(itertools.compress(features, keep))
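# Hypothetical usage sketch (features as loaded in the example above):
# ne_scenes = get_orbit(features, direction="NE")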
def search_planet_catalog(instrument, aoi=None, ids=None, date_start="2010-01-01", date_stop="2040-01-01", view_ang_min=0, view_ang_max=20, cloud_cover_max=1, path="./"):
    """
    Search the Planet catalog for scenes based on the provided criteria.

    Make sure to provide either a list of ids or a GeoJSON that outlines the AOI.

    Args:
        instrument (str): Instrument name (PSB.SD, PS2.SD or PS2).
        aoi (str): Path to the GeoJSON file outlining the AOI (default: None).
        ids (list): List of scene ids (default: None).
        date_start (str): Start date in the format "YYYY-MM-DD" (default: "2010-01-01").
        date_stop (str): Stop date in the format "YYYY-MM-DD" (default: "2040-01-01").
        view_ang_min (int): Minimum view angle (default: 0).
        view_ang_max (int): Maximum view angle (default: 20).
        cloud_cover_max (int): Maximum cloud cover (default: 1).
        path (str): Path to store the search results (default: "./").

    Returns:
        str: Path to the generated search GeoJSON file.
    """
    if ids is not None:
        print("Searching for scenes based on the provided ids...")  # add --std-quality to exclude test scenes
        # search for potential scenes
        search = f"planet data filter --date-range acquired gte {date_start} --range cloud_cover lte {cloud_cover_max} --date-range acquired lte {date_stop} --string-in instrument {instrument} --string-in id {','.join(ids)} --range view_angle gte {view_ang_min} --range view_angle lte {view_ang_max} --string-in ground_control true > {path}filter.json"
    elif aoi is not None:
        print("Searching for scenes based on the provided geometry...")
        search = f"planet data filter --date-range acquired gte {date_start} --range cloud_cover lte {cloud_cover_max} --date-range acquired lte {date_stop} --string-in instrument {instrument} --geom {aoi} --string-in ground_control true --range view_angle gte {view_ang_min} --range view_angle lte {view_ang_max} > {path}filter.json"
    else:
        print("Please provide either a list of ids or a valid geometry to constrain the search.")
        return
    result = subprocess.run(search, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.stderr != "":
        print(result.stderr)
    search = f"planet data search PSScene --limit 0 --filter {path}filter.json > {path}search.geojson"
    result = subprocess.run(search, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if result.stderr != "":
        print(result.stderr)
    return f"{path}search.geojson"
def refine_search_and_convert_to_csv(searchfile, aoi, instrument="PSB.SD", orbit="NE", min_overlap=99, az_angle=10):
    """
    Filter the result from search_planet_catalog to images that overlap the AOI polygon by at least a minimum percentage.

    Also filters data by orbit (only relevant for Dove-C) and calculates the true view angle
    considering the look direction.

    Args:
        searchfile (str): Path to the search file.
        aoi (str): Path to the reference polygon GeoJSON file.
        instrument (str): Instrument name. Not relevant if not PS2 or PS2.SD (default: "PSB.SD").
        orbit (str): Orbit direction. Not relevant if not PS2. ("NE" for Northeast, "NW" for Northwest) (default: "NE").
        min_overlap (int or float): Minimum overlap percentage (default: 99).
        az_angle (int or float): Estimated azimuth angle (azAngle in XML metadata), i.e. flight line angle to north (default: 10).

    Returns:
        pandas.DataFrame: DataFrame containing the filtered information.
    """
    with open(searchfile) as src:
        gj = [json.loads(line) for line in src]
    if instrument in ("PS2", "PS2.SD"):
        features = get_orbit(gj, orbit)
    else:
        features = gj
    features = check_overlap(features, aoi=aoi, min_overlap=min_overlap)
    df = pd.DataFrame({
        "ids": [f["id"] for f in features],
        "view_angle": [f["properties"]["view_angle"] for f in features],
        "gsd": [f["properties"]["gsd"] for f in features],
        "sat_az": [f["properties"]["satellite_azimuth"] for f in features],
        "quality": [f["properties"]["quality_category"] for f in features],
        "datetime": [datetime.datetime.strptime(f["properties"]["acquired"], "%Y-%m-%dT%H:%M:%S.%fZ") for f in features],
        "footprint": [f["geometry"]["coordinates"][0] for f in features],
        "sun_azimuth": [f["properties"]["sun_azimuth"] for f in features],
        "sun_elevation": [f["properties"]["sun_elevation"] for f in features]})
    df["date"] = df["datetime"].dt.date
    # Calculate the true view angle. To distinguish right- from left-looking scenes, an azimuth
    # angle (scan-line angle to north) of az_angle degrees is assumed; it typically ranges
    # between 7 and 12 degrees. Precise values would require downloading the XML metadata,
    # but the default of 10 degrees should be sufficient in 99% of cases.
    df["true_view_angle"] = df.view_angle
    flipped = (df.sat_az > (180 + az_angle)) | (df.sat_az < az_angle)
    df.loc[flipped, "true_view_angle"] *= -1
    try:  # remove old searchfile
        os.remove(searchfile)
    except OSError:
        pass
    print("Updating searchfile...")
    with open(searchfile, "a") as outfile:
        for f in features:
            json.dump(f, outfile, indent=None)
            outfile.write("\n")
    print(f"Found {len(features)} scenes covering min. {min_overlap}% of the given AOI.")
    return df
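# Hypothetical usage sketch, continuing from search_planet_catalog; the CSV name is an assumption:
# df = refine_search_and_convert_to_csv(searchfile, aoi="aoi.geojson", min_overlap=95)
# df.to_csv("refined_search.csv", index=False)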
def find_common_perspectives(df, va_diff_thresh=0.5, min_group_size=5, min_dt=1, searchfile=None):
    """
    Group scenes that fulfill the search criteria into groups with a common satellite perspective.

    Args:
        df (pandas.DataFrame): DataFrame containing scene information.
        va_diff_thresh (float): True view angle threshold for considering scenes to have a common perspective (default: 0.5).
        min_group_size (int): Minimum number of scenes required to form a group (default: 5).
        min_dt (int): Minimum temporal baseline between scenes in a group (default: 1).
        searchfile (str): Path and name of the search GeoJSON. Will be updated if provided (default: None).

    Returns:
        pandas.DataFrame: DataFrame containing grouped scenes.
    """
    pd.options.mode.chained_assignment = None
    groups = pd.DataFrame(columns=[*df.columns, "group_id"])
    group_id = 0
    df = df.reset_index(drop=True)
    for idx in range(len(df)):
        scene = df.iloc[[idx]]
        comp = df.copy()
        comp = comp.loc[~comp.ids.isin(scene.ids)]
        comp = comp.loc[~comp.ids.isin(groups.ids)].reset_index(drop=True)  # do not double-assign scenes to groups
        comp["va_diff"] = abs(comp.true_view_angle - scene.true_view_angle.iloc[0])
        good_matches = comp.loc[comp.va_diff <= va_diff_thresh]
        good_matches = good_matches[df.columns]  # remove va_diff col
        if len(good_matches) >= min_group_size:
            good_matches = pd.concat([good_matches, scene])
            good_matches["group_id"] = group_id
            group_id += 1
            groups = pd.concat([groups, good_matches], ignore_index=True)
    if min_dt > 1:
        old_groups = groups.copy()
        groups = pd.DataFrame(columns=[*df.columns, "group_id"])
        for gi in range(group_id):
            group = old_groups.loc[old_groups.group_id == gi]
            group = group.sort_values("datetime").reset_index(drop=True)
            # discard scenes to stay within dt limits
            dt = group.datetime.diff()
            dt.iloc[0] = datetime.timedelta(days=min_dt)
            group = group.loc[dt >= datetime.timedelta(days=min_dt)]
            groups = pd.concat([groups, group], ignore_index=True)
            # TODO: check if groups then still have the required size
    if searchfile is not None:
        print("Updating searchfile...")
        with open(searchfile) as src:
            features = [json.loads(line) for line in src]
        features = [f for f in features if (groups.ids == f["id"]).any()]  # keep exact id matches only
        os.remove(searchfile)
        with open(searchfile, "a") as outfile:
            for f in features:
                f["properties"]["group_id"] = int(groups.group_id[groups.ids == f["id"]].iloc[0])  # cast to plain int for JSON serialization
                json.dump(f, outfile, indent=None)
                outfile.write("\n")
    if len(groups) == 0:
        print("Could not find sufficient scenes for groups of relevant size. Try to widen your search parameters.")
        return
    else:
        print(f"Found {groups.group_id.nunique()} potential correlation groups.")
        return groups
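# Hypothetical usage sketch, continuing from refine_search_and_convert_to_csv:
# groups = find_common_perspectives(df, va_diff_thresh=0.3, min_group_size=5, searchfile=searchfile)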
def suggest_dem_pairs(scenes, min_va=5, max_dt=30):
    """
    Suggest image pairs for DEM generation.

    Args:
        scenes (DataFrame): DataFrame containing information about the scenes.
        min_va (float, optional): Minimum view angle for scene inclusion. Defaults to 5.
        max_dt (int, optional): Maximum time difference (in days) between scenes. Defaults to 30.

    Returns:
        DataFrame: Pandas DataFrame containing pairs of DEM images and their associated information.
    """
    df = scenes.loc[scenes.view_angle >= min_va]
    df = df.reset_index(drop=True)
    pairs = []
    for idx, scene in df.iterrows():
        comp = df.copy()
        comp = comp.loc[comp.ids != scene.ids]
        comp["va_diff"] = abs(comp.true_view_angle - scene.true_view_angle)
        comp["dt"] = comp["datetime"] - scene["datetime"]
        comp = comp.loc[comp.dt > datetime.timedelta(days=0)]  # only compare to newer acquisitions to avoid duplicate pairs
        comp = comp.loc[comp.dt <= datetime.timedelta(days=max_dt)]
        if len(comp) > 0:
            max_vad = comp.va_diff.max()
            good = comp.loc[comp.va_diff == max_vad]
            pairs.append({
                "img1": scene.ids,
                "img2": list(good.ids),
                "dt": list(good.dt),
                "true_va_diff": list(good.va_diff)})
    pairs = pd.DataFrame.from_records(pairs).explode(["img2", "dt", "true_va_diff"]).reset_index(drop=True)
    return pairs
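# Hypothetical usage sketch, continuing from find_common_perspectives:
# dem_pairs = suggest_dem_pairs(groups, min_va=5, max_dt=30)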
def download_xml_metadata(ids, out_dir=None):
    """
    Download XML metadata files for the provided scene ids.

    Args:
        ids (list): List of scene ids.
        out_dir (str): Output directory to store the downloaded files (default: home directory).
    """
    if out_dir is None:
        out_dir = str(Path.home())
    while True:
        user_in = input(f"Your request will download {len(ids)} metadata files. Continue? [y/n] ")
        if user_in == "n":
            print("Exiting...")
            return
        elif user_in == "y":
            break
        else:
            print("Please provide a valid input!")
    # only one asset can be activated at once
    # more info: https://planet-sdk-for-python-v2.readthedocs.io/en/latest/cli/cli-data/#stats
    for i in ids:
        cmd = f"planet data asset-activate PSScene {i} basic_analytic_4b_xml"
        subprocess.run(cmd, shell=True)
    print("Waiting for assets to activate...")
    for i in ids:
        cmd = (f"planet data asset-wait PSScene {i} basic_analytic_4b_xml && "
               f"planet data asset-download PSScene {i} basic_analytic_4b_xml --directory {out_dir}")
        subprocess.run(cmd, shell=True)
    print(f"Downloaded files are stored under {out_dir}")