I am using the following script to download NDVI and GCVI data for field polygons. Each field has an average size of 50 - 100 ha, and I am requesting data for a 1-year period per field. I have consumed about 70,000 processing units for nearly 2,000 fields, which seems a bit excessive based on my previous experience with the Sentinel Hub Statistical API. Can someone take a look at the script below and point out any obvious issues in terms of speed and cost? (I realize I can follow the "reduce processing cost" section here: Sentinel Hub Statistical API — Sentinel Hub 3.10.0 documentation, and I am planning to look into that next.)
evalscript = """
//VERSION=3
function setup() {
return {
input: [{
bands:[
"B03",
"B04",
"B08",
"CLD",
"dataMask"]}],
output:[
{
id: "CLD",
bands: 1
},
{
id: "GCVI",
bands: 1
},
{
id: "NDVI",
bands: 1
},
{
id: "dataMask",
bands: 1
}]
}
}
function evaluatePixel(samples) {
var validDataMask = 1
if (samples.B08 + samples.B04 == 0) {
validDataMask = 0
}
return {
CLD: [samples.CLD],
GCVI: [(samples.B08/samples.B03)-1],
NDVI: [index(samples.B08, samples.B04)],
// Exclude nodata pixels, pixels where NDVI is not defined and
// water pixels from statistics calculation
dataMask: [samples.dataMask * validDataMask]
};
}
"""
from datetime import date

import geopandas as gpd
from tqdm import tqdm
from sentinelhub import (CRS, DataCollection, Geometry, SentinelHubStatistical,
                         SentinelHubStatisticalDownloadClient, SHConfig)

config = SHConfig()  # credentials are set in my local sentinelhub profile

input_data = SentinelHubStatistical.input_data(DataCollection.SENTINEL2_L2A)
client = SentinelHubStatisticalDownloadClient(config=config)

gdf = gpd.read_file('compiled.shp')
# Convert to WGS84
gdf = gdf.to_crs(epsg=4326)

frames = []
for idx, row in tqdm(gdf.iterrows(), total=len(gdf)):
    # One Statistical API request per field, covering that field's full year
    yearly_time_interval = date(row['year'], 1, 1), date(row['year'], 12, 31)
    aggregation = SentinelHubStatistical.aggregation(
        evalscript=evalscript, time_interval=yearly_time_interval, aggregation_interval="P1D"
    )
    request = SentinelHubStatistical(
        aggregation=aggregation,
        input_data=[input_data],
        geometry=Geometry(row.geometry, crs=CRS(gdf.crs)),
        config=config,
    )
    download_request = request.download_list[0]
    vi_stats = client.download(download_request)
    frames.append(vi_stats)
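On the speed side, one change I am considering is building all the per-field requests first and then handing them to the download client in a single call, roughly along the lines of the sentinelhub-py Statistical API examples. A minimal sketch of what I have in mind (assuming the same gdf, evalscript, input_data and config as above; this only parallelizes the downloads and should not change the PU consumption):

```python
# Build every per-field request up front, then let the client download them
# with multiple threads instead of one blocking call per field.
requests = []
for idx, row in gdf.iterrows():
    yearly_time_interval = date(row['year'], 1, 1), date(row['year'], 12, 31)
    aggregation = SentinelHubStatistical.aggregation(
        evalscript=evalscript, time_interval=yearly_time_interval, aggregation_interval="P1D"
    )
    requests.append(
        SentinelHubStatistical(
            aggregation=aggregation,
            input_data=[input_data],
            geometry=Geometry(row.geometry, crs=CRS(gdf.crs)),
            config=config,
        )
    )

download_requests = [req.download_list[0] for req in requests]
client = SentinelHubStatisticalDownloadClient(config=config)
all_stats = client.download(download_requests)  # list of per-field statistics responses
```

That should help with wall-clock time, but I would still like to understand why the PU count itself is so high.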