Hello everyone,
I am currently working on a project that requires a year's worth of Sentinel-2 L1C multi-band imagery, along with cloud masks and cloud properties, specifically for the China region. However, I am encountering some challenges with my download code. At times it downloads large numbers of blank (no-data) tiles, wasting valuable processing units and disk space. I am also unsure whether the tiles I am downloading are correctly georeferenced.
I have attached my data download and processing code below. I would greatly appreciate any guidance or suggestions to improve the accuracy and efficiency of my downloads.
Thank you very much for your help!
# Define the large area of interest (a lon/lat WGS84 bounding rectangle covering China).
geojson_coords = [[[69.7803771496, 9.8819587269], [134.5557677746, 9.8819587269], [134.5557677746, 54.673645043], [69.7803771496, 54.673645043], [69.7803771496, 9.8819587269]]]

# Build a shapely polygon from the exterior ring.
polygon = shapely.geometry.Polygon(geojson_coords[0])

# WGS84 geographic CRS (lon/lat degrees).
wgs84 = pyproj.CRS("EPSG:4326")

# UTM zone of the *first* vertex only.
# NOTE(review): this AOI spans ~65 degrees of longitude (UTM zones 42-53).
# Projecting the whole area into the single zone of the south-west corner
# severely distorts tiles far from that zone's central meridian — a likely
# cause of misplaced and blank downloads. Consider splitting the AOI per UTM
# zone (utm.from_latlon per cell), or gridding directly in CRS.WGS84.
utm_coords = utm.from_latlon(geojson_coords[0][0][1], geojson_coords[0][0][0])
utm_zone = utm_coords[2]
# BUG FIX: zero-pad the zone so EPSG codes for zones < 10 are valid
# (e.g. "32601", not the invalid "3261"). Unchanged for zone 42 -> "32642".
utm_epsg = f"326{utm_zone:02d}"
utm_crs = pyproj.CRS(f"EPSG:{utm_epsg}")

# Transformer from lon/lat to UTM metres; always_xy keeps (x, y) axis order.
project = pyproj.Transformer.from_crs(wgs84, utm_crs, always_xy=True).transform
utm_polygon = shapely.ops.transform(project, polygon)

# Bounding box of the projected AOI, in metres.
minx, miny, maxx, maxy = utm_polygon.bounds

# Grid cell size for splitting the large area: 10 km x 10 km.
grid_size = 10000

# Split the AOI into grid cells, keeping only cells that actually intersect
# the polygon. For this rectangular AOI every cell intersects, but for any
# non-rectangular AOI this filter prevents downloading blank tiles entirely.
bbox_list = []
x_coords = list(range(int(minx), int(maxx), grid_size))
y_coords = list(range(int(miny), int(maxy), grid_size))
for x in x_coords:
    for y in y_coords:
        cell = shapely.geometry.box(x, y, x + grid_size, y + grid_size)
        if not cell.intersects(utm_polygon):
            continue
        bbox_list.append(BBox(((x, y), (x + grid_size, y + grid_size)), crs=CRS(utm_epsg)))
# Iterate over bbox_list and download + process each sub-area independently.
for idx, bbox in enumerate(bbox_list):
    print(f"Processing BBox {idx + 1}/{len(bbox_list)}: {bbox}")

    data_collection = DataCollection.SENTINEL2_L1C
    evalscript = generate_evalscript(
        data_collection=data_collection,
        meta_bands=["dataMask"],
        merged_bands_output="bands",
        prioritize_dn=False,
    )
    request = SentinelHubRequest(
        evalscript=evalscript,
        # NOTE(review): a single date string is interpreted by sentinelhub as
        # that whole day; pass an explicit (start, end) tuple if you intend a
        # longer mosaicking window — confirm against sentinelhub docs.
        input_data=[SentinelHubRequest.input_data(data_collection=data_collection, time_interval="2020-01-01")],
        responses=[
            SentinelHubRequest.output_response("bands", MimeType.TIFF),
            SentinelHubRequest.output_response("dataMask", MimeType.TIFF),
        ],
        bbox=bbox,
        resolution=(10, 10),
        config=config,
    )

    try:
        data = request.get_data()[0]
        bands = data["bands.tif"]
        mask = data["dataMask.tif"]
        print("Bands shape:", bands.shape)
        print("Mask shape:", mask.shape)

        # BUG FIX: detect blank tiles via the dataMask *before* running the
        # (slow) cloud classifier. A tile with no valid pixels was previously
        # only caught indirectly by the "no clouds" check below, after the
        # classifier had already run — this is what wasted resources on
        # blank areas.
        if not np.any(mask):
            print(f"Skipping BBox {idx + 1}: no valid data (blank tile).")
            continue

        # Cloud detection. all_bands=True assumes `bands` contains all 13
        # L1C bands in band order — confirm the generated evalscript
        # delivers exactly that layout.
        cloud_detector = S2PixelCloudDetector(threshold=0.4, average_over=4, dilation_size=2, all_bands=True)
        cloud_prob = cloud_detector.get_cloud_probability_maps(bands[np.newaxis, ...])[0]
        # PERF FIX: derive the binary mask from the probabilities already
        # computed, instead of running the classifier a second time via
        # get_cloud_masks (which recomputes the probability maps).
        cloud_mask = cloud_detector.get_mask_from_prob(cloud_prob[np.newaxis, ...])[0]

        # Skip tiles where no cloud signal was detected at all.
        if np.all(cloud_prob == 0):
            print(f"Skipping BBox {idx + 1} due to no clouds detected.")
            continue

        # File naming based on the acquisition date (YYYYMMDD).
        timestamp = dt.datetime.strptime("2020-01-01", "%Y-%m-%d").strftime("%Y%m%d")
        bands_file_base = f"E:/s2cloud/data/sentinel2_bands_{timestamp}_bbox_{idx + 1}"
        cloud_file_base = f"E:/s2cloud/data/sentinel2_cloud_{timestamp}_bbox_{idx + 1}"

        # Save the raw bands, plus a (H, W, 2) stack of [cloud_mask, cloud_prob].
        np.save(f"{bands_file_base}.npy", bands)
        cloud_data = np.stack((cloud_mask, cloud_prob), axis=-1)
        np.save(f"{cloud_file_base}.npy", cloud_data)

        # Geospatial metadata: EPSG code plus the GDAL-style affine 6-tuple,
        # so the .npy arrays can later be georeferenced.
        crs = bbox.crs.value
        transform = from_bounds(*bbox, width=bands.shape[1], height=bands.shape[0]).to_gdal()
        metadata = {
            "bbox": list(bbox),
            "crs": str(crs),  # EPSG code as string
            "transform": transform,  # JSON serializes the tuple as a list
        }
        with open(f"{bands_file_base}.json", "w") as f:
            json.dump(metadata, f)

    except requests.exceptions.RequestException as e:
        print(f"Skipping BBox {idx + 1} due to request error: {e}")
        continue
    except Exception as e:
        print(f"Skipping BBox {idx + 1} due to an unexpected error: {e}")
        continue