# NOTE: shared with the whole team — the full notebook source follows below.
# Jupyter notebook related
%reload_ext autoreload
%autoreload 2
%matplotlib inline
# Built-in modules
import os
import datetime
import itertools
from aenum import MultiValueEnum
from enum import Enum
# Basics of Python data handling and visualization
import numpy as np
#np.random.seed(42)
import geopandas as gpd
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib.colors import ListedColormap, BoundaryNorm
from shapely.geometry import Polygon
from tqdm.auto import tqdm
# Machine learning
import lightgbm as lgb
import joblib
from sklearn import metrics
from sklearn import preprocessing
from eolearn.core import(
EOTask,
EOPatch,
EOWorkflow,
linearly_connect_tasks,
FeatureType,
OverwritePermission,
LoadTask,
SaveTask,
EOExecutor,
MergeFeatureTask,
)
from eolearn.io import SentinelHubInputTask, VectorImportTask, ExportToTiffTask
from eolearn.geometry import VectorToRasterTask, ErosionTask
from eolearn.features import LinearInterpolationTask, SimpleFilterTask, NormalizedDifferenceIndexTask
from eolearn.features import LinearInterpolationTask
from eolearn.ml_tools import FractionSamplingTask
from sentinelhub import UtmZoneSplitter, DataCollection, SHConfig, CRS
# Input data location and output locations for intermediate/final results.
DATA_FOLDER = os.path.join(".", "example_data")
EOPATCH_FOLDER = os.path.join(".", "eopatches")
EOPATCH_SAMPLES_FOLDER = os.path.join(".", "eopatches_sampled")
RESULTS_FOLDER = os.path.join(".", "results")

# Make sure every working directory exists before any task writes into it.
_required_folders = (DATA_FOLDER, EOPATCH_FOLDER, EOPATCH_SAMPLES_FOLDER, RESULTS_FOLDER)
for _folder in _required_folders:
    os.makedirs(_folder, exist_ok=True)
# Load the area of interest (AOI) from the GeoJSON file.
aoi = gpd.read_file(os.path.join(DATA_FOLDER, 'new_coordonates', 'EO_Browser_2480_36','Kossou_2480_36_km2.geojson'))

# Quick visual check of the raw geometry.
aoi.plot()
plt.axis('off');

# Reproject to UTM zone 30N so coordinates are in metres — required for the
# fixed-size 5 km grid below.
aoi_crs = CRS.UTM_30N
aoi = aoi.to_crs(epsg=aoi_crs.epsg)
aoi.plot()
plt.axis('off')

# FIX: `aoi_shape` was used below but never defined (NameError). Take the
# single shapely geometry of the AOI for the splitter.
# NOTE(review): assumes the GeoJSON contains one feature — verify; otherwise
# use aoi.geometry.unary_union.
aoi_shape = aoi.geometry.values[0]

# Create a splitter to obtain a list of bboxes with 5km sides
bbox_splitter = UtmZoneSplitter([aoi_shape], aoi.crs, 5000)
bbox_list = np.array(bbox_splitter.get_bbox_list())
info_list = np.array(bbox_splitter.get_info_list())
len(bbox_list)
# Prepare info of selected EOPatches
geometry = [Polygon(bbox.get_polygon()) for bbox in bbox_list]
idxs = [info["index"] for info in info_list]
idxs_x = [info["index_x"] for info in info_list]
idxs_y = [info["index_y"] for info in info_list]
bbox_gdf = gpd.GeoDataFrame(
    {"index": idxs, "index_x": idxs_x, "index_y": idxs_y},
    crs=aoi.crs,
    geometry=geometry,
)

# Id of the patch at the centre of the desired 5x5 area
ID = 58

# Collect every patch whose grid coordinates are within 2 of the centre patch.
center_x = info_list[ID]["index_x"]
center_y = info_list[ID]["index_y"]
patchIDs = [
    idx
    for idx, info in enumerate(info_list)
    if abs(info["index_x"] - center_x) <= 2 and abs(info["index_y"] - center_y) <= 2
]

# A centre patch too close to the grid border yields fewer than 25 patches.
if len(patchIDs) != 5 * 5:
    print("Warning! Use a different central patch ID, this one is on the border.")

# Reorder the patch ids into a layout convenient for plotting.
patchIDs = np.transpose(np.fliplr(np.array(patchIDs).reshape(5, 5))).ravel()

# Persist the full grid as a GeoPackage.
shapefile_name = "grid_kossou_983_29_500x500.gpkg"
bbox_gdf.to_file(os.path.join(RESULTS_FOLDER, shapefile_name), driver="GPKG")

# Display bboxes over area
fig, ax = plt.subplots(figsize=(30, 30))
ax.set_title("Selected 5x5 tiles from kossou", fontsize=25)
aoi.plot(ax=ax, facecolor="w", edgecolor="b", alpha=0.5)
bbox_gdf.plot(ax=ax, facecolor="w", edgecolor="r", alpha=0.5)
for bbox, info in zip(bbox_list, info_list):
    geo = bbox.geometry
    ax.text(geo.centroid.x, geo.centroid.y, info["index"], ha="center", va="center")

# Mark bboxes of selected area
bbox_gdf[bbox_gdf.index.isin(patchIDs)].plot(ax=ax, facecolor="g", edgecolor="r", alpha=0.5)
plt.axis("off");
#Download Data
class SentinelHubValidDataTask(EOTask):
    """
    Build a validity mask by combining the `IS_DATA` mask with the inverse of
    SentinelHub's cloud mask, which is expected in eopatch.mask['CLM'].
    """

    def __init__(self, output_feature):
        # Feature (type, name) pair under which the combined mask is stored.
        self.output_feature = output_feature

    def execute(self, eopatch):
        is_data = eopatch.mask["IS_DATA"].astype(bool)
        is_cloud = eopatch.mask["CLM"].astype(bool)
        eopatch[self.output_feature] = is_data & ~is_cloud
        return eopatch
class AddValidCountTask(EOTask):
    """
    Count the number of valid observations per pixel over the time axis and
    store the result as a timeless mask feature.
    """

    def __init__(self, count_what, feature_name):
        # Name of the mask feature whose non-zero entries are counted.
        self.what = count_what
        # Name of the MASK_TIMELESS feature that receives the counts.
        self.name = feature_name

    def execute(self, eopatch):
        valid_per_pixel = np.count_nonzero(eopatch.mask[self.what], axis=0)
        eopatch[FeatureType.MASK_TIMELESS, self.name] = valid_per_pixel
        return eopatch
# BAND DATA
# Sentinel Hub configuration — credentials are taken from the locally stored
# sentinelhub profile. FIX: `config` was passed to the tasks below but was
# never defined anywhere in the file.
config = SHConfig()

# Add a request for S2 L1C bands.
# Here we also do a simple filter of cloudy scenes (on tile level, via maxcc).
# The s2cloudless masks (CLM) and probabilities (CLP) are requested via
# additional data.
band_names = ["B01", "B02", "B03", "B04", "B05", "B06", "B07", "B08", "B8A", "B09", "B11", "B12"]
add_data = SentinelHubInputTask(
    bands_feature=(FeatureType.DATA, "BANDS"),
    bands=band_names,
    resolution=20,
    maxcc=0.8,
    time_difference=datetime.timedelta(minutes=120),
    data_collection=DataCollection.SENTINEL2_L1C,
    additional_data=[(FeatureType.MASK, "dataMask", "IS_DATA"), (FeatureType.MASK, "CLM"), (FeatureType.DATA, "CLP")],
    config=config,
    max_threads=5,
)

# Matching L2A request, mainly for the scene classification layer (SCL).
scl = SentinelHubInputTask(
    data_collection=DataCollection.SENTINEL2_L2A,
    bands=band_names,  # same band list as the L1C request above
    bands_feature=(FeatureType.DATA, "L2A_data"),
    additional_data=[(FeatureType.MASK, "SCL")],
    resolution=20,
    maxcc=0.8,
    time_difference=datetime.timedelta(minutes=120),
    config=config,
    max_threads=3,
)
# CALCULATING NEW FEATURES
# NDVI: (B08 - B04)/(B08 + B04)
# NDWI: (B03 - B08)/(B03 + B08)
# NDBI: (B11 - B08)/(B11 + B08)
def _index_task(feature_name, first_band, second_band):
    """Build a normalized-difference task over the BANDS feature for two bands."""
    return NormalizedDifferenceIndexTask(
        (FeatureType.DATA, "BANDS"),
        (FeatureType.DATA, feature_name),
        [band_names.index(first_band), band_names.index(second_band)],
    )

ndvi = _index_task("NDVI", "B08", "B04")
ndwi = _index_task("NDWI", "B03", "B08")
ndbi = _index_task("NDBI", "B11", "B08")

# VALIDITY MASK
# Validate pixels using SentinelHub's cloud detection mask and region of acquisition
add_sh_validmask = SentinelHubValidDataTask((FeatureType.MASK, "IS_VALID"))

# COUNTING VALID PIXELS
# Count the number of valid observations per pixel using valid data mask
add_valid_count = AddValidCountTask("IS_VALID", "VALID_COUNT")

# SAVING TO OUTPUT (if needed)
save = SaveTask(EOPATCH_FOLDER, overwrite_permission=OverwritePermission.OVERWRITE_PATCH)
class LULC(MultiValueEnum):
    """Basic land-use / land-cover classes.

    Each member carries three values: display name, integer raster id, and
    the hex color used for plotting.
    """

    NO_DATA = "No Data", 0, "#ffffff"
    WATER = "Water", 1, "#069af3"
    FOREST = "Forest", 2, "#054907"
    BUILDUP = "BuildUp", 3, "#dc143c"
    BARRENLAND = "BarrenLand", 4, "#a6a6a6"
    SAVANNA = "Savanna", 5, "#564234"

    @property
    def id(self):
        """Integer raster value of this class."""
        return self.values[1]

    @property
    def color(self):
        """Hex color string used when plotting classification maps."""
        return self.values[2]
# Colormap and norm for plotting reference LULC maps: one color per class,
# with bin edges centred on the integer class ids.
lulc_colors = [entry.color for entry in LULC]
lulc_cmap = ListedColormap(lulc_colors, name="lulc_cmap")
lulc_bounds = [edge - 0.5 for edge in range(len(LULC) + 1)]
lulc_norm = BoundaryNorm(lulc_bounds, lulc_cmap.N)
# NOTE(review): this cell re-created `add_sh_validmask`, `add_valid_count` and
# `save` with exactly the same arguments as the cell above. The redundant
# duplicate definitions were removed; the earlier task objects are reused.
# Importing reference data
# Source reference polygons for the AOI.
fg = os.path.join(DATA_FOLDER, 'new_coordonates', 'EO_Browser_2480_36', 'kossou_classification_2480_36_sec.gpkg')
data = gpd.read_file(fg)

# Re-export under the canonical name the workflow expects.
# FIX: `outfg` was assigned twice with the identical value; the duplicate
# assignment was removed.
outfg = os.path.join(DATA_FOLDER, "Kossou_final_classification_ref_map_5_788_02.gpkg")
data.to_file(outfg, driver="GPKG")

# Import the vector reference into each EOPatch ...
vector_feature = FeatureType.VECTOR_TIMELESS, "LULC_REFERENCE"
vector_import_task = VectorImportTask(vector_feature, outfg)

# ... and burn it into a raster mask, matching the shape of IS_DATA.
rasterization_task = VectorToRasterTask(
    vector_feature,
    (FeatureType.MASK_TIMELESS, "LULC"),
    values_column="DN",  # column holding the integer class id
    raster_shape=(FeatureType.MASK, "IS_DATA"),
    raster_dtype=np.uint8,
)
# Assemble the processing chain; the tasks run in the listed order on each patch.
workflow_nodes = linearly_connect_tasks(
    add_data,
    ndvi,
    ndwi,
    ndbi,
    add_sh_validmask,
    add_valid_count,
    scl,
    vector_import_task,
    rasterization_task,
    save,
)
workflow = EOWorkflow(workflow_nodes)

# Render the task dependency graph for a visual sanity check.
workflow.dependency_graph()
%%time
# Time interval for the SH request
time_interval = ["2022-01-01", "2022-03-31"]
# Define additional parameters of the workflow
input_node = workflow_nodes[0]
save_node = workflow_nodes[-1]
execution_args = []
for idx, bbox in enumerate(bbox_list[patchIDs]):
execution_args.append(
{
input_node: {"bbox": bbox, "time_interval": time_interval},
save_node: {"eopatch_folder": f"eopatch_{idx}"},
}
)
# Execute the workflow
executor = EOExecutor(workflow, execution_args, save_logs=True)
executor.run(workers=4)
executor.make_report()
failed_ids = executor.get_failed_executions()
if failed_ids:
raise RuntimeError(
f"Execution failed EOPatches with IDs:\n{failed_ids}\n"
f"For more info check report at {executor.get_report_path()}"
)