Commit 42e5e429 authored by CEVAER's avatar CEVAER
Browse files

Filtering low quality SMOS data

parent 3714d9c6
......@@ -7,6 +7,7 @@ import os
import sys
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import numpy as np
# logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)
......@@ -21,6 +22,35 @@ def extract_date_from_filename(filename):
return datetime.datetime.strptime(date_part, "%Y%m%d")
def set_nan_low_quality(dataset):
    """
    Set to NaN the dataset values corresponding to a SMOS pixel quality level > 1.

    Data with quality_level > 1 is considered bad quality and is therefore removed,
    as specified in the SMOS daily product specification.

    Parameters
    ----------
    dataset : xarray.Dataset
        Dataset already containing data cropped around the cyclone.
        It is modified in place.

    Returns
    -------
    xarray.Dataset
        The same dataset, with NaN set on every data variable (except
        measurement_time) wherever quality_level is > 1 or is itself NaN.
    """
    quality_col = "quality_level"

    # Set NaN on quality levels > 1 so that the quality variable itself
    # marks the pixels that must be discarded.
    with np.errstate(invalid='ignore'):  # ignoring warnings caused by NaNs in array
        dataset[quality_col] = dataset[quality_col].where(dataset[quality_col].data <= 1)

    # The rejection mask is the same for every variable: compute it once
    # instead of re-evaluating it on each loop iteration.
    low_quality_mask = np.isnan(dataset[quality_col].data)

    # Propagate the equivalent NaNs to the other variables.
    for var in dataset.data_vars:
        # Not updating measurement_time (variable doesn't accept np.nan
        # and will be removed afterwards anyway).
        if var != "measurement_time":
            dataset[var].data[low_quality_mask] = np.nan

    return dataset
if __name__ == "__main__":
description = """
Read SMOS netCDF files from a directory, extract the wind data of cyclones and save it into a new netCDF file.
......@@ -50,6 +80,7 @@ if __name__ == "__main__":
engine = create_engine(args.dbd, pool_size=50, max_overflow=0)
Session = sessionmaker(bind=engine)
# Attributes that'll be deleted
attrs_to_del = ["aprrox_local_equatorial_crossing_time", "swath_sector", "geospatial_bounds",
"geospatial_lat_min", "geospatial_lat_max", "geospatial_lon_min", "geospatial_lon_max"]
......@@ -61,4 +92,4 @@ if __name__ == "__main__":
attrs_to_del=attrs_to_del, var_to_del=["measurement_time"], wind_col="wind_speed",
lat_col="lat", lon_col="lon", pass_col=None, pass_width=1200,
filename_format="SM_OPER_MIR_<time>_<sid>.nc", specfic_func=set_nan_low_quality)
......@@ -189,7 +189,8 @@ def extract_write_cyclone_data(dataset, kept_track_points, filename, output_path
for sid, track_point in kept_track_points.items():
# Km per deg in latitude
km_per_deg_lat = 111
......@@ -249,6 +250,9 @@ def extract_write_cyclone_data(dataset, kept_track_points, filename, output_path
sel = set_nan_outside_time_offset(sel, track_point, time_col_name=time_col_name, wind_col_name=wind_col_name,
if specific_func is not None:
sel = specific_func(sel)
# Extracting min and max time to set attributes on NetCDF
min_time, max_time = extract_start_stop_measure(sel, time_col_name)
if max_time - min_time > datetime.timedelta(minutes=30):
......@@ -310,7 +314,8 @@ def extract_write_cyclone_data(dataset, kept_track_points, filename, output_path
def process_file(session, file, output_path, extract_date_func, attrs_to_del, var_to_del, wind_col,
time_col, lat_col, lon_col, filename_format, pass_width, pass_col=None, var_attr_edit={}):
time_col, lat_col, lon_col, filename_format, pass_width, pass_col=None, var_attr_edit={},
specfic_func=None):"Processing {file}...")
filename = os.path.basename(file)
......@@ -338,7 +343,8 @@ def process_file(session, file, output_path, extract_date_func, attrs_to_del, va
time_col_name=time_col, lat_col_name=lat_col, lon_col_name=lon_col,
pass_col_name=pass_col, attrs_to_del=attrs_to_del, var_to_del=var_to_del,
is_full_time=full_time, extract_size_km=pass_width * 2, var_attr_edit=var_attr_edit)
is_full_time=full_time, extract_size_km=pass_width * 2, var_attr_edit=var_attr_edit,
logger.warning(f"Filename {filename} contains only NaN values for {wind_col} column. It will not be processed.")
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment