Commit 9f605ed4 authored by CEVAER's avatar CEVAER
Browse files

Added finding lon/lat best index in data corresponding to the best matching track point.

parent 96e47871
......@@ -5,32 +5,95 @@ import logging
import datetime
import os
import sys
from sqlalchemy import create_engine, and_
from sqlalchemy import create_engine, and_, func, cast
from sqlalchemy.orm import sessionmaker
from geoalchemy2 import Geometry
from cyclobs_orm import SimpleTrack
import xarray
import pandas as pd
import numpy as np
from geo_shapely import shape360
logging.basicConfig()
logger = logging.getLogger(os.path.basename(__file__))
logger.setLevel(logging.INFO)
def extractDateFromFilename(filename):
def extract_date_from_filename(filename):
datePart = "-".join(filename.split("_")[4:7])
return datetime.datetime.strptime(datePart, "%Y-%m-%d")
def getTrackPointsFromDatabase(session, smapDate):
reqTracks = session.query(SimpleTrack).filter(and_(SimpleTrack.date >= smapDate,
SimpleTrack.date < smapDate + datetime.timedelta(days=1))).all()
def get_track_points_from_database(session, smapDate):
reqTracks = session.query(SimpleTrack, func.ST_X(cast(SimpleTrack.geom, Geometry)).label("lon"),
func.ST_Y(cast(SimpleTrack.geom, Geometry)).label("lat")) \
.filter(and_(SimpleTrack.date >= smapDate,
SimpleTrack.date < smapDate + datetime.timedelta(days=1))).group_by(SimpleTrack).all()
return reqTracks
def processSmapFile(session, file):
def get_colocated_track_points(dataset, trackPoints, fileDate, deg_delta=3, time_delta=60):
trackPointOffsets = []
for trackPoint in trackPoints:
logger.debug(f"Track point : {trackPoint}")
sel = dataset.sel(lon=trackPoint["lon"], lat=trackPoint["lat"], method="nearest")
logger.debug(f"Point find : lon: {sel['lon'].values}, lat: {sel['lat'].values}, minute: {sel['minute'].values}")
lonIndex = np.where(dataset["lon"].data == sel["lon"].data)[0][0]
latIndex = np.where(dataset["lat"].data == sel["lat"].data)[0][0]
logger.debug(f"Point find : lon: {sel['lon'].values}, lat: {sel['lat'].values}, minute: {sel['minute'].values},"
f" lon_index: {lonIndex}, lat_index {latIndex}")
df = sel.to_dataframe()
# Storing the time offset between the data point and the track point
# The time offset is stored with all the track point data to ease later processing
for node in df.index:
if not pd.isna(df["minute"][node]):
pointDate = fileDate + df["minute"][node]
timeOffset = abs(trackPoint["date"] - pointDate)
logger.debug(f" Data point date: {pointDate}, Track point date: {trackPoint['date']},"
f" Time offset: {timeOffset}")
# If this is the first node processed OR that the previous nodes had NAN times OR that previous nodes
# offset times are greater than this one
if node == 0 or not "timeOffset" in trackPoint or trackPoint["timeOffset"] > timeOffset:
trackPoint["timeOffset"] = timeOffset
trackPoint["node"] = node
trackPoint["lonIndex"] = lonIndex
trackPoint["latIndex"] = latIndex
logger.debug(f"ISNOTNA")
if "timeOffset" in trackPoint:
trackPointOffsets.append(trackPoint)
keptTrackPoints = {}
# Getting the best track point for each sid (storm id).
# One SMAP file can contain several cyclones acquisitions so that is why we are doing this filtering per sid
for trackPoint in trackPointOffsets:
# TODO replace timedelta with database track sampling time
if trackPoint["timeOffset"] <= datetime.timedelta(minutes=7, seconds=30):
if trackPoint["sid"] not in keptTrackPoints:
keptTrackPoints[trackPoint["sid"]] = trackPoint
elif keptTrackPoints[trackPoint["sid"]]["timeOffset"] > trackPoint["timeOffset"]:
keptTrackPoints[trackPoint["sid"]] = trackPoint
return keptTrackPoints
def process_smap_file(session, file):
logger.debug(f"Processing {file}...")
fileDate = extractDateFromFilename(os.path.basename(file))
filename = os.path.basename(file)
fileDate = extract_date_from_filename(filename)
logger.debug(f"File date {fileDate}")
trackPoints = getTrackPointsFromDatabase(session, fileDate)
logger.debug(f"{trackPoints}")
trakcPointsPerSid = {trackPoint.sid: trackPoint for trackPoint in trackPoints}
trackPoints = get_track_points_from_database(session, fileDate)
logger.debug(f"Number of track point found : {len(trackPoints)}")
trackPoints = [{"sid": trackPoint[0].sid, "lon": shape360(trackPoint.lon, 0)[0],
"lat": trackPoint.lat,
"date": trackPoint[0].date} for trackPoint in trackPoints]
dataset = xarray.open_dataset(file)
keptTrackPoints = get_colocated_track_points(dataset, trackPoints, fileDate)
logger.info(f"For file {filename} Kept track that will be used to extract SMAP data: {keptTrackPoints}")
if __name__ == "__main__":
......@@ -39,7 +102,8 @@ if __name__ == "__main__":
"""
parser = argparse.ArgumentParser(description=description)
parser.add_argument("--dbd", action="store", type=str, required=True, help='database (postgresql://user:pass@host:5432/base_name)')
parser.add_argument("--dbd", action="store", type=str, required=True,
help='database (postgresql://user:pass@host:5432/base_name)')
parser.add_argument("--debug", action="store_true", default=False, help="Run in debug mode (verbose output)")
args = parser.parse_args()
......@@ -51,8 +115,14 @@ if __name__ == "__main__":
engine = create_engine(args.dbd)
Session = sessionmaker(bind=engine)
smapTests = ["/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/110/RSS_smap_wind_daily_2020_04_19_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/025/RSS_smap_wind_daily_2020_01_25_v01.0.nc"
]
smapTests = [
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/205/RSS_smap_wind_daily_2020_07_23_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/206/RSS_smap_wind_daily_2020_07_24_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/207/RSS_smap_wind_daily_2020_07_25_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/208/RSS_smap_wind_daily_2020_07_26_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/209/RSS_smap_wind_daily_2020_07_27_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/210/RSS_smap_wind_daily_2020_07_28_v01.0.nc",
"/home/datawork-cersat-public/provider/remss/satellite/l3/smap/smap/wind/v1.0/daily/2020/211/RSS_smap_wind_daily_2020_07_29_v01.0.nc",
]
for f in smapTests:
processSmapFile(Session(), f)
process_smap_file(Session(), f)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment