Commit e01e8365 authored by PIOLLE's avatar PIOLLE
Browse files

import from cerbereutils

parent c2a15c98
=========================
cerbereutils installation
=========================
Requirements
============
Your Python package manager should handle most of the dependencies by itself, but there are some gotchas:
- Numpy is required by the setup.py scripts of other modules, so it must be installed beforehand (see https://github.com/pypa/pip/issues/25)::
pip install numpy==1.7.1
- the Ifremer cerbere library is required
cerbereutils
============
Just use your package manager to perform installation::
pip install .
include *.txt
include *.rst
#! /usr/bin/env python
# encoding: utf-8
"""
cerresample
===========
Remap a product on top of another, using various resampling methods such as:
* closest neighbour
* bilinear interpolation
* gaussian
:copyright: Copyright 2013 Ifremer / Cersat.
:license: Released under GPL v3 license, see :ref:`license`.
.. sectionauthor:: Jeff Piolle <jfpiolle@ifremer.fr>
.. codeauthor:: Jeff Piolle <jfpiolle@ifremer.fr>
"""
import os
import argparse
import importlib
from datetime import datetime
from netCDF4 import num2date
from cerbereutils.resampling.resampler import Resampler
from cerbere.mapper.ncfile import NCFile
from cerbere.mapper.abstractmapper import AbstractMapper, WRITE_NEW
RESAMPLING_METHODS = ['closest_neighbour', 'bilinear']
def instantiate_features(srcproduct,
                         targetproduct,
                         srcmodelclass,
                         targetmodelclass,
                         srcmapperclass,
                         targetmapperclass,
                         srcargs):
    """Create and load the source and target feature objects.

    Args:
        srcproduct (str): full path to the source product file.
        targetproduct (str): full path to the target product file.
        srcmodelclass (:class:AbstractFeature): data model class of source
            feature.
        targetmodelclass (:class:AbstractFeature): data model class of target
            feature.
        srcmapperclass (:class:AbstractMapper): mapper class for the source
            product file.
        targetmapperclass (:class:AbstractMapper): mapper class for the target
            product file.
        srcargs (dict): additional input arguments for source mapper
            instantiation.

    Returns:
        tuple: the loaded (source, target) features.
    """
    src_mapper = srcmapperclass(url=srcproduct, **srcargs)
    tgt_mapper = targetmapperclass(url=targetproduct)
    source = srcmodelclass()
    target = targetmodelclass()
    # detect if grid file is centered on meridian 180
    # TODO: to be improved by using a more flexible interpolator
    needs_recentering = (
        source.__class__.__name__ == 'Grid'
        and is_centered_on_180(srcmapperclass, srcproduct)
    )
    if needs_recentering:
        src_mapper.center_on_greenwhich = True
    source.load(src_mapper)
    target.load(tgt_mapper)
    return source, target
def resample(srcfeature,
             targetfeature,
             fields=None,
             method=None,
             outdir='.',
             filename=None,
             use_source_time=False,
             mask=None,
             **kwargs
             ):
    """Resample a source feature on a target feature and save the result.

    The resampled feature is written as a NetCDF4 file in `outdir`.

    Args:
        srcfeature: source feature (cerbere data model object) to resample.
        targetfeature: target feature providing the output pattern.
        fields (list of str, optional): names of the fields to resample
            (all fields of the source by default).
        method (str): name and arguments (separated by `/`) of the
            resampling method. For example:
            'closest_neighbour/radius=1000'
            'bilinear/extrapolation=4/fillvalue=0'
        outdir (str): output directory for the produced file.
        filename (str, optional): name of the output file. Built from the
            source basename and time coverage when not provided.
        use_source_time (bool): use the source to fill in the time field
            of the produced feature. this is used when the target is only
            used for its spatial pattern (for instance the lat/lon of a grid).
        mask (ndarray of bool): mask to apply on the resampled result
            (will be combined with the native mask of the result array).
            This is meant for instance to mask land data over which
            interpolation may have been performed in the resampling
            process.

    Raises:
        Exception: if the resampling method is unknown.
    """
    # fields to resample (all source fields by default)
    if fields is None:
        resampled_fields = srcfeature.get_fieldnames()
    else:
        resampled_fields = fields
    # parse the method string, e.g. 'bilinear/extrapolation=4/fillvalue=0'
    method_name = method.split('/')[0]
    method_kwargs = {k.split('=')[0]: k.split('=')[1]
                     for k in method.split('/')[1:]}
    radius = None
    if method_name == 'closest_neighbour':
        if 'radius' in method_kwargs:
            # fix: the parsed value is a string and must be converted to
            # a number before being used as a distance
            radius = float(method_kwargs['radius'])
        else:
            # 1.5 = approx. sqrt(2)
            # value is converted from degree to meter
            radius = srcfeature.get_spatial_resolution() / 2. * 150000
        outfeature = Resampler.closest_neighbour(srcfeature,
                                                 targetfeature,
                                                 fields=resampled_fields,
                                                 radius=radius,
                                                 new=True,
                                                 add_reference=True,
                                                 **kwargs
                                                 )
    elif method_name == 'bilinear':
        if 'extrapolation' in method_kwargs:
            extrapolation = int(method_kwargs['extrapolation'])
        else:
            extrapolation = 0
        if 'fillvalue' in method_kwargs:
            fillvalue = float(method_kwargs['fillvalue'])
        else:
            fillvalue = None
        outfeature = Resampler.interpolation(srcfeature,
                                             targetfeature,
                                             fields=resampled_fields,
                                             extrapolation=extrapolation,
                                             fillvalue=fillvalue,
                                             new=True,
                                             add_reference=True,
                                             add_geolocation=False,
                                             mask=mask,
                                             **kwargs
                                             )
    else:
        raise Exception('Unknown resampling method')
    # metadata
    if outfeature:
        # set global attributes
        globalattrs = {}
        timevals = srcfeature.get_times()
        time0 = timevals.min()
        time1 = timevals.max()
        mintime = num2date(time0, srcfeature.get_time_units())
        maxtime = num2date(time1, srcfeature.get_time_units())
        globalattrs['time_coverage_start'] = mintime.strftime('%Y%m%dT%H%M%S')
        globalattrs['time_coverage_stop'] = maxtime.strftime('%Y%m%dT%H%M%S')
        # fix: was `method[0] == 'closest_neighbour'` which compared the
        # first *character* of the method string and could never be true,
        # so the radius attribute was never written
        if method_name == 'closest_neighbour':
            globalattrs['spatial_colocation_radius_in_km'] = radius / 1000.
        # set time from the source when the target only provides the
        # spatial pattern
        if use_source_time:
            field = srcfeature.get_geolocation_field('time').clone()
            if len(field.dimensions) == 1:
                field.set_values(
                    srcfeature.get_geolocation_field('time').get_values().copy()
                )
            outfeature._geolocation_fields['time'] = field
        # save file (removed leftover python2 debug prints)
        if not os.path.exists(outdir):
            os.makedirs(outdir)
        if filename is None:
            srcname = srcfeature.get_mapper().get_basename()
            fname = os.path.join(outdir,
                                 '-'.join([mintime.strftime('%Y%m%dT%H%M%S'),
                                           'RESAMP',
                                           srcname])
                                 )
        else:
            fname = os.path.join(outdir, filename)
        if os.path.exists(fname):
            os.remove(fname)
        outfile = NCFile(fname,
                         WRITE_NEW,
                         ncformat="NETCDF4")
        outfeature.save(outfile, attrs=globalattrs)
        outfile.close()
    return
def get_options():
    """Parse and validate the command line arguments.

    Returns:
        argparse.Namespace: the parsed options. ``source_file``,
        ``target_file`` and ``resampling_method`` are guaranteed to be set.
    """
    parser = argparse.ArgumentParser(description="Resample data from a feature"
                                     " onto another feature")
    # source and target files are mandatory: the script cannot run
    # without them (previously a missing option crashed later with an
    # unhelpful AttributeError/TypeError)
    parser.add_argument("-p", "--source",
                        action="store", required=True,
                        dest="source_file", metavar="FILE",
                        help="full path to the source file (file to be"
                             " resampled)")
    parser.add_argument("-P", "--target",
                        action="store", required=True,
                        dest="target_file", metavar="FILE",
                        help="full path to the target file (on which the"
                             " source is resampled)")
    parser.add_argument("-f", "--fields",
                        action="store",
                        dest="fieldnames", metavar="FIELD1/FIELD2/...",
                        help="name of the field(s) to resample, slash"
                             " separated (all by default)")
    parser.add_argument("-o", "--output",
                        action="store",
                        dest="output", metavar="OUTPUT_DIR",
                        help="full path to the output directory")
    parser.add_argument("-m", "--source-mapper-class",
                        action="store",
                        dest="source_mapper", metavar="MAPPER",
                        help="mapper for the source data file"
                             " (default: NCFile)")
    parser.add_argument("-d", "--source-model-class",
                        action="store",
                        dest="source_datamodel", metavar="MODEL",
                        help="data model for the source file (default: Grid)")
    parser.add_argument("-M", "--target-mapper-class",
                        action="store",
                        dest="target_mapper", metavar="MAPPER",
                        help="mapper for the target data file"
                             " (default: NCFile)")
    parser.add_argument("-D", "--target-model-class",
                        action="store",
                        dest="target_datamodel", metavar="MODEL",
                        help="data model for the target file (default: Grid)")
    parser.add_argument("-a", "--method",
                        action="store", required=True,
                        dest="resampling_method", metavar="METHOD",
                        help="resampling method, with optional slash"
                             " separated arguments (e.g."
                             " closest_neighbour/radius=1000)")
    parser.add_argument("-u", "--use-source-time", action='store_true',
                        dest="use_source_time",
                        help="Use the time of the source to fill the time of"
                             " the produced file (used when the target is"
                             " only used as for the pattern definition)")
    parser.add_argument("-k", "--use-target-mask",
                        action="store",
                        dest="target_mask", metavar="FIELD/VALUE",
                        help="Use a FIELD from the target file to mask"
                             " invalid data in the remapped file where VALUE"
                             " is set (slash separated)")
    options = parser.parse_args()
    # validate the method name with a proper CLI error: the previous
    # `assert` would be silently stripped when python runs with -O
    if options.resampling_method.split('/')[0] not in RESAMPLING_METHODS:
        parser.error('Unknown resampling method: %s'
                     % options.resampling_method)
    return options
def is_centered_on_180(srcmapperclass, srcproduct):
    """Auto-detection of grid centering on meridian 180.

    The detection relies on the longitude axis wrapping around the
    antimeridian: when the first longitude is greater than the last one,
    the grid is assumed to be centered on meridian 180.

    Args:
        srcmapperclass (:class:AbstractMapper): mapper class used to read
            the product.
        srcproduct (str): full path to the product file.

    Returns:
        bool: True if the grid is centered on meridian 180.
    """
    mapper = srcmapperclass(url=srcproduct)
    mapper.open()
    lons = mapper.read_values('lon')
    # fix: removed leftover python2 debug print (a SyntaxError under
    # python3) and simplified the flag assignment
    res = bool(lons[0] > lons[-1])
    mapper.close()
    return res
if __name__ == "__main__":
    """
    Command line entry point: resample a source product onto the pattern
    of a target product.

    Example :
    cerresample -p /home/guiriden/eautret/tmp/jeff/20100506-AMSRE-REMSS-L2P-amsr_l2b_v05_r42588.dat-v01.nc -P /home/guiriden/eautret/tmp/jeff/gulfstream-20100506-grid.nc -m GHRSSTNCFile -d Swath -a closest_neighbour/radius=20000.

    NOTE(review): the original example used -s/-T/-r options that are not
    defined by get_options(); the flags above were adjusted to the actual
    parser — confirm the intended method/radius syntax.
    """
    options = get_options()
    source_file = options.source_file
    target_file = options.target_file
    # fields to resample ('/'-separated list; all fields when not provided)
    if options.fieldnames:
        fieldnames = options.fieldnames.split('/')
    else:
        fieldnames = None
    # output directory (current directory by default)
    if options.output:
        output = options.output
    else:
        output = '.'
    if options.use_source_time:
        use_source_time = True
    else:
        use_source_time = False
    method = options.resampling_method
    # source mapper
    input_args = {}
    if not options.source_mapper:
        # default: generic NetCDF mapper
        source_mapper = getattr(
            importlib.import_module(
                'cerbere.mapper.ncfile'
            ),
            'NCFile',
        )
    else:
        mapper = options.source_mapper
        if '/' in mapper:
            # additional arguments
            fields = mapper.split('/')
            mapper = fields[0]
            input_args = {f.split('=')[0]: f.split('=')[1] for f in fields[1:]}
            if 'fillvalue' in input_args:
                input_args['fillvalue'] = float(input_args['fillvalue'])
        # mapper classes live in a module named after the lowercased
        # class name
        source_mapper = getattr(
            importlib.import_module(
                'cerbere.mapper.' + mapper.lower()
            ),
            mapper,
        )
    # source data model (Grid by default)
    if not options.source_datamodel:
        source_datamodel = getattr(
            importlib.import_module(
                'cerbere.datamodel.grid'
            ),
            'Grid'
        )
    else:
        source_datamodel = getattr(
            importlib.import_module(
                'cerbere.datamodel.' + options.source_datamodel.lower()
            ),
            options.source_datamodel
        )
    # target
    if not options.target_mapper:
        target_mapper = getattr(
            importlib.import_module(
                'cerbere.mapper.ncfile'
            ),
            'NCFile'
        )
    else:
        target_mapper = getattr(
            importlib.import_module(
                'cerbere.mapper.' + options.target_mapper.lower()
            ),
            options.target_mapper
        )
    # target data model (Grid by default)
    if not options.target_datamodel:
        target_datamodel = getattr(
            importlib.import_module(
                'cerbere.datamodel.grid'
            ),
            'Grid'
        )
    else:
        target_datamodel = getattr(
            importlib.import_module(
                'cerbere.datamodel.' + options.target_datamodel.lower()
            ),
            options.target_datamodel
        )
    # case where a specific time step is requested for a GridTimeSeries
    # (syntax: <path>:timestep=<YYYYmmddTHHMMSS>)
    if ':' in source_file:
        source_file, timearg = source_file.split(':')
        if timearg.split('=')[0] != 'timestep':
            raise Exception("Unknown keyword: ", timearg.split('=')[0])
        timestep = datetime.strptime(timearg.split('=')[1],
                                     '%Y%m%dT%H%M%S')
    else:
        timestep = None
    # instantiate features
    srcfeature, targetfeature = instantiate_features(
        source_file,
        target_file,
        srcmodelclass=source_datamodel,
        targetmodelclass=target_datamodel,
        srcmapperclass=source_mapper,
        targetmapperclass=target_mapper,
        srcargs=input_args
    )
    # optional mask built from a FIELD/VALUE pair read in the target file:
    # everything NOT equal to VALUE is masked in the resampled output
    if not options.target_mask:
        mask = None
    else:
        mask_field = options.target_mask.split('/')[0]
        mask_value = int(options.target_mask.split('/')[1])
        mask = (targetfeature.get_values(mask_field) != mask_value)
    if srcfeature.__class__.__name__ == 'GridTimeSeries' and timestep is None:
        # resample every time step of the series, producing one output
        # file per step
        for grid in srcfeature:
            date = grid.get_datetimes()[0]
            datestr = date.strftime('%Y%m%d%H%M%S').replace(' ', '0')
            filename = "_".join([
                datestr,
                "RESAMP",
                os.path.basename(source_file)]).strip()
            resample(grid,
                     targetfeature,
                     fields=fieldnames,
                     method=method,
                     outdir=output,
                     filename=filename,
                     use_source_time=use_source_time,
                     mask=mask
                     )
    else:
        # single feature, or a single time step extracted from a series
        if srcfeature.__class__.__name__ == 'GridTimeSeries':
            if timestep is not None:
                srcfeature = srcfeature.extract_grid(time=timestep)
        resample(srcfeature,
                 targetfeature,
                 fields=fieldnames,
                 method=method,
                 outdir=output,
                 use_source_time=use_source_time,
                 mask=mask
                 )
This diff is collapsed.
"""
Installation
written with Cython. To compile:
.. code-block:: python
python setup.py build_ext --inplace
"""
import numpy as np
# "cimport" is used to import special compile-time information
# about the numpy module (this is stored in a file numpy.pxd which is
# currently part of the Cython distribution).
cimport numpy as np
# "ctypedef" assigns a corresponding compile-time type to DTYPE_t. For
# every type in the numpy module there's a corresponding compile-time
# type with a _t-suffix.
ctypedef np.float32_t DTYPEf32_t
ctypedef np.float64_t DTYPEf64_t
def fastregrid(lats, lons, values,
               latmin=-90., latmax=90., lonmin=-180., lonmax=180.,
               lonbinsize=1., latbinsize=1.):
    """
    Place unevenly spaced 2D data on a simple grid by 2D binning.

    Dispatches to the float32 or float64 implementation depending on the
    dtype of `values`. Latitudes and longitudes are converted to float32.

    Args
        lats (ndarray (1D)): the independent data latitude-axis
            of the grid.
        lons (ndarray (1D)): the independent data longitude
            axis of the grid.
        values (ndarray (1D) of float32 or float64): the dependent data in
            the form values = f(lat,lon).
        latmin, latmax, lonmin, lonmax (float, optional): the limits of the
            grid on which to remap the input data
        lonbinsize, latbinsize (float, optional): the full width and height of
            each bin on the grid. If each bin is a cube, then this is the x
            and y dimension. This is the step in both directions, x and y.
            Defaults to 1.

    Returns
        grid (ndarray (2D)): the evenly gridded data. The value of each cell
            is the sum value of the contents of the bin.
        bins (ndarray (2D)): A grid the same shape as `grid`, except the value
            of each cell is the number of points in that bin (so grid / bins
            gives the binned mean).

    Raises
        Exception: if the dtype of `values` is not supported.
    """
    if values.dtype == 'float64':
        result, nbsampl = __fastregrid_float64(
            lats.astype('float32'),
            lons.astype('float32'),
            values,
            latmin, latmax, lonmin, lonmax,
            lonbinsize, latbinsize
        )
    elif values.dtype == 'float32':
        result, nbsampl = __fastregrid_float32(
            lats.astype('float32'),
            lons.astype('float32'),
            values,
            latmin, latmax, lonmin, lonmax,
            lonbinsize, latbinsize
        )
    else:
        # fix: the message was never formatted (the dtype was passed as a
        # second exception argument instead of a % operand) and had a typo
        raise Exception('Array type not yet supported: %s' % values.dtype)
    return result, nbsampl
def __fastregrid_float64(
        np.ndarray[DTYPEf32_t, ndim=1] lat,
        np.ndarray[DTYPEf32_t, ndim=1] lon,
        np.ndarray[DTYPEf64_t, ndim=1] values,
        float latmin=-90.,
        float latmax=90.,
        float lonmin=-180.,
        float lonmax=180.,
        float lonbinsize=1.,
        float latbinsize=1.):
    """
    Place unevenly spaced 2D float64 data on a simple grid by 2D binning
    (mean).
    Args
        lat (ndarray (1D) of numpy float32): the independent data latitude-axis
            of the grid.
        lon (ndarray (1D) of numpy float32): the independent data longitude
            axis of the grid.
        values (ndarray (1D)): the dependent data in the form
            values = f(lat,lon).
        latmin, latmax, lonmin, lonmax (float, optional): the limits of the
            grid on which to remap the input data
        lonbinsize, latbinsize (float, optional): the full width and height of
            each bin on the grid. If each bin is a cube, then this is the x
            and y dimension. This is the step in both directions, x and y.
            Defaults to 1.
    Returns
        grid (ndarray (2D)): the evenly gridded data. The value of each cell
            is the sum value of the contents of the bin (divide by `bins`
            to obtain the binned mean).
        bins (ndarray (2D)): A grid the same shape as `grid`, except the value
            of each cell is the number of points in that bin.
    """
    # number of grid cells along each axis
    cdef int lonsize = np.round((lonmax - lonmin) / lonbinsize)
    cdef int latsize = np.round((latmax - latmin) / latbinsize)
    # accumulators: sum of values per cell, and sample count per cell
    # NOTE(review): np.int is removed in NumPy >= 1.24 — confirm the
    # targeted NumPy version or switch to np.int_
    cdef np.ndarray[np.float64_t, ndim=2] grid\
        = np.zeros([latsize, lonsize], dtype=np.float64)
    cdef np.ndarray[np.int_t, ndim=2] bins\
        = np.zeros([latsize, lonsize], dtype=np.int)
    cdef unsigned int i
    cdef unsigned int x, y
    cdef double val
    for i, val in enumerate(values):
        # keep only samples falling within the grid limits
        # (lower bound inclusive, upper bound exclusive)
        if latmin <= lat[<unsigned int> i]\
                and lat[<unsigned int> i] < latmax\
                and lonmin <= lon[<unsigned int> i]\
                and lon[<unsigned int> i] < lonmax:
            # indices of the target cell
            y = <unsigned int> np.floor(
                (lat[<unsigned int> i] - latmin) / latbinsize)
            x = <unsigned int> np.floor(
                (lon[<unsigned int> i] - lonmin) / lonbinsize)
            grid[y, x] = grid[y, x] + values[<unsigned int> i]
            bins[y, x] += 1
    return grid, bins
def __fastregrid_float32(
np.ndarray[DTYPEf32_t, ndim=1] lat,
np.ndarray[DTYPEf32_t, ndim=1] lon,
np.ndarray[DTYPEf32_t, ndim=1] values,
float latmin=-90.,
float latmax=90.,
float lonmin=-180.,
float lonmax=180.,
float lonbinsize=1.,
float latbinsize=1.):
"""
Place unevenly spaced 2D float32 data on a simple grid by 2D binning
(mean).
Args
lat (ndarray (1D) of numpy float32): the independent data latitude-axis
of the grid.
lon (ndarray (1D) of numpy float32): the independent data longitude
axis of the grid.
values (ndarray (1D)): the dependent data in the form
values = f(lat,lon).
latmin, latmax, lonmin, lonmax (float, optional): the limits of the
grid on which to remap the input data
lonbinsize, latbinsize (float, optional): the full width and height of
each bin on the grid. If each bin is a cube, then this is the x
and y dimension. This is the step in both directions, x and y.
Defaults to 1.
Returns
grid (ndarray (2D)): the evenly gridded data. The value of each cell
is the sum value of the contents of the bin.
bins (ndarray (2D)): A grid the same shape as `grid`, except the value
of each cell is the number of points in that bin.
"""
cdef int lonsize = np.round((lonmax - lonmin) / lonbinsize)
cdef int latsize = np.round((latmax - latmin) / latbinsize)
cdef np.ndarray[np.float32_t, ndim=2] grid\
= np.zeros([latsize, lonsize], dtype=np.float32)
cdef np.ndarray[np.int_t, ndim=2] bins\
= np.zeros([latsize, lonsize], dtype=np.int)
cdef unsigned int i
cdef unsigned int x, y
cdef double val
for i, val in enumerate(values):
if latmin <= lat[<unsigned int> i]\
and lat[<unsigned int> i] < latmax\