Commit 20682027 authored by PIOLLE

Improved handling of masked values for non-float variables

parent f235f133
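
For context, a minimal standalone sketch (not part of this commit) of the underlying issue: integer and other non-float variables have no NaN, so missing values must be carried by a mask and written out with a dtype-compatible `_FillValue`. Variable names, the fill value and the output file are illustrative.

```python
import numpy as np
import xarray as xr

# missing values in an integer variable: no NaN exists for int dtypes, so the
# gaps are carried by a mask and materialised with an explicit fill value
counts = np.ma.masked_array([1, 2, 3, 4], mask=[False, True, False, False],
                            dtype='int16')

# keep the variable as int16 on disk by filling with a declared _FillValue,
# rather than letting the writer promote masked data to float/NaN
ds = xr.Dataset({'counts': ('time', counts.filled(-32768))})
ds['counts'].encoding['_FillValue'] = np.int16(-32768)
ds.to_netcdf('counts.nc')
```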
@@ -1027,12 +1027,13 @@ class Dataset(ABC):
                     {field.name: dataarr}
                 )
             except ValueError:
-                # an error cas when for instance an index (like time) has masked
+                # an error case when for instance an index (like time) has masked
                 # values. This seems to do the trick, in some tested cases but
                 # probably not all of them...
-                logging.warning("Entering a special case where not xarray variable "
-                                "assignment was possible, probably because of fill "
-                                "values in a coordinate")
+                warnings.warn(
+                    "Entering a special case where no xarray variable "
+                    "assignment was possible, probably because of fill "
+                    "values in a coordinate. Field: {}".format(field.name))
                 self._std_dataset[field.name] = xr.DataArray(
                     coords=[(d, self._std_dataset.coords[d]) for d in field.dims],
                     data=field.get_values(),
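
A generic, self-contained illustration of the try/fallback pattern used in the hunk above. The actual failure (fill values in a coordinate such as time) is not reproduced here; dataset and variable names are illustrative.

```python
import warnings
import numpy as np
import xarray as xr

ds = xr.Dataset(coords={'time': np.arange(3)})
values = np.array([12.1, 12.4, 12.9])

try:
    # normal path: let xarray align the new variable on the dataset coords
    ds['sst'] = xr.DataArray(values, dims=('time',))
except ValueError:
    # fallback mirrored from the hunk above: rebuild the DataArray explicitly
    # against the dataset's own coordinates instead of relying on alignment
    warnings.warn("direct assignment failed, rebuilding the variable "
                  "against the existing coordinates. Field: sst")
    ds['sst'] = xr.DataArray(
        coords=[(d, ds.coords[d]) for d in ('time',)], data=values)
```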
@@ -1990,17 +1991,26 @@ class Dataset(ABC):
         self._format_nc_attrs(saved_dataset)
 
         for v in saved_dataset.variables:
-            if 'zlib' not in saved_dataset[v].encoding:
-                saved_dataset[v].encoding['zlib'] = True
-            if 'complevel' not in saved_dataset[v].encoding:
-                saved_dataset[v].encoding['complevel'] = 4
-            if ('_FillValue' not in saved_dataset[v].encoding
-                    and saved_dataset[v].dtype != np.object):
+            encoding = saved_dataset[v].encoding
+
+            if 'zlib' not in encoding:
+                encoding['zlib'] = True
+            if 'complevel' not in encoding:
+                encoding['complevel'] = 4
+
+            if saved_dataset[v].dtype != np.object:
                 if '_FillValue' in saved_dataset[v].attrs:
                     fillv = saved_dataset[v].attrs.pop('_FillValue')
+                elif '_FillValue' in encoding:
+                    fillv = encoding.pop('_FillValue')
                 else:
                     fillv = default_fill_value(saved_dataset[v].dtype)
-                saved_dataset[v].encoding['_FillValue'] = fillv
+
+                if (('_no_missing_value' not in encoding)
+                        or not encoding['_no_missing_value']):
+                    encoding['_FillValue'] = fillv
+
+            saved_dataset[v].encoding.update(encoding)
 
         # adjust missing value attribute types if packing is applied
         for matt in ['valid_min', 'valid_max', 'valid_range']:
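
The hunk above manipulates the per-variable `encoding` dict that xarray consumes when writing netCDF. A minimal standalone sketch of that mechanism (variable name, values and output file are illustrative):

```python
import numpy as np
import xarray as xr

ds = xr.Dataset({'wind_speed': ('time', np.array([5.0, np.nan, 7.5]))})

# per-variable encoding consumed by to_netcdf(): compression settings plus the
# fill value actually stored in the file in place of NaN
ds['wind_speed'].encoding.update({
    'zlib': True,                     # enable deflate compression
    'complevel': 4,                   # moderate compression level
    'dtype': 'float32',               # on-disk type
    '_FillValue': np.float32(1e20),   # written instead of NaN
})
ds.to_netcdf('wind_speed.nc')
```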
@@ -10,6 +10,7 @@ import copy
 from collections import OrderedDict
 from typing import (Any, Dict, Hashable, Iterable, Iterator, List,
                     Mapping, Optional, Sequence, Set, Tuple, Union, cast)
+import warnings
 
 import numpy
 import xarray as xr
@@ -78,7 +79,12 @@ class Field(object):
         fillvalue : the default value to associate with missing data in the
             field's values. The fillvalue must be of the same type as
-            `datatype` and `values`
+            `datatype` and `values`. If None, the default fill value will be
+            used unless `no_missing_value` is set.
+        no_missing_value: if True, the field will not contain any missing
+            value. In practice no _FillValue attribute will be written for
+            this field when saving it.
         attrs (dict) : a dictionary of the metadata associated with the field's
             values.
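
A hedged usage sketch of the new `no_missing_value` flag, assuming the constructor keywords visible in this diff (`name`, `dims`, `dtype`, `fillvalue`, `no_missing_value`) and an illustrative import path:

```python
from collections import OrderedDict
import numpy
from cerbere.dataset.field import Field  # import path assumed for illustration

# quality flags never take missing values: no fill value is picked and no
# _FillValue attribute will be written when the field is saved
flags = Field(
    name='quality_flag',
    dims=OrderedDict(time=100),
    dtype=numpy.int8,
    no_missing_value=True,
)

# regular variable: a dtype-dependent default fill value is selected since
# neither fillvalue nor no_missing_value is provided
sst = Field(name='sst', dims=OrderedDict(time=100), dtype=numpy.float32)
```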
@@ -107,6 +113,7 @@ class Field(object):
             components: Optional[Tuple['Field']] = None,
             dataset: Optional['Dataset'] = None,
             fillvalue: Optional[Any] = None,
+            no_missing_value: bool = False,
             precision: Optional[int] = None,
             description: Optional[str] = None,
             standard_name: Optional[Union[str, Tuple[str, str]]] = None,
@@ -120,6 +127,12 @@ class Field(object):
         if name is not None and not isinstance(name, str):
             raise TypeError('name must be a string')
 
+        if (isinstance(data, xr.DataArray)
+                and '_cerbere_status' in data.encoding):
+            # simple casting of an internal dataarray to the Field API
+            self._array = data
+            return
+
         # dtype
         if data is None:
             if dtype is None:
@@ -131,11 +144,22 @@ class Field(object):
             dtype = data.dtype
 
         # fill value
-        if fillvalue is None:
+        if fillvalue is None and not no_missing_value:
             fillvalue = cf.default_fill_value(dtype)
 
+        if no_missing_value:
+            if fillvalue is not None:
+                warnings.warn(
+                    'fillvalue should not be set if no_missing_value is set. '
+                    'It will be ignored.'
+                )
+            fillvalue = None
+
         if isinstance(data, xr.DataArray):
-            self._array = cf.set_masked_values(data, fillvalue)
+            if '_cerbere_status' in data.encoding:
+                # simple casting from internal array to Field API
+                self._array = data
+            else:
+                self._array = cf.set_masked_values(data, fillvalue)
         else:
             # create the DataArray from the provided information
@@ -153,8 +177,11 @@ class Field(object):
                     "OrderedDict"
                 )
 
-            data = numpy.ma.masked_all(tuple(dims.values()), dtype)
-            data.set_fill_value(fillvalue)
+            if not no_missing_value:
+                data = numpy.ma.masked_all(tuple(dims.values()), dtype)
+                data.set_fill_value(fillvalue)
+            else:
+                data = numpy.zeros(tuple(dims.values()), dtype)
 
             # instantiate the xarray representation
             kwargs['dims'] = list(dims)
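
The two array-backing choices in the hunk above, shown standalone with plain numpy (shapes and fill values are illustrative):

```python
import numpy

dims = {'time': 4, 'cell': 3}

# default path: every value starts as missing, with an explicit fill value
masked = numpy.ma.masked_all(tuple(dims.values()), numpy.float32)
masked.set_fill_value(1e20)

# no_missing_value path: a plain, unmasked array initialised to zero
plain = numpy.zeros(tuple(dims.values()), numpy.int8)

print(masked.fill_value, bool(masked.mask.all()))  # 1e+20 True
print(plain.dtype, plain.shape)                    # int8 (4, 3)
```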
@@ -186,6 +213,7 @@ class Field(object):
         self.encoding.update(encoding)
 
         # internal special attributes
+        self._array.encoding['_no_missing_value'] = no_missing_value
 
         # components for complex fields
         if components is not None:
@@ -205,7 +233,10 @@ class Field(object):
         self._array.encoding['_attached_dataset'] = dataset
 
         # @TODO self.handler ???
-        self._array.encoding['cerbere_status'] = "changed"
+        if dataset is None:
+            self._array.encoding['_cerbere_status'] = "new"
+        else:
+            self._array.encoding['_cerbere_status'] = "changed"
 
     @classmethod
     def to_field(cls, data: xr.DataArray) -> 'Field':
@@ -829,7 +860,7 @@ class Field(object):
         new_field = Field(data=subarray.copy(deep=True))
 
         # detach from any dataset
-        new_field.dataset = None
+        new_field._array.encoding['_attached_dataset'] = None
 
         if prefix is not None:
             new_field.set_name(prefix + new_field.name)
@@ -845,7 +876,7 @@ class Field(object):
         if self.dataset is not None:
             if self.name not in self.dataset.fieldnames:
                 raise ValueError("Field {} not existing".format(self.name))
-            self.dataset.rename_field({self.name: newname})
+            self.dataset.rename_field(self.name, newname)
         self.name = newname
 
     def __add__(self, other: 'Field') -> 'Field':