Commit 20682027 authored by PIOLLE
Browse files

Improved handling of masked values for non-float variables

parent f235f133
......@@ -1027,12 +1027,13 @@ class Dataset(ABC):
{field.name: dataarr}
)
except ValueError:
# an error cas when for instance an index (like time) has masked
# an error case when for instance an index (like time) has masked
# values. This seems to do the trick, in some tested cases but
# probably not all of them...
logging.warning("Entering a special case where not xarray variable "
warnings.warn(
"Entering a special case where not xarray variable "
"assignment was possible, probably because of fill "
"values in a coordinate")
"values in a coordinate. Field: {}".format(field.name))
self._std_dataset[field.name] = xr.DataArray(
coords=[(d, self._std_dataset.coords[d]) for d in field.dims],
data=field.get_values(),
......@@ -1990,17 +1991,26 @@ class Dataset(ABC):
self._format_nc_attrs(saved_dataset)
for v in saved_dataset.variables:
if 'zlib' not in saved_dataset[v].encoding:
saved_dataset[v].encoding['zlib'] = True
if 'complevel' not in saved_dataset[v].encoding:
saved_dataset[v].encoding['complevel'] = 4
if ('_FillValue' not in saved_dataset[v].encoding
and saved_dataset[v].dtype != np.object):
encoding = saved_dataset[v].encoding
if 'zlib' not in encoding:
encoding['zlib'] = True
if 'complevel' not in encoding:
encoding['complevel'] = 4
if saved_dataset[v].dtype != np.object:
if '_FillValue' in saved_dataset[v].attrs:
fillv = saved_dataset[v].attrs.pop('_FillValue')
elif '_FillValue' in encoding:
fillv = encoding.pop('_FillValue')
else:
fillv = default_fill_value(saved_dataset[v].dtype)
saved_dataset[v].encoding['_FillValue'] = fillv
if (('_no_missing_value' not in encoding)
or not encoding['_no_missing_value']):
encoding['_FillValue'] = fillv
saved_dataset[v].encoding.update(encoding)
# adjust missing value attribute types if packing is applied
for matt in ['valid_min', 'valid_max', 'valid_range']:
......
......@@ -10,6 +10,7 @@ import copy
from collections import OrderedDict
from typing import (Any, Dict, Hashable, Iterable, Iterator, List,
Mapping, Optional, Sequence, Set, Tuple, Union, cast)
import warnings
import numpy
import xarray as xr
......@@ -78,7 +79,12 @@ class Field(object):
fillvalue : the default value to associate with missing data in the
field's values. The fillvalue must be of the same type as
`datatype` and `values`
`datatype` and `values`. If None, default fill value will be used
unless `no_missing_value` is set.
no_missing_value: if True, the field will not contain any missing value.
In practice no _FillValue attribute will be written for this field
when saving it.
attrs (dict) : a dictionary of the metadata associated with the field's
values.
......@@ -107,6 +113,7 @@ class Field(object):
components: Optional[Tuple['Field']] = None,
dataset: Optional['Dataset'] = None,
fillvalue: Optional[Any] = None,
no_missing_value: bool = False,
precision: Optional[int] = None,
description: Optional[str] = None,
standard_name: Optional[Union[str, Tuple[str, str]]] = None,
......@@ -120,6 +127,12 @@ class Field(object):
if name is not None and not isinstance(name, str):
raise TypeError('name must be a string')
if (isinstance(data, xr.DataArray)
and '_cerbere_status' in data.encoding):
# simple casting of internal DataArray to Field API
self._array = data
return
# dtype
if data is None:
if dtype is None:
......@@ -131,10 +144,21 @@ class Field(object):
dtype = data.dtype
# fill value
if fillvalue is None:
if fillvalue is None and not no_missing_value:
fillvalue = cf.default_fill_value(dtype)
if no_missing_value:
if fillvalue is not None:
warnings.warn(
'fillvalue should not be set if no_missing_value is set. it'
'will be ignored.'
)
fillvalue = None
if isinstance(data, xr.DataArray):
if '_cerbere_status' in data.encoding:
# simple casting from internal array to Field API
self._array = data
else:
self._array = cf.set_masked_values(data, fillvalue)
else:
# create the DataArray from the provided information
......@@ -153,8 +177,11 @@ class Field(object):
"OrderedDict"
)
if not no_missing_value:
data = numpy.ma.masked_all(tuple(dims.values()), dtype)
data.set_fill_value(fillvalue)
else:
data = numpy.zeros(tuple(dims.values()), dtype)
# instantiate the xarray representation
kwargs['dims'] = list(dims)
......@@ -186,6 +213,7 @@ class Field(object):
self.encoding.update(encoding)
# internal special attributes
self._array.encoding['_no_missing_value'] = no_missing_value
# components for complex fields
if components is not None:
......@@ -205,7 +233,10 @@ class Field(object):
self._array.encoding['_attached_dataset'] = dataset
# @TODO self.handler ???
self._array.encoding['cerbere_status'] = "changed"
if dataset is None:
self._array.encoding['_cerbere_status'] = "new"
else:
self._array.encoding['_cerbere_status'] = "changed"
@classmethod
def to_field(cls, data: xr.DataArray) -> 'Field':
......@@ -829,7 +860,7 @@ class Field(object):
new_field = Field(data=subarray.copy(deep=True))
# detach from any dataset
new_field.dataset = None
new_field._array.encoding['_attached_dataset'] = None
if prefix is not None:
new_field.set_name(prefix + new_field.name)
......@@ -845,7 +876,7 @@ class Field(object):
if self.dataset is not None:
if self.name not in self.dataset.fieldnames:
raise ValueError("Field {} not existing".format(self.name))
self.dataset.rename_field({self.name: newname})
self.dataset.rename_field(self.name, newname)
self.name = newname
def __add__(self, other: 'Field') -> 'Field':
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment