Commit bd2a2b47 authored by PIOLLE
Browse files

fixes to internal encoding

parent 9f06deec
......@@ -25,9 +25,6 @@ ENCODING = 'cerbere'
class Encoding(Enum):
"""attributes for saving the encoding of a source file"""
# attribute for marking variables with no fill value (like masks)
UNMASKED: str = 'no_fillvalue'
# parent dataset the field belongs to
DATASET: str = '_attached_dataset'
......@@ -188,11 +185,11 @@ def to_cerbere_dataarray(
def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
"""guess the scientific dtype from data read on file"""
dtype = data.encoding.get(
scale = data.encoding.get(
'scale_factor', data.encoding.get('add_offset', None))
fillv = data.encoding.get(
'_FillValue', data.attrs.get('_FillValue', None))
if dtype is None:
if scale is None:
# no scaling of data -> dtype should be unchanged
if fillv is None:
......@@ -205,6 +202,7 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
return np.dtype(fillv)
else:
dtype = np.dtype(type(scale))
if dtype != data.dtype:
# xarray may have changed the dtype for instance if fill values
# were replaced with NaN. Returns the intended scientific dtype
......@@ -214,7 +212,6 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
return dtype
def from_cerbere_dataarray(
data,
as_masked_array: bool = True,
......@@ -250,7 +247,14 @@ def from_cerbere_dataarray(
data.encoding[ENCODING][Encoding.M_DTYPE],
copy=False).to_masked_array(copy=False)
mdata.set_fill_value(data.encoding['_FillValue'])
mdata.mask = data.encoding[ENCODING][Encoding.M_MASK]
if Encoding.M_MASK in data.encoding[ENCODING]:
mask = data.encoding[ENCODING][Encoding.M_MASK]
if mask is None:
mdata.mask = False
else:
mdata.mask = mask
else:
mdata.mask = np.where(mdata == data.encoding['_FillValue'])
return mdata
......
......@@ -671,7 +671,7 @@ class Dataset(ABC):
# mark variables with no fill value
if '_FillValue' not in v.encoding:
encoding[internals.Encoding.UNMASKED] = True
encoding[internals.Encoding.M_MASK] = None
encoding[internals.Encoding.IO_FILLVALUE] = self._xr_fillvalue(v)
encoding[internals.Encoding.IO_DTYPE] = v.encoding.get(
'dtype', None)
......@@ -681,7 +681,6 @@ class Dataset(ABC):
'scale_factor', None)
if internals.ENCODING not in v.encoding:
logging.warning("Cerbere internal attribute should be there")
v.encoding[internals.ENCODING] = {}
v.encoding[internals.ENCODING].update(encoding)
......@@ -1964,8 +1963,8 @@ class Dataset(ABC):
return True
if internals.ENCODING not in encoding:
return '_FillValue' in encoding
return not encoding[internals.ENCODING].get(
internals.Encoding.UNMASKED, False)
return encoding[internals.ENCODING].get(
internals.Encoding.M_MASK, False) is not None
# ensure original or overriding encoding
for v in saved_dataset.variables:
......
......@@ -154,7 +154,8 @@ class Field(object):
if fill_value is None:
fill_value = internals.default_fill_value(dtype)
if dtype.kind != numpy.dtype(type(fill_value)).kind:
if dtype.kind != numpy.dtype(type(fill_value)).kind and \
dtype.kind != 'O':
raise TypeError(
"Inconsistent data ({}) and fillvalue ({}) dtype. If you "
"provided data as an xarray DataArray, it is possible you "
......@@ -168,7 +169,8 @@ class Field(object):
# simple casting from internal array to Field API
self._array = data
else:
self._array = internals.to_cerbere_dataarray(data, fill_value, dtype)
self._array = internals.to_cerbere_dataarray(
data, fill_value, dtype)
else:
# create the DataArray from the provided information
self._array = self._create_from_array(
......@@ -198,7 +200,9 @@ class Field(object):
self.encoding[internals.ENCODING] = {}
# internal special attributes
self._internals[internals.Encoding.UNMASKED] = no_missing_value
if no_missing_value:
self._internals[internals.Encoding.M_MASK] = None
self._internals[internals.Encoding.M_DTYPE] = dtype
# components for complex fields
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment