Commit bd2a2b47 authored by PIOLLE
Browse files

fixes to internal encoding

parent 9f06deec
...@@ -25,9 +25,6 @@ ENCODING = 'cerbere' ...@@ -25,9 +25,6 @@ ENCODING = 'cerbere'
class Encoding(Enum): class Encoding(Enum):
"""attributes for saving the encoding of a source file""" """attributes for saving the encoding of a source file"""
# attribute for marking variables with no fill value (like masks)
UNMASKED: str = 'no_fillvalue'
# parent dataset the field belongs to # parent dataset the field belongs to
DATASET: str = '_attached_dataset' DATASET: str = '_attached_dataset'
...@@ -188,11 +185,11 @@ def to_cerbere_dataarray( ...@@ -188,11 +185,11 @@ def to_cerbere_dataarray(
def io_encoding_dtype(data: xr.DataArray) -> np.dtype: def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
"""guess the scientific dtype from data read on file""" """guess the scientific dtype from data read on file"""
dtype = data.encoding.get( scale = data.encoding.get(
'scale_factor', data.encoding.get('add_offset', None)) 'scale_factor', data.encoding.get('add_offset', None))
fillv = data.encoding.get( fillv = data.encoding.get(
'_FillValue', data.attrs.get('_FillValue', None)) '_FillValue', data.attrs.get('_FillValue', None))
if dtype is None: if scale is None:
# no scaling of data -> dtype should be unchanged # no scaling of data -> dtype should be unchanged
if fillv is None: if fillv is None:
...@@ -205,6 +202,7 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype: ...@@ -205,6 +202,7 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
return np.dtype(fillv) return np.dtype(fillv)
else: else:
dtype = np.dtype(type(scale))
if dtype != data.dtype: if dtype != data.dtype:
# xarray may have changed the dtype for instance if fill values # xarray may have changed the dtype for instance if fill values
# were replaced with NaN. Returns the intended scientific dtype # were replaced with NaN. Returns the intended scientific dtype
...@@ -214,7 +212,6 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype: ...@@ -214,7 +212,6 @@ def io_encoding_dtype(data: xr.DataArray) -> np.dtype:
return dtype return dtype
def from_cerbere_dataarray( def from_cerbere_dataarray(
data, data,
as_masked_array: bool = True, as_masked_array: bool = True,
...@@ -250,7 +247,14 @@ def from_cerbere_dataarray( ...@@ -250,7 +247,14 @@ def from_cerbere_dataarray(
data.encoding[ENCODING][Encoding.M_DTYPE], data.encoding[ENCODING][Encoding.M_DTYPE],
copy=False).to_masked_array(copy=False) copy=False).to_masked_array(copy=False)
mdata.set_fill_value(data.encoding['_FillValue']) mdata.set_fill_value(data.encoding['_FillValue'])
mdata.mask = data.encoding[ENCODING][Encoding.M_MASK] if Encoding.M_MASK in data.encoding[ENCODING]:
mask = data.encoding[ENCODING][Encoding.M_MASK]
if mask is None:
mdata.mask = False
else:
mdata.mask = mask
else:
mdata.mask = np.where(mdata == data.encoding['_FillValue'])
return mdata return mdata
......
...@@ -671,7 +671,7 @@ class Dataset(ABC): ...@@ -671,7 +671,7 @@ class Dataset(ABC):
# mark variables with no fill value # mark variables with no fill value
if '_FillValue' not in v.encoding: if '_FillValue' not in v.encoding:
encoding[internals.Encoding.UNMASKED] = True encoding[internals.Encoding.M_MASK] = None
encoding[internals.Encoding.IO_FILLVALUE] = self._xr_fillvalue(v) encoding[internals.Encoding.IO_FILLVALUE] = self._xr_fillvalue(v)
encoding[internals.Encoding.IO_DTYPE] = v.encoding.get( encoding[internals.Encoding.IO_DTYPE] = v.encoding.get(
'dtype', None) 'dtype', None)
...@@ -681,7 +681,6 @@ class Dataset(ABC): ...@@ -681,7 +681,6 @@ class Dataset(ABC):
'scale_factor', None) 'scale_factor', None)
if internals.ENCODING not in v.encoding: if internals.ENCODING not in v.encoding:
logging.warning("Cerbere internal attribute should be there")
v.encoding[internals.ENCODING] = {} v.encoding[internals.ENCODING] = {}
v.encoding[internals.ENCODING].update(encoding) v.encoding[internals.ENCODING].update(encoding)
...@@ -1964,8 +1963,8 @@ class Dataset(ABC): ...@@ -1964,8 +1963,8 @@ class Dataset(ABC):
return True return True
if internals.ENCODING not in encoding: if internals.ENCODING not in encoding:
return '_FillValue' in encoding return '_FillValue' in encoding
return not encoding[internals.ENCODING].get( return encoding[internals.ENCODING].get(
internals.Encoding.UNMASKED, False) internals.Encoding.M_MASK, False) is not None
# ensure original or overriding encoding # ensure original or overriding encoding
for v in saved_dataset.variables: for v in saved_dataset.variables:
......
...@@ -154,7 +154,8 @@ class Field(object): ...@@ -154,7 +154,8 @@ class Field(object):
if fill_value is None: if fill_value is None:
fill_value = internals.default_fill_value(dtype) fill_value = internals.default_fill_value(dtype)
if dtype.kind != numpy.dtype(type(fill_value)).kind: if dtype.kind != numpy.dtype(type(fill_value)).kind and \
dtype.kind != 'O':
raise TypeError( raise TypeError(
"Inconsistent data ({}) and fillvalue ({}) dtype. If you " "Inconsistent data ({}) and fillvalue ({}) dtype. If you "
"provided data as an xarray DataArray, it is possible you " "provided data as an xarray DataArray, it is possible you "
...@@ -168,7 +169,8 @@ class Field(object): ...@@ -168,7 +169,8 @@ class Field(object):
# simple casting from internal array to Field API # simple casting from internal array to Field API
self._array = data self._array = data
else: else:
self._array = internals.to_cerbere_dataarray(data, fill_value, dtype) self._array = internals.to_cerbere_dataarray(
data, fill_value, dtype)
else: else:
# create the DataArray from the provided information # create the DataArray from the provided information
self._array = self._create_from_array( self._array = self._create_from_array(
...@@ -198,7 +200,9 @@ class Field(object): ...@@ -198,7 +200,9 @@ class Field(object):
self.encoding[internals.ENCODING] = {} self.encoding[internals.ENCODING] = {}
# internal special attributes # internal special attributes
self._internals[internals.Encoding.UNMASKED] = no_missing_value if no_missing_value:
self._internals[internals.Encoding.M_MASK] = None
self._internals[internals.Encoding.M_DTYPE] = dtype self._internals[internals.Encoding.M_DTYPE] = dtype
# components for complex fields # components for complex fields
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment