Commit 1c5d9171 authored by BODERE

Merge branch 'master' of https://gitlab.ifremer.fr/cerbere/cerbere

 Conflicts:
	.gitlab-ci.yml
parents c0eee1f3 b8e039e4
Pipeline #11152 passed with stages in 2 minutes and 34 seconds
@@ -7,11 +7,11 @@ image: continuumio/miniconda3:latest
# stages (main steps of pipeline)
stages:
- Quality
- Tests
- Sonarqube
# - Quality
# - Tests
# - Sonarqube
- Documentation
- Deploy
# - Deploy
# ---------------------------------------------------------------
# Jobs templates
@@ -84,78 +84,78 @@ stages:
# ---------------------------------------------------------------
# Quality jobs
# ---------------------------------------------------------------
flake8:
<<: *quality
script:
- poetry run flake8 --max-line-length=120 --docstring-convention google ${CI_PROJECT_NAME}
pylint:
<<: *quality
script:
#- poetry run pylint -j8 -E ${CI_PROJECT_NAME} tests
- "poetry run pylint --exit-zero ${CI_PROJECT_NAME} tests -r n --max-line-length=120 --msg-template='{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}' | tee pylint.txt"
artifacts:
expire_in: 1 week
paths:
- pylint.txt
#flake8:
# <<: *quality
# script:
# - poetry run flake8 --max-line-length=120 --docstring-convention google ${CI_PROJECT_NAME}
#
#pylint:
# <<: *quality
# script:
# #- poetry run pylint -j8 -E ${CI_PROJECT_NAME} tests
# - "poetry run pylint --exit-zero ${CI_PROJECT_NAME} tests -r n --max-line-length=120 --msg-template='{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}' | tee pylint.txt"
# artifacts:
# expire_in: 1 week
# paths:
# - pylint.txt
# ---------------------------------------------------------------
# Test jobs
# ---------------------------------------------------------------
python3.7:
<<: *test_37
image: continuumio/miniconda3
allow_failure: true
script:
- poetry run pytest tests/
python3.8:
<<: *test_38
image: continuumio/miniconda3
allow_failure: true
script:
- poetry run pytest tests/
python3.9:
<<: *install-conda-deps-39
<<: *code-changes
stage: Tests
image: continuumio/miniconda3
coverage: '/^TOTAL\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+\%)/'
allow_failure: true
script:
- poetry run pytest --cov=${CI_PROJECT_NAME} --cov-branch --cov-report=term tests/
- poetry run pytest -ra -q --cov=${CI_PROJECT_NAME} --cov-branch --cov-report xml:coverage.xml tests/
artifacts:
expire_in: 1 week
paths:
- coverage.xml
except:
- tags
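For reference, the `coverage:` regex above captures the percentage from the final `TOTAL` line of pytest-cov's terminal report; an illustrative line it would match (hypothetical numbers): `TOTAL 1234 567 89 12 54%`.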
#python3.7:
# <<: *test_37
# image: continuumio/miniconda3
# allow_failure: true
# script:
# - poetry run pytest tests/
#
#python3.8:
# <<: *test_38
# image: continuumio/miniconda3
# allow_failure: true
# script:
# - poetry run pytest tests/
#
#python3.9:
# <<: *install-conda-deps-39
# <<: *code-changes
# stage: Tests
# image: continuumio/miniconda3
# coverage: '/^TOTAL\s+\d+\s+\d+\s+\d+\s+\d+\s+(\d+\%)/'
# allow_failure: true
# script:
# - poetry run pytest --cov=${CI_PROJECT_NAME} --cov-branch --cov-report=term tests/
# - poetry run pytest -ra -q --cov=${CI_PROJECT_NAME} --cov-branch --cov-report xml:coverage.xml tests/
# artifacts:
# expire_in: 1 week
# paths:
# - coverage.xml
# except:
# - tags
# ---------------------------------------------------------------
# SonarQube
# ---------------------------------------------------------------
sonarqube:
stage: Sonarqube
tags: [cerbere-runner]
image:
name: sonarsource/sonar-scanner-cli:latest
entrypoint: [""]
allow_failure: true
script:
- sonar-scanner
-Dsonar.projectKey=${CI_PROJECT_NAME}
-Dsonar.language=py
-Dsonar.host.url=http://visi-common-sonar:9000
-Dsonar.login=e6f816eee72d3d5c03319ec74b468157b9164d12
-Dsonar.sourceEncoding=UTF-8
-Dsonar.python.coverage.reportPaths=coverage.xml
-Dsonar.coverage.exclusions=**__init__**,tests/**
-Dsonar.python.pylint.reportPath=pylint.txt
<<: *code-changes
except:
- tags
#sonarqube:
# stage: Sonarqube
# tags: [cerbere-runner]
# image:
# name: sonarsource/sonar-scanner-cli:latest
# entrypoint: [""]
# allow_failure: true
# script:
# - sonar-scanner
# -Dsonar.projectKey=${CI_PROJECT_NAME}
# -Dsonar.language=py
# -Dsonar.host.url=http://visi-common-sonar:9000
# -Dsonar.login=e6f816eee72d3d5c03319ec74b468157b9164d12
# -Dsonar.sourceEncoding=UTF-8
# -Dsonar.python.coverage.reportPaths=coverage.xml
# -Dsonar.coverage.exclusions=**__init__**,tests/**
# -Dsonar.python.pylint.reportPath=pylint.txt
# <<: *code-changes
# except:
# - tags
# ---------------------------------------------------------------
# Documentation job
@@ -179,15 +179,15 @@ pages:
except:
- tags
# ---------------------------------------------------------------
# Release job
# ---------------------------------------------------------------
nexus:
<<: *install-conda-deps-38
stage: Deploy
script:
- poetry build --format wheel
- poetry config repositories.nexus-public-release https://nexus-test.ifremer.fr/repository/hosted-pypi-public-release/
- poetry publish -r nexus-public-release -u nexus-ci -p w2bH2NjgFmQnzVk3
only:
- tags
## ---------------------------------------------------------------
## Release job
## ---------------------------------------------------------------
#nexus:
# <<: *install-conda-deps-38
# stage: Deploy
# script:
# - poetry build --format wheel
# - poetry config repositories.nexus-public-release https://nexus-test.ifremer.fr/repository/hosted-pypi-public-release/
# - poetry publish -r nexus-public-release -u nexus-ci -p w2bH2NjgFmQnzVk3
# only:
# - tags
@@ -24,7 +24,73 @@ CF_AUTHORITY = 'CF-1.7'
def default_fill_value(obj):
"""Returns the default fill value for a specific type"""
return numpy.ma.default_fill_value(obj)
if isinstance(obj, numpy.dtype):
dtype = obj
elif isinstance(obj, (str, type)):
dtype = numpy.dtype(obj)
elif isinstance(obj, numpy.ndarray):
dtype = obj.dtype
else:
raise TypeError("Unexpected object type: ", type(obj), obj)
if dtype.name == 'int16':
return numpy.int16(-32768)
elif dtype.name == 'uint16':
return numpy.uint16(65535)
elif dtype.name == 'int8':
return numpy.int8(-128)
elif dtype.name == 'uint8':
return numpy.uint8(255)
else:
return numpy.ma.default_fill_value(dtype)
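As a quick illustration of the dtype-dependent defaults above, a minimal sketch (the `cf` import path is an assumption, not from this commit):
import numpy
from cerbere.dataset import cf   # import path assumed

cf.default_fill_value(numpy.dtype('int16'))    # numpy.int16(-32768)
cf.default_fill_value('uint8')                 # numpy.uint8(255)
cf.default_fill_value(numpy.zeros(3, 'int8'))  # numpy.int8(-128)
cf.default_fill_value(numpy.dtype('float64'))  # 1e+20 (numpy.ma default)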
def get_masked_values(fieldname, data, fill_value, silent=False):
"""fix masked values. Required as xarray data can't store masked values
or nan for non-float types"""
if fill_value is None and not isinstance(data, numpy.ma.core.MaskedArray):
# no masked data
return data
if isinstance(data, (numpy.ma.core.MaskedArray, numpy.ndarray)):
if data.dtype.name in [
'float16', 'float32', 'float64', 'complex64', 'complex128']:
if fill_value is not None:
data.set_fill_value(fill_value)
return data
elif numpy.issubdtype(data.dtype, numpy.datetime64):
return data
else:
# mask fill values for int types
data = numpy.ma.masked_equal(data, fill_value, copy=False)
elif not silent:
logging.warning(
'values equal to {} are marked as missing values in {}'
.format(fill_value, fieldname))
return data
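A minimal behavioural sketch for an int16 array (import path assumed as above):
import numpy
from cerbere.dataset import cf   # import path assumed

raw = numpy.array([1, 2, -32768, 4], dtype='int16')
masked = cf.get_masked_values('sst', raw, fill_value=numpy.int16(-32768))
# masked_array(data=[1, 2, --, 4], mask=[False, False, True, False])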
def set_masked_values(data, fill_value):
"""replace masked values with fill value. Required as xarray data can't
store masked values or nan for non-float types"""
if fill_value is None and not isinstance(data, numpy.ma.core.MaskedArray):
# no masked data
return data
if isinstance(data, numpy.ma.core.MaskedArray):
if data.dtype.name in [
'float16', 'float32', 'float64', 'complex64', 'complex128']:
fill_value = numpy.nan
elif numpy.issubdtype(data.dtype, numpy.datetime64):
fill_value = numpy.datetime64('NaT')
else:
if fill_value is None:
fill_value = data.fill_value
return data.filled(fill_value)
return data
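And a sketch of the inverse direction, for an integer and a float array (import path assumed):
import numpy
from cerbere.dataset import cf   # import path assumed

iarr = numpy.ma.masked_equal([1, 2, 3], 2)
cf.set_masked_values(iarr, fill_value=-9999)   # array([1, -9999, 3])
farr = numpy.ma.masked_values([1., 2., 3.], 2.)
cf.set_masked_values(farr, fill_value=None)    # array([ 1., nan, 3.])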
def default_profile(
......
This diff is collapsed.
@@ -10,6 +10,7 @@ import copy
from collections import OrderedDict
from typing import (Any, Dict, Hashable, Iterable, Iterator, List,
Mapping, Optional, Sequence, Set, Tuple, Union, cast)
import warnings
import numpy
import xarray as xr
@@ -78,7 +79,12 @@ class Field(object):
fillvalue : the default value to associate with missing data in the
field's values. The fillvalue must be of the same type as
`datatype` and `values`
`datatype` and `values`. If None, a default fill value will be used
unless `no_missing_value` is set.
no_missing_value: if True, the field will not contain any missing values.
In practice, no _FillValue attribute will be written for this field
when saving it (see the construction sketch below).
attrs (dict) : a dictionary of the metadata associated with the field's
values.
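A hypothetical construction sketch for the new `no_missing_value` argument (signature taken from this diff; import path and names assumed):
import numpy
from collections import OrderedDict
from cerbere.dataset.field import Field   # import path assumed

# a flag field where every cell is meaningful: no _FillValue is written
flags = Field(
    dtype=numpy.dtype('int8'),
    name='quality_flag',
    dims=OrderedDict([('row', 10), ('cell', 20)]),
    no_missing_value=True,   # created with zeros instead of masked values
)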
@@ -107,6 +113,7 @@ class Field(object):
components: Optional[Tuple['Field']] = None,
dataset: Optional['Dataset'] = None,
fillvalue: Optional[Any] = None,
no_missing_value: bool = False,
precision: Optional[int] = None,
description: Optional[str] = None,
standard_name: Optional[Union[str, Tuple[str, str]]] = None,
@@ -120,8 +127,39 @@ class Field(object):
if name is not None and not isinstance(name, str):
raise TypeError('name must be a string')
if isinstance(data, xr.DataArray):
if (isinstance(data, xr.DataArray)
and '_cerbere_status' in data.encoding):
# simple casting of internal DataArray to Field API
self._array = data
return
# dtype
if data is None:
if dtype is None:
raise ValueError(
"If you don't provide any data, you must at least "
"provide a datatype"
)
if dtype is None:
dtype = data.dtype
# fill value
if fillvalue is None and not no_missing_value:
fillvalue = cf.default_fill_value(dtype)
if no_missing_value:
if fillvalue is not None:
warnings.warn(
'fillvalue should not be set if no_missing_value is set. It '
'will be ignored.'
)
fillvalue = None
if isinstance(data, xr.DataArray):
if '_cerbere_status' in data.encoding:
# simple casting from internal array to Field API
self._array = data
else:
self._array = cf.set_masked_values(data, fillvalue)
else:
# create the DataArray from the provided information
@@ -133,25 +171,27 @@ class Field(object):
if data is None:
# create default array
if dtype is None:
raise ValueError(
"If you don't provide any data, you must at least "
"provide a datatype"
)
if not isinstance(dims, OrderedDict):
raise TypeError(
"dimensions should be provided with their size in a "
"OrderedDict"
)
data = numpy.ma.masked_all(
tuple(dims.values()), dtype)
else:
data = data
if not no_missing_value:
data = numpy.ma.masked_all(tuple(dims.values()), dtype)
data.set_fill_value(fillvalue)
else:
data = numpy.zeros(tuple(dims.values()), dtype)
# instantiate the xarray representation
kwargs['dims'] = list(dims)
kwargs['attrs'] = attrs
self._array = xr.DataArray(data, name=name, **kwargs)
# fix for xarray to keep the data type : replace masked values
# with fill values
self._array = xr.DataArray(
cf.set_masked_values(data, fillvalue),
name=name, **kwargs)
# Overrides DataArray object when conflicts with the superceding
# arguments
@@ -173,6 +213,7 @@ class Field(object):
self.encoding.update(encoding)
# internal special attributes
self._array.encoding['_no_missing_value'] = no_missing_value
# components for complex fields
if components is not None:
@@ -192,7 +233,10 @@ class Field(object):
self._array.encoding['_attached_dataset'] = dataset
# @TODO self.handler ???
self._array.encoding['cerbere_status'] = "changed"
if dataset is None:
self._array.encoding['_cerbere_status'] = "new"
else:
self._array.encoding['_cerbere_status'] = "changed"
@classmethod
def to_field(cls, data: xr.DataArray) -> 'Field':
@@ -201,10 +245,11 @@ class Field(object):
"""
return Field(data=data)
def to_dataarray(self) -> 'xr.DataArray':
def to_dataarray(self, silent=False) -> 'xr.DataArray':
"""Return the field values a xarray DataArray"""
if self.dataset is None:
return self._array
return cf.get_masked_values(
self._array.name, self._array, self.fill_value, silent=silent)
else:
return self.dataset.get_values(
self._array.name,
@@ -234,7 +279,7 @@ class Field(object):
for att, val in attrs:
if att in FIELD_ATTRS:
result += ' # {} : {}\n'.format(att, val)
result += ' # fill_value : {}\n'.format(self.fill_value)
result += ' # fill_value : {}\n'.format(str(self.fill_value))
# free form attributes
result = result + ' other attributes :\n'
@@ -286,13 +331,21 @@ class Field(object):
self._array.encoding = attrs
@property
def dims(self) -> Tuple[str, int]:
def sizes(self) -> Mapping[Hashable, int]:
"""A tuple of the field dimensions name and size"""
if self.dataset is None:
return tuple(self._array.dims)
return self._array.sizes
else:
return self.dataset.get_field_sizes(self.name)
@property
def dims(self) -> Tuple[str]:
"""A tuple of the field dimensions name and size"""
if self.dataset is None:
return self._array.dims
else:
return self.dataset.get_field_dims(self.name)
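The split mirrors xarray's own `dims` / `sizes` properties; a hypothetical session for a 2-D field (names assumed):
field.dims    # ('row', 'cell')
field.sizes   # name-to-size mapping, e.g. {'row': 10, 'cell': 20}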
@dims.setter
def dims(self, dims):
if self.dataset is None:
@@ -303,7 +356,7 @@ class Field(object):
@property
def dimnames(self) -> Tuple[str]:
"""Tuple of the field's dimension names"""
return tuple(self.dims.keys())
return self.dims
def get_dimsize(self, dimname) -> int:
"""Return the size of a field dimension"""
@@ -418,14 +471,14 @@ class Field(object):
self.attrs['standard_name'] = standard_name[0]
self.attrs['authority'] = standard_name[1]
elif standard_name is not None:
self.attrs['standard_name'] = standard_name[0]
self.attrs['standard_name'] = standard_name
self.attrs['authority'] = cf.CF_AUTHORITY
else:
self.attrs['standard_name'] = None
self.attrs['authority'] = None
@property
def components(self: str):
def components(self):
"""subfield of a multi-array field (such as vector, broken down into
u and v fields)"""
return self._array.encoding['_components']
@@ -543,11 +596,12 @@ class Field(object):
**kwargs
}
if self.dataset is None:
return numpy.ma.array(
self._read_dataarray(self._array, **allkwargs)
)
data = numpy.ma.array(
self._read_dataarray(self._array, **allkwargs))
else:
return self.dataset.get_values(self.name, **allkwargs)
data = self.dataset.get_values(self.name, **allkwargs)
return cf.get_masked_values(self.name, data, self.fill_value)
@classmethod
def _read_dataarray(
@@ -603,6 +657,10 @@ class Field(object):
pad with fill values the ``subset`` array extracted from ``array``
where ``index`` is beyond the limits of ``array``.
"""
if len(array.shape) == 0:
# dimensionless field
return subset.to_masked_array()
pad_edges = []
for dim in list(array.dims):
if dim in index:
@@ -614,12 +672,12 @@ class Field(object):
else:
pad_edges.append([0, 0])
masked_values = subset.to_masked_array()
res = numpy.pad(
subset, pad_edges, 'constant',
constant_values=numpy.nan
masked_values, pad_edges, 'constant',
constant_values=masked_values.fill_value
)
if isinstance(subset, numpy.ma.MaskedArray):
res = numpy.ma.fix_invalid(res, copy=False)
res = numpy.ma.masked_equal(res, masked_values.fill_value, copy=False)
return res
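A self-contained numpy sketch of the padding strategy above (pad with the array's fill value, then re-mask), assuming a 1-D subset:
import numpy

subset = numpy.ma.masked_equal([1, 2, -32768], -32768)
res = numpy.pad(subset.filled(), [(1, 1)], 'constant',
                constant_values=subset.fill_value)
res = numpy.ma.masked_equal(res, subset.fill_value, copy=False)
# res: [-- 1 2 -- --]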
@@ -652,13 +710,14 @@ class Field(object):
else:
self.dataset.set_values(self.name, values, index=index)
@classmethod
def _set_xrvalues(
cls,
self,
xrdata,
values,
index=None
):
values = cf.set_masked_values(values, self.fill_value)
if index is None:
xrdata.values[:] = values
@@ -814,7 +873,9 @@ class Field(object):
new_field = Field(data=subarray.copy(deep=True))
# detach from any dataset
new_field.dataset = None
new_field._array.encoding['_attached_dataset'] = None
#new_field._array.encoding['cerbere_src_encoding'] =
# self._array.encoding['cerbere_src_encoding']
if prefix is not None:
new_field.set_name(prefix + new_field.name)
@@ -828,9 +889,9 @@ class Field(object):
"""
if self.dataset is not None:
if self.name not in self.dataset.get_fieldnames():
if self.name not in self.dataset.fieldnames:
raise ValueError("Field {} not existing".format(self.name))
self.dataset.rename({self.name: newname})
self.dataset.rename_field(self.name, newname)
self.name = newname
def __add__(self, other: 'Field') -> 'Field':
......
@@ -31,7 +31,7 @@ class NCDataset(Dataset):
*args,
format=NETCDF4,
**kwargs):
return super().__init__(
super().__init__(
*args,
format=format,
**kwargs
......
@@ -56,7 +56,8 @@ class CRACollection(Feature):
@property
def _feature_geodimnames(self):
return self.feature_class._instance_dimname, 'z',
return (self.feature_class._instance_dimname,) + \
self.feature_class._feature_geodimnames
def get_geocoord_dimnames(
self, fieldname: str,
......
@@ -206,13 +206,24 @@ class Feature(Dataset, ABC):
return dim_validity
def append(self, feature, prefix, fields=None):
def append(self,
feature,
prefix: str = '',
add_coords: bool = False,
as_new_dims: bool = False,
fields: list = None):
"""Append the fields from another feature
It is assumed the two features do not share any dimension. The appended
feature (child) is treated as a set of ancillary fields and loses its
model properties. The feature model is that of the receiving
(parent) feature.
The fields can share the same coordinates and dimensions (if the added
feature is an overlay of the current feature). In that case the
shared coordinates of the added feature are not copied, unless
`add_coords` is set to True (in which case they are first prefixed
with `prefix`).
If the dimensions of the added field are unrelated to the current
feature (even if they have similar names), `as_new_dims` must be set
to True. The dimensions (and corresponding coordinates) are then
prefixed with `prefix` and added with the fields to the current
feature (see the usage sketch below).
Args:
feature (AbstractFeature derived class): the feature to append.
@@ -221,47 +232,47 @@ class Feature(Dataset, ABC):
feature (to avoid conflicts with the existing fields of the
current feature).
add_coords: if True, add the feature coordinates as variables. If
False, they are not added.
as_new_dims: if True, rename the field and coordinate dimensions with
`prefix`.
fields (list of str): a list of fieldnames specifying which fields
are to be appended. By default, all fields of the feature are
appended.
"""
child_fields = {}
# create new fields
for fieldname in feature.get_geolocation_fields():
newname = ''.join([prefix, fieldname])
if newname in self.fieldnames:
logging.debug("Field already existing: {}".format(newname))
continue
childfield = feature.get_geolocation_field(fieldname)
if feature.get_geolocation_field(fieldname) is not None:
child_fields[newname] = childfield.clone()
# additional _ ensures no field and dimensions have the same
# name
dims = OrderedDict([('_'.join([prefix, k]), v)
for k, v in childfield.dimensions.items()])
child_fields[newname].dimensions = dims