Commit 4a408af0 authored by Jeff Piollé's avatar Jeff Piollé
Browse files

initial version

\ No newline at end of file
\ No newline at end of file
# -*- coding: utf-8 -*-
.. module::cerbere.mapper.safeslfile
Class to read Sentinel-3 OLCI files.
:copyright: Copyright 2015 Ifremer / Cersat.
:license: Released under GPL v3 license, see :ref:`license`.
.. sectionauthor:: Jeff Piolle <>
.. codeauthor:: Jeff Piolle <>
import os
import glob
from numpy import ma, dtype, int64
from collections import OrderedDict
from netCDF4 import num2date
from .. import READ_ONLY
from .abstractmapper import AbstractMapper
from cerbere.datamodel.variable import Variable
from cerbere.datamodel.field import Field
from cerbere.mapper.ncfile import NCFile
"L1B": ["*"],
"L2LAND": ["", "", "", "", ""],
"L2WATER": ["", "", "", "*",
"", "", "", "", ""]
class SAFEOLFile(AbstractMapper):
"""Abstract class for SAFE OLCI files.
This mapper concatenates together the files within a SAFE folder that
share the same dimensions.
url: the path to the product SAFE folder
L1B,, *,
L2 LAND,,,,,,,
L2 WATER,, chl_nn,,,
*,,, tsm_nn,,,
def __init__(self, url=None, mode=READ_ONLY, **kwargs):
if mode != READ_ONLY:
raise Exception("This mapper can only be used in read_only mode.")
super(SAFEOLFile, self).__init__(url=url, mode=mode, **kwargs)
self.__data_handlers = []
self.__fields = {}
# ancillary files
cartesian = ""
instrument = ""
times = ""
# handlers for ancillary fields
self.__time_handler = NCFile(os.path.join(url, times),
mode=mode, **kwargs)
self.__fields[times] = []
self.__coord_handler = NCFile(os.path.join(url, cartesian),
mode=mode, **kwargs)
self.__fields[cartesian] = []
self.__instr_handler = NCFile(os.path.join(url, instrument),
mode=mode, **kwargs)
self.__fields[instrument] = []
# get product type
safefolder = os.path.basename(os.path.normpath(url))
print safefolder
if "_OL_1_ERR" in safefolder or "_OL_1_EFR" in safefolder:
datafiles = DATAFILES["L1B"]
elif "OL_2_LRR" in safefolder or "OL_2_LFR" in safefolder:
datafiles = DATAFILES["L2LAND"]
elif "OL_2_WRR" in safefolder or "OL_2_WFR" in safefolder:
datafiles = DATAFILES["L2WATER"]
raise Exception("Unknown product type")
# detect the data files and instanciate mappers for each one
for f in datafiles:
if '*' in f:
fnames = glob.glob(os.path.join(url, f))
for fname in fnames:
self.__data_handlers.append(NCFile(url=fname, mode=mode,
self.__fields[f] = []
fname = os.path.join(url, f)
self.__data_handlers.append(NCFile(url=fname, mode=mode,
self.__fields[f] = []
self.__fieldlocator = {}
self.__geodfieldlocator = {}
def open(self,
view (dict, optional): a dictionary where keys are dimension names
and values are slices. A view can be set on a file, meaning
that only the subset defined by this view will be accessible.
This view is expressed as any subset (see :func:`get_values`).
For example::
view = {'row':slice(200,250), 'cell':slice(200,300)}
datamodel (str): type of feature read or written. Internal argument
only used by the classes from :mod:`~cerbere.datamodel`
package. Can be 'Grid', 'Swath', etc...
datamodel_geolocation_dims (list, optional): list of the name of the
geolocation dimensions defining the data model to be read in
the file. Optional argument, only used by the datamodel
classes, in case the mapper class can store different types of
data models.
a handler on the opened file
# open each related file in the SAFE repo
if view is None:
rowview = None
rowview = {'row': view['row']}
for hdlr in self.__data_handlers:, datamodel,
datamodel_geolocation_dims), datamodel,
datamodel_geolocation_dims), datamodel,
datamodel_geolocation_dims), datamodel,
# build the two-way dictionaries of fields
# ...for data
for hdlr in self.__data_handlers:
= hdlr.get_fieldnames()
for fieldname in hdlr.get_fieldnames():
self.__fieldlocator[fieldname] = hdlr
def close(self):
"""Close handler on storage"""
# NOTE(review): the body of the loop below is missing in this copy of the
# file; presumably each data handler was closed here - TODO confirm against
# the upstream source.
for hdlr in self.__data_handlers:
# Drop the references to the data handlers and to the three ancillary
# handlers (coordinates, time, instrument).
self.__data_handlers = None
self.__coord_handler = None
self.__time_handler = None
self.__instr_handler = None
def get_dimsize(self, dimname):
    """Look up the size of a dimension.

    The requested name is first translated to its native equivalent with
    :func:`get_matching_dimname`; the size is then queried from the
    coordinate handler.

    Args:
        dimname (str): name of the dimension.

    Returns:
        int: size of the dimension.
    """
    native_dim = self.get_matching_dimname(dimname)
    coords = self.__coord_handler
    return coords.get_dimsize(native_dim)
def get_dimensions(self, fieldname=None):
"""Return the dimension names of a file or a field in the
file. For temporal and spatial dimensions, the cerbere standard names
are returned.
fieldname (str): the name of the field from which to get the
dimensions. For a geolocation field, use the cerbere standard
name (time, lat, lon), though native field name will work too.
tuple<str>: the standard dimensions of the field or file.
if fieldname is None:
return self.__coord_handler.get_dimensions()
if fieldname in ['time', 'lat', 'lon']:
# Should all have the same dimension as lat
native_fieldname = self.get_geolocation_field('lat')
dims = self.__coord_handler.get_dimensions(native_fieldname)
handler = self.__fieldlocator[fieldname]
dims = handler.get_dimensions(fieldname)
# convert geolocation dims to standard names
newdims = []
for dim in list(dims):
return tuple(newdims)
def get_matching_dimname(self, dimname):
    """Translate a standard dimension name into its native name.

    Used for internal purpose only; it should not be called directly.

    The standard dimension names are:

    * x, y, time for :class:`~cerbere.datamodel.grid.Grid`
    * row, cell, time for :class:`~cerbere.datamodel.swath.Swath`

    To be derived when creating an inherited data mapper class. This is
    mandatory for geolocation dimensions which must be standard.

    Args:
        dimname (str): standard dimension name.

    Returns:
        str: the native name for the dimension, or `dimname` unchanged if
            the input dimension has no standard name.

    See Also:
        :func:`get_standard_dimname` for the reverse operation
    """
    native_names = {'time': 'time', 'row': 'rows', 'cell': 'columns'}
    return native_names.get(dimname, dimname)
def get_standard_dimname(self, dimname):
    """Translate a native dimension name into its standard name.

    Used for internal purpose; it should not be called directly.

    To be derived when creating an inherited data mapper class. This is
    mandatory for geolocation dimensions which must be standard.

    Args:
        dimname (str): native dimension name.

    Returns:
        str: the (translated) standard name for the dimension, or
            `dimname` unchanged if the input dimension has no standard
            name.

    See Also:
        :func:`get_matching_dimname` for the reverse operation
    """
    standard_names = {'time': 'time', 'rows': 'row', 'columns': 'cell'}
    if dimname not in standard_names:
        return dimname
    return standard_names[dimname]
def get_fieldnames(self):
    """Return the list of geophysical fields stored for the feature.

    The names are the keys of the internal field locator, i.e. the fields
    registered from the data handlers. The geolocation field names are
    excluded from this list.

    Returns:
        list<string>: list of field names
    """
    # Build a real list: on Python 3, dict.keys() is a view, which cannot
    # be indexed and keeps tracking later changes to the locator.
    return list(self.__fieldlocator)
def __get_native_fieldname(self, fieldname):
    """Return the native name of a field.

    Standard geolocation names (lat, lon, time, z) are translated through
    :func:`get_geolocation_field`; any other name is returned as is.

    Args:
        fieldname (str): name of the field.

    Returns:
        str: the native name of the field. The same as input if the field
            is not a geolocation field.
    """
    if fieldname not in ('lat', 'lon', 'time', 'z'):
        return fieldname
    return self.get_geolocation_field(fieldname)
def get_geolocation_field(self, fieldname):
    """Return the native field name matching a standard geolocation field.

    Used for internal purpose; it should not be called directly.

    Args:
        fieldname (str): name of the standard geolocation field (lat, lon
            or time)

    Returns:
        str: name of the corresponding field in the native file format,
            or None if no matching is found.
    """
    native_names = {'lat': 'latitude', 'lon': 'longitude', 'time': 'time'}
    return native_names.get(fieldname)
def read_field(self, fieldname):
Return the :class:`cerbere.field.Field` object corresponding to
the requested fieldname.
The :class:`cerbere.field.Field` class contains all the metadata
describing a field (equivalent to a variable in netCDF).
fieldname (str): name of the field
:class:`cerbere.field.Field`: the corresponding field object
if fieldname == 'time':
rows = self.get_dimsize('row')
cols = self.get_dimsize('cell')
variable = Variable(
description='time of measurement',
field = Field(
OrderedDict([('row', rows), ('cell', cols)]),
field.units = self.__time_handler.get_handler().\
return field
elif fieldname in ['lat', 'lon']:
native_name = self.get_geolocation_field(fieldname)
geofield = self.__coord_handler.read_field(
) = fieldname
return geofield
native_name = self.__get_native_fieldname(fieldname)
return self.__fieldlocator[native_name].read_field(native_name)
def read_values(self, fieldname, slices=None):
"""Read the data of a field.
fieldname (str): name of the field which to read the data from
slices (list of slice, optional): list of slices for the field if
subsetting is requested. A slice must then be provided for each
field dimension. The slices are relative to the opened view
(see :func:open) if a view was set when opening the file.
MaskedArray: array of data read. Array type is the same as the
storage type.
native_name = self.__get_native_fieldname(fieldname)
if fieldname == 'time':
if slices is not None:
tslices = [slices[0]]
tslices = slices
time = self.__time_handler.read_values('time_stamp',
# reshape as a 2D field
rows = self.get_dimsize('row')
cols = self.get_dimsize('cell')
if slices is None:
shape = (cols, rows)
newslices = self._fill_slices(slices, (rows, cols))
shape = (newslices[1].stop - newslices[1].start,
newslices[0].stop - newslices[0].start)
time = ma.resize(time, shape).transpose()
return time
elif fieldname in ['lat', 'lon']:
return self.__coord_handler.read_values(native_name,
return self.__fieldlocator[native_name].read_values(native_name,
def read_fillvalue(self, fieldname):
    """Read the fill value of a field.

    The request is delegated to the handler registered for this field in
    the internal field locator.

    Args:
        fieldname (str): name of the field.

    Returns:
        number or char or str: fill value of the field. The type is the
            same as the type of the data in the field.
    """
    handler = self.__fieldlocator[fieldname]
    return handler.read_fillvalue(fieldname)
def read_global_attributes(self):
    """Return the names of the global attributes.

    Returns:
        list<str>: the list of the attribute names.
    """
    # All files seem to have the same list of global attributes, so only
    # the coordinate file is queried.
    coords = self.__coord_handler
    return coords.read_global_attributes()
def read_global_attribute(self, name):
    """Return the value of a global attribute.

    Args:
        name (str): name of the global attribute.

    Returns:
        str, number or datetime: value of the corresponding attribute.
    """
    # All files seem to have the same list of global attributes, so only
    # the coordinate file is queried.
    coords = self.__coord_handler
    return coords.read_global_attribute(name)
def read_field_attributes(self, fieldname):
    """Return the specific attributes of a field.

    The request is delegated to the handler registered for this field in
    the internal field locator.

    Args:
        fieldname (str): name of the field.

    Returns:
        dict<string, string or number or datetime>: a dictionary where
            keys are the attribute names.
    """
    handler = self.__fieldlocator[fieldname]
    return handler.read_field_attributes(fieldname)
def get_start_time(self):
    """Return the minimum date of the file temporal coverage.

    The first value of the 'time_stamp' variable of the time file is
    converted with ``num2date``, using the units of that variable.

    Returns:
        datetime: start time of the data in file.
    """
    dataset = self.__time_handler.get_handler()
    stamps = dataset.variables['time_stamp']
    return num2date(stamps[0], stamps.units)
def get_end_time(self):
    """Return the maximum date of the file temporal coverage.

    The last value of the 'time_stamp' variable of the time file is
    converted with ``num2date``, using the units of that variable.

    Returns:
        datetime: end time of the data in file.
    """
    # NOTE(review): the original author flagged this method as "WRONG!!!"
    # without recording why; the last time stamp is returned as is.
    dataset = self.__time_handler.get_handler()
    stamps = dataset.variables['time_stamp']
    return num2date(stamps[-1], stamps.units)
def get_bbox(self):
    """Return the bounding box of the feature, as a tuple.

    The bounding box is not computed by this mapper: None is always
    returned.

    Returns:
        tuple: bbox expressed as (lonmin, latmin, lonmax, latmax); this
            implementation returns None instead.
    """
    return None
def write_global_attributes(self, attrs):
    """Write the global attributes of the file.

    Not implemented for this mapper.

    Args:
        attrs (dict<string, string or number or datetime>): a dictionary
            containing the attributes names and values to be written.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
def create_field(self, field, dim_translation=None):
    """Create a new field in the mapper.

    Meant to create the field structure without writing its values array
    yet. Not implemented for this mapper.

    Args:
        field (Field): the field to be created.
        dim_translation: unused by this implementation.

    Raises:
        NotImplementedError: always.

    See also:
        :func:`write_field` for writing the values array.
    """
    raise NotImplementedError
def create_dim(self, dimname, size=None):
    """Add a new dimension.

    Not implemented for this mapper.

    Args:
        dimname (str): name of the dimension.
        size (int): size of the dimension (unlimited if None)

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
def write_field(self, fieldname):
    """Write the field data on disk.

    Not implemented for this mapper.

    Args:
        fieldname (str): name of the field to write.

    Raises:
        NotImplementedError: always.
    """
    raise NotImplementedError
This diff is collapsed.
__author__ = 'igort'
Sentinel-3 manifest reader - command line utility that extracts information from a manifest file and prints it to the console. Options: -i <path to S3 PDU> -c <config file>. Full examples are provided for reading a manifest file with the default xml configuration file and with a different xml configuration file.
s3tools/ - module containing functions that read data from the manifest file using xpath expressions stored in an external XML configuration file
test - directory with the S3 product name and manifest file inside
""" Module for reading Sentinel 3 manifest files.
Reads information from the S3 manifest xml file (xfdumanifest.xml) and stores the element values in an ordered dictionary (OrderedDict).
An input XML configuration file (the manifest config file) defines which information is retrieved from the manifest file.
Each entry uses xpath syntax to give the path to an element, together with a short name that is used as the key in the dictionary.
Example of access to platform family name:
<xpath name="platform_familyName">/xfdu:XFDU/metadataSection/metadataObject[@ID="platform"]/metadataWrap/xmlData/sentinel-safe:platform/sentinel-safe:familyName</xpath>
and the key/value output:
platform_familyName : Sentinel-3
Input XML config file is divided in sections for easier grouping:
<xpaths ns="slstr">
Default xml config file has the following name: <manifest_config.xml> and is located in the directory of this module.
__author__ = 'igort'
from lxml import etree
import os
from collections import OrderedDict
# namespaces in S3 SAFE manifest xml
MFS_NAMESPACES = {'xfdu': 'urn:ccsds:schema:xfdu:1',
'sentinel-safe': '',
'gml': '',
'sentinel3': '',
'slstr': '',
'olci': '',
'sral': ''}
# path to default configuration manifest file where xpaths are defined
FN_MANIFEST_CONFIG_DEFAULT = os.path.join(os.path.dirname(__file__),
# name of the S3 SAFE manifest files
FN_MANIFEST_XML = 'xfdumanifest.xml'
# xpath to get instrument abbrev - needed to get instrument specific xpaths from config file
XPATH_INSTRUMENT_ABBREV = '/xfdu:XFDU/metadataSection/metadataObject[@ID="platform"]/metadataWrap/xmlData/sentinel-safe:platform/sentinel-safe:instrument/sentinel-safe:familyName/@abbreviation'
# list of keys to exclude from printing to the screen/file
PRINT_EXCLUDE_KEYS = ['footPrint_posList', 's3_safe']
def parse(fn_s3, tree=None, namespaces=MFS_NAMESPACES, fn_config=None):
    """ parse manifest file - main function

    :param fn_s3: s3 filename (folder in which the manifest xml file is
        looked up when ``tree`` is not provided)
    :param tree: tree from xml; parsed from the manifest file when None
    :param namespaces: namespaces passed on when reading the manifest
    :param fn_config: config xml file; the default config file is used
        when None
    :return: tuple ``(out, tree)`` - ``out`` is the dictionary of
        xpaths/values pairs and ``tree`` is the manifest xml tree
    """
    if tree is None:
        tree = etree.parse(os.path.join(fn_s3, FN_MANIFEST_XML))

    # the instrument name selects the instrument specific config section
    instr = get_instrument_name(fn_s3, tree=tree)

    if fn_config is None:
        # get default config file
        # NOTE(review): this assignment was missing in this copy of the
        # file (the branch had no body, so None was passed to the xml
        # parser); restored from the module constant documented as the
        # default configuration file - confirm against upstream.
        fn_config = FN_MANIFEST_CONFIG_DEFAULT

    # get xpaths from config file: common section first, then the
    # instrument specific one
    xpath_arr = _get_xpaths_from_config(fn_config,
                                        namespaces=['default', instr])

    # get data from manifest based on xpaths
    out = get_from_manifest(fn_s3, xpath_arr, tree=tree,
                            namespaces=namespaces)
    return out, tree
def _get_xpaths_from_config(fn_config, namespaces=('default',)):
    """ get xpath list from xml config file

    :param fn_config: path to the xml config file
    :param namespaces: values of the ``ns`` attribute of the ``<xpaths>``
        sections to read, in the order they are collected
    :return: list of ``[name, xpath]`` pairs, one per child element of
        each selected section
    :raises IndexError: if a requested section is not in the config file
    """
    xml_cfg = etree.parse(fn_config)
    out = []
    for namespace in namespaces:
        query = "/manifest_xpaths/xpaths[@ns='{0}']".format(namespace)
        sections = xml_cfg.xpath(query)
        if not sections:
            # same exception type as the former bare ``t[0]`` lookup, but
            # with a message telling which section is missing
            raise IndexError(
                "no xpaths section with ns='{0}' in {1}".format(
                    namespace, fn_config))
        # only the first matching section is read; iterating the element
        # yields its children (getchildren() is deprecated in lxml)
        for xp in sections[0]:
            out.append([xp.attrib['name'], xp.text])
    return out
def _get_from_manifest(fn, xpath, dict_namespaces, tree=None):
""" get single xpath value from manifest file
:param fn: file name
:param xpath: xpath
:param dict_namespaces: dictionary of namespaces
:param tree: manifest xml tree
if tree is None:
fn_xml = os.path.join(fn, FN_MANIFEST_XML)