Commit 287d4785 authored by PIOLLE's avatar PIOLLE
Browse files

Initial commit

\ No newline at end of file
""" Module for reading and verifying RSS gridded binary data files. """
import copy
import decimal
import gzip
import numpy as np
import sys
from collections import namedtuple
from collections import OrderedDict
from operator import mul
from functools import reduce
import numpy
class Dataset:
""" Base class for bytemap datasets. """
Public data:
filename = name of data file
missing = fill value used for missing data;
if None, then fill with byte codes (251-255)
dimensions = dictionary of dimensions for each coordinate
variables = dictionary of data for each variable
All classes derived from Dataset must implement the following:
_attributes() = returns list of attributes for each variable (list)
_coordinates() = returns coordinates (tuple)
_shape() = returns shape of raw data (tuple)
_variables() = returns list of all variables to get (list)
The derived class must provide "_get_" methods for the attributes.
If the derived class provides "_get_" methods for the variables,
those methods receive first priority.
The "_get_" methods in this module receive second priority.
The last priority is "_default_get", which requires:
_get_index(var) = returns bmap index for var
_get_scale(var) = returns bmap scale for var
_get_offset(var) = returns bmap offset for var
def __init__(self):
self.dimensions = self._get_dimensions()
self.variables = self._get_variables()
def _default_get(self,var,bmap):
data = get_data(bmap,self._get_index(var))
acopy = copy.deepcopy(data)
bad = is_bad(data)
try: data *= self._get_scale(var)
except _NoValueFound: pass
try: data += self._get_offset(var)
except _NoValueFound: pass
if self.missing == None: data[bad] = acopy[bad]
else: data[bad] = self.missing
return data
def _dtype(self): return np.uint8
def _get(self,var):
try: return _get_(var,_from_=self)
except _NoMethodFound: pass
try: return _get_(var,_from_=thismodule())
except _NoMethodFound: pass
return self._default_get
def _get_avariable(self,var,data):
variable = self._get(var)(var,data)
return variable
#return Variable(var,variable,self)
def _get_coordinates(self,var=None):
if not var: return self._coordinates()
if var in self._coordinates(): return (var,)
return tuple([c for c in self._coordinates() if c != 'variable'])
def _get_dimensions(self):
dims = OrderedDict(zip(self._coordinates(),self._shape()))
del dims['variable']
return dims
def _get_variables(self):
data = OrderedDict()
if '.gz' in self.filename:
stream = readgz(self.filename)
bmap = unpack(stream, shape=self._shape(), dtype=self._dtype())
bmap = numpy.fromfile(self.filename,
print bmap.shape, self._shape
bmap = bmap.reshape(self._shape())
return data
for var in self._variables():
data[var] = self._get_avariable(var, bmap)
return data
def readgz(filename):
f =, 'rb')
stream =
return stream
def thismodule(): return sys.modules[__name__]
def unpack(stream,shape,dtype):
count = reduce(mul,shape)
return np.fromstring(stream,dtype=dtype,count=count).reshape(shape)
""" Library of Methods for _get_ Functions: """
def btest(ival,ipos):
"""Same usage as Fortran btest function."""
return ( ival & (1 << ipos) ) != 0
def cosd(x): return np.cos(np.radians(x))
def get_data(bmap,indx,dtype=np.float64):
"""Return numpy array of dytpe for variable in bmap given by indx."""
return np.array(np.squeeze(bmap[...,indx,:,:]),dtype=dtype)
def get_uv(speed,direction):
Given speed and direction (degrees oceanographic),
return u (zonal) and v (meridional) components.
u = speed * sind(direction)
v = speed * cosd(direction)
return u, v
def ibits(ival,ipos,ilen):
"""Same usage as Fortran ibits function."""
ones = ((1 << ilen)-1)
return ( ival & (ones << ipos) ) >> ipos
def is_bad(bmap,maxvalid=250):
"""Return mask where data are bad."""
return bmap > maxvalid
def sind(x): return np.sin(np.radians(x))
where = np.where
""" Library of Named Exceptions: """
_NoMethodFound = AttributeError
_NoValueFound = (AttributeError,KeyError)
_NotFound = AttributeError
""" Library of Named _get_ Functions: """
def _get_(var,_from_):
return getattr(_from_,'_get_'+var)
def _get_ice(var,bmap,indx=0,icevalue=252):
return get_data(bmap,indx,dtype=bmap.dtype) == icevalue
def _get_land(var,bmap,indx=0,landvalue=255):
return get_data(bmap,indx,dtype=bmap.dtype) == landvalue
def _get_latitude(var,bmap,nlat=720,dlat=0.25,lat0=-89.875):
if np.shape(bmap)[-2] != nlat: sys.exit('Latitude mismatch')
return np.array([dlat*ilat + lat0 for ilat in range(nlat)])
def _get_longitude(var,bmap,nlon=1440,dlon=0.25,lon0=0.125):
if np.shape(bmap)[-1] != nlon: sys.exit('Longitude mismatch')
return np.array([dlon*ilon + lon0 for ilon in range(nlon)])
def _get_nodata(var,bmap,indx=0):
return is_bad(get_data(bmap,indx,dtype=bmap.dtype))
class Variable(np.ndarray):
""" Variable exists solely to subclass numpy array with attributes. """
def __new__(cls,var,data,dataset):
obj = np.asarray(data).view(cls)
for attr in dataset._attributes():
get = _get_(attr,_from_=dataset)
return obj
class Verify:
""" Base class for bytemap read verification. """
Public data:
data = OrderedDict of OneOb-namedtuple lists for each variable
success = True/False
The derived class must supply the following:
For all files:
filename = name of verify file
variables = list of variables to verify
The following indices (1-based):
ilon1 = longitude index
ilon2 = longitude index
ilat1 = latitude index
ilat2 = latitude index
iasc = asc/dsc index (daily only)
For files organized as a list:
startline = starting line number of data (integer)
columns = column numbers for each variable (dictionary)
For files organized as arrays:
startline = starting line number of data for each variable (dict)
The startline and columns are counting starting from 1.
def __init__(self,dataset):
self._file = [tokenize(line) for line in readtext(self.filename)] = self._get_data()
self.success = verify(dataset,self)
def _asc(self):
try: return zerobased(self.iasc)
except _NotFound: return Ellipsis
def _get_avariable(self,var):
data = []
indices = np.ndindex(self._nlat(),self._nlon())
for ilat,ilon in indices:
return data
def _get_data(self):
data = OrderedDict()
for var in self.variables:
data[var] = self._get_avariable(var)
return data
def _get_line_word(self,var,ilon,ilat):
if self._islist(): return self._get_line_word_list(var,ilon,ilat)
else: return self._get_line_word_array(var,ilon,ilat)
def _get_line_word_array(self,var,ilon,ilat):
iline = zerobased(self.startline[var]) + ilat
iword = ilon
return iline,iword
def _get_line_word_list(self,var,ilon,ilat):
iline = zerobased(self.startline) + ilat*self._nlon() + ilon
iword = zerobased(self.columns[var])
return iline,iword
def _get_oneob(self,var,ilon,ilat):
iline,iword = self._get_line_word(var,ilon,ilat)
avalue = self._file[iline][iword]
return OneOb(self._lon(ilon), self._lat(ilat), self._asc(),
float(avalue), places(avalue))
def _islist(self): return hasattr(self,'columns')
def _lat(self,ilat): return zerobased(self.ilat1) + ilat
def _lon(self,ilon): return zerobased(self.ilon1) + ilon
def _nlat(self): return self.ilat2 - self.ilat1 + 1
def _nlon(self): return self.ilon2 - self.ilon1 + 1
OneOb = namedtuple('OneOb','lon lat asc val ndp')
OneOb corresponds to one observation from verify file with:
lon = longitude index
lat = latitude index
asc = ascending/descending index
val = verify value
ndp = number of decimal places given in verify
The (asc,lat,lon) indices are 0-based.
def places(astring):
Given a string representing a floating-point number,
return number of decimal places of precision (note: is negative).
return decimal.Decimal(astring).as_tuple().exponent
def readtext(filename):
f = open(filename,'r')
lines = f.readlines()
return lines
def tokenize(line): return [item.strip() for item in line.split()]
def verify(dataset,verify):
""" Verify data were read correctly. """
Required arguments:
dataset = a read Dataset instance
verify = a Verify instance
success = True or False
success = True
for var in verify.variables:
for ob in[var]:
readval = dataset.variables[var][ob.asc,, ob.lon]
diff = abs(ob.val - readval)
match = diff < pow(10,ob.ndp)
if not match: success = False
print( ' '.join([str(ob.lon), str(, str(var),
str(ob.val), str(readval), str(diff), str(match)]) )
return success
def zerobased(indx): return indx-1
if __name__ == '__main__':
link = ''
print('Remote Sensing Systems')
print('444 Tenth Street, Suite 200')
print('Santa Rosa, CA 95401, USA')
print('Terms of Data Use: '+link)
# -*- coding: utf-8 -*-
.. module::cerbere.mapper.rsswindsatl2file
Mapper class for RSS Windsat L2 format. Uses some code
provided by RSS.
:copyright: Copyright 2013 Ifremer / Cersat.
:license: Released under GPL v3 license, see :ref:`license`.
.. sectionauthor:: Jeff Piolle <>
.. codeauthor:: Jeff Piolle <>
from collections import OrderedDict
import os
from copy import copy
from datetime import datetime, timedelta
import numpy
from netCDF4 import default_fillvals
from cerbere.mapper.abstractmapper import AbstractMapper
from cerbere.mapper.slices import Slices, format_slices
from cerbere.datamodel.field import Field
from cerbere.datamodel.variable import Variable
from bytemaps import Dataset
class WindSatDaily(Dataset):
def __init__(self, filename, missing=-999.):
Required arguments:
filename = name of data file to be read (string)
Optional arguments:
missing = fill value for missing data,
default is the value used in verify file
self.filename = filename
self.missing = missing
def _attributes(self):
return ['coordinates', 'long_name', 'units',
'valid_min', 'valid_max']
def _coordinates(self):
return ('orbit_segment', 'variable', 'latitude', 'longitude')
def _shape(self):
return (2, 9, 720, 1440)
def _variables(self):
return ['time', 'sst', 'w-lf', 'w-mf', 'vapor', 'cloud', 'rain',
'w-aw', 'wdir', 'land', 'ice']
def _get_dtype(self, var):
return {'time': 'f4',
'sst': 'f8',
'w-lf': 'f8',
'w-mf': 'f8',
'vapor': 'f8',
'cloud': 'f8',
'rain': 'f8',
'w-aw': 'f8',
'wdir': 'f8',
'lon': 'f8',
'lat': 'f8',
'land': 'i1',
'ice': 'i1',
'nodata': 'i1',
def _get_index(self, var):
return {'time': 0,
'sst': 1,
'w-lf': 2,
'w-mf': 3,
'vapor': 4,
'cloud': 5,
'rain': 6,
'w-aw': 7,
'wdir': 8,
def _get_scale(self, var):
scales = {'time': 6.,
'sst': 0.15,
'w-lf': 0.2,
'w-mf': 0.2,
'vapor': 0.3,
'cloud': 0.01,
'rain': 0.1,
'w-aw': 0.2,
'wdir': 1.5,
if var in scales:
return scales[var]
return 1
def _get_offset(self, var):
offsets = {'sst': -3.0,
'cloud': -0.05,
if var in offsets:
return offsets[var]
return 0
# _get_ attributes:
def _get_long_name(self, var):
return {'time': 'pixel time in UTC',
'sst': 'Sea Surface Temperature',
'w-lf': '10-m Surface Wind Speed (low frequency)',
'w-mf': '10-m Surface Wind Speed (medium frequency)',
'vapor': 'Columnar Water Vapor',
'cloud': 'Cloud Liquid Water',
'rain': 'Surface Rain Rate',
'w-aw': 'All-Weather 10-m Surface Wind Speed',
'wdir': 'Surface Wind Direction',
'lon': 'Grid Cell Center Longitude',
'lat': 'Grid Cell Center Latitude',
'land': 'Is this land?',
'ice': 'Is this ice?',
'nodata': 'Is there no data?',
def _get_units(self, var):
return {'time': 'seconds since 2000-01-01 00:00:00',
'sst': 'deg Celsius',
'w-lf': 'm s-1',
'w-mf': 'm s-1',
'vapor': 'mm',
'cloud': 'mm',
'rain': 'mm hr-1',
'w-aw': 'm s-1',
'wdir': 'deg oceanographic',
'lon': 'degrees_east',
'lat': 'degrees_north',
'land': 'True or False',
'ice': 'True or False',
'nodata': 'True or False',
def _get_valid_min(self, var):
return {'time': 0.0,
'sst': -3.0,
'w-lf': 0.0,
'w-mf': 0.0,
'vapor': 0.0,
'cloud': -0.05,
'rain': 0.0,
'w-aw': 0.2,
'wdir': 0.0,
'lon': 0.0,
'lat': -90.0,
'land': 0,
'ice': 0,
'nodata': 0,
def _get_valid_max(self, var):
return {'time': 1440.0,
'sst': 34.5,
'w-lf': 50.0,
'w-mf': 50.0,
'vapor': 75.0,
'cloud': 2.45,
'rain': 25.0,
'w-aw': 50.0,
'wdir': 360.0,
'lon': 360.0,
'lat': 90.0,
'land': 1,
'ice': 1,
'nodata': 1,
class RSSWindSatL2File(AbstractMapper):
The RSS files contain both ascending and descending passes in two separate
grids. You must specify which grid you want to read either by:
* adding '.asc' or '.desc' to the filename
* using passdir argument, with 'asc' or 'desc' value
passdir ('asc' or 'desc'): passes read from the file (ascending or
DIMENSIONS = OrderedDict([('y', 720),
('x', 1440)])
PASS = {'asc': 0, 'desc': 1}
TIMEORIGIN = datetime(2000, 1, 1)
def __init__(self, url=None, passdir=None, **kwargs):
super(RSSWindSatL2File, self).__init__(
if url.endswith('.asc'):
self.passdir = self.PASS['asc']
elif url.endswith('.desc'):
self.passdir = self.PASS['desc']
elif passdir is not None:
self.passdir = self.PASS[passdir]
raise Exception("The type of passes to be read must be specified")
self.__globalattributes = None
self._startdate = datetime.strptime(
def open(self, view=None, datamodel=None, datamodel_geolocation_dims=None):
"""Open the file (or any other type of storage)
view (dict, optional): a dictionary where keys are dimension names
and values are slices. A view can be set on a file, meaning
that only the subset defined by this view will be accessible.
This view is expressed as any subset (see :func:`get_values`).
For example:
view = {'time':slice(0,0), 'lat':slice(200,300),
datamodel (str): type of feature read or written. Internal argument
only used by the classes from :mod:`~cerbere.datamodel`
package. Can be 'Grid', 'Swath', etc...
datamodel_geolocation_dims (list, optional): list of the name of
the geolocation dimensions defining the data model to be read
in the file. Optional argument, only used by the datamodel
classes, in case the mapper class can store different types of
data models.
a handler on the opened file
dims = self.DIMENSIONS.values()
dims.insert(0, 2)
newview = Slices(format_slices(view, self.DIMENSIONS.keys()),
newview = Slices((slice(self.passdir, self.passdir + 1),) + newview,
super(RSSWindSatL2File, self).open(newview, datamodel,
self._handler = WindSatDaily(self._url)
def close(self):
"""Close handler on storage"""
def __check_open(self):
if self._handler is None:
raise IOError('File is not open')
def get_matching_dimname(self, dimname):
"""Return the equivalent name in the native format for a standard
This is a translation of the standard names to native ones. It is used
for internal purpose only and should not be called directly.
The standard dimension names are:
* x, y, time for :class:`~cerbere.datamodel.grid.Grid`
* row, cell, time for :class:`~cerbere.datamodel.swath.Swath` or
To be derived when creating an inherited data mapper class. This is
mandatory for geolocation dimensions which must be standard.