Commit d64fad04 authored by ALVISET's avatar ALVISET
Browse files

Packed all necessary prov data in dictionary

parent 5e3dbdf6
......@@ -28,7 +28,10 @@ def truncate(f, n):
return '.'.join([i, (d + '0' * n)[:n]])
def numpy_chararray_to_string(char_array):
    """Decode a netCDF character array into a stripped UTF-8 string.

    Some netCDF variables slice directly to ``bytes``; those are returned
    unchanged (callers accept both return types).  Otherwise the numpy
    char array is flattened to bytes, stripped of padding spaces, and
    decoded as UTF-8.
    """
    data = char_array[:]  # slice once instead of re-slicing in each branch
    if isinstance(data, bytes):  # isinstance, not type() ==, honors subclasses
        return data
    # tobytes() is the non-deprecated name for tostring(); identical behavior.
    return data.tobytes().strip().decode("UTF-8")
def format_netcdf_datetime_to_xsd(datetime):
return "".join([datetime[0:4], "-", datetime[4:6], "-", datetime[6:8],
......@@ -58,15 +61,30 @@ def calculate_age(timestamp):
return decimal.Decimal(age).quantize(decimal.Decimal('0.01'), rounding=decimal.ROUND_DOWN)
def netcdf_extract_variable(netcdf, variable, start=None, end=None):
    """Read and decode a slice of a netCDF variable.

    Returns the decoded string for ``variable[start:end]``, or ``False``
    when the variable does not exist in this file (the False sentinel is
    the convention callers in this script already test for).
    """
    try:
        # Keep the try body minimal: only the variables lookup raises KeyError.
        raw = netcdf.variables[variable][start:end]
    except KeyError:
        return False
    return numpy_chararray_to_string(raw)
def netcdf_extract_variable_cell(netcdf, variable, row, column=None):
    """Read and decode one cell (or one whole row) of a netCDF variable.

    Returns the decoded value of ``variable[row]`` — or
    ``variable[row][column]`` when ``column`` is given — or ``False``
    when the variable is missing from the file.  An out-of-range
    ``row``/``column`` still raises IndexError; callers rely on that to
    detect the end of the data.
    """
    try:
        # Minimal try body: only the variables lookup raises KeyError.
        cell = netcdf.variables[variable][row]
    except KeyError:
        return False
    if column is not None:  # 'is not None', never '!= None'
        cell = cell[column]
    return numpy_chararray_to_string(cell)
def netcdf_getparams(netcdf, rows, columns):
    """Gather the distinct STATION_PARAMETERS names of a profile file.

    Scans ``rows - 1`` rows by ``columns`` columns of the
    STATION_PARAMETERS variable and returns each non-empty name once,
    in first-seen order.
    NOTE(review): the last row is skipped (``rows - 1``) — confirm this
    is intentional.
    """
    names = []
    row = 0
    while row < rows - 1:
        for col in range(columns):
            name = netcdf_extract_variable_cell(netcdf, "STATION_PARAMETERS", row, col)
            if name != '' and name not in names:
                names.append(name)
        row += 1
    return names
def profile_get_netcdf_info(file, data_type):
infos = {}
try:
rootgrp = Dataset(file, "r", format="NETCDF4")
......@@ -78,27 +96,51 @@ def profile_get_netcdf_info(file):
steps = {"steps":[]}
states = {"datastate":[]}
softwares = {"softwares":[]}
adjusted_params = {"qc":[]}
params = netcdf_getparams(rootgrp, rootgrp.dimensions["N_PROF"].size, rootgrp.dimensions["N_PARAM"].size)
adjusted_params = {"adjustedParams":{}}
if data_type != "R":
for param in params:
adjusted_params["adjustedParams"][param] = 0
for j in range(0, rootgrp.dimensions["N_PROF"].size):
state = netcdf_extract_variable_cell(rootgrp, "DATA_STATE_INDICATOR", j)
if state != '' and state not in states["datastate"]:
states["datastate"].append(state)
for i in range(0, rootgrp.dimensions["N_HISTORY"].size):
step = netcdf_extract_variable_cell(rootgrp, "HISTORY_STEP", i, j)
software = netcdf_extract_variable_cell(rootgrp, "HISTORY_SOFTWARE", i, j)
if step != '' and step not in steps["steps"]:
steps["steps"].append(step)
if software != '' and software not in softwares["softwares"]:
softwares["softwares"].append(software)
for i in range(0, rootgrp.dimensions["N_LEVELS"].size):
flag = netcdf_extract_variable(rootgrp, "TEMP_ADJUSTED_QC")
if flag != '' and flag not in adjusted_params["qc"]:
adjusted_params["qc"].append(flag)
try:
for i in range(0, rootgrp.dimensions["N_HISTORY"].size):
step = netcdf_extract_variable_cell(rootgrp, "HISTORY_STEP", i, j)
software = netcdf_extract_variable_cell(rootgrp, "HISTORY_SOFTWARE", i, j)
if step != '' and step not in steps["steps"]:
steps["steps"].append(step)
if software != '' and software not in softwares["softwares"]:
softwares["softwares"].append(software)
# print(rootgrp.dimensions["N_LEVELS"].size-1)
if data_type != "R":
check = True
for i in range(0, rootgrp.dimensions["N_LEVELS"].size-1):
if check:
for param in params:
# print("{} {} {}".format(file, i, param))
try:
flag = netcdf_extract_variable_cell(rootgrp, param + "_ADJUSTED_QC", j, i)
except IndexError:
check = False
break
if not flag:
break
if flag != None and flag != '' and int(flag) > adjusted_params["adjustedParams"][param]:
# print("{} {}".format(file, int(flag)))
adjusted_params["adjustedParams"][param] = int(flag)
except KeyError:
# print("{} has no N_HISTORY param?".format(file))
return
infos.update(steps)
infos.update(states)
infos.update(softwares)
infos.update(adjusted_params)
print(infos)
# print(file)
# print(infos)
# print("------------------")
return infos
def map_filetype_and_datamode(cycle_files):
"""
......@@ -130,6 +172,7 @@ def map_file_to_prov(file, cycle_uri, wrapper, files_info, dac_uri):
link_type = {
"R": ARGO.coreArgoProfile,
"D": ARGO.coreArgoProfile,
"B": ARGO.bcgArgoProfile,
"S": ARGO.syntheticArgoProfile
}
......@@ -155,12 +198,16 @@ def map_file_to_prov(file, cycle_uri, wrapper, files_info, dac_uri):
except KeyError:
pass
# def add_prov_triples(wrapper, prov):
# for
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("destination", type=str, help="Absolute path where the converted files will be written")
parser.add_argument("netCDF", type=str, help="Absolute path of the dac folder location")
parser.add_argument("--noprov", type=bool, nargs="?", const=True, default=False, help="Don't look into netCDF files")
parser.add_argument("--dac", type=str, nargs="*", help="Convert only floats metadata from the specified dacs")
parser.add_argument("--limit", "-l", type=int, help="Limit the number of floats converted per DAC")
parser.add_argument("--local", "-lo", type=str,
......@@ -378,14 +425,19 @@ if __name__ == "__main__":
(RDF.type, ARGO.Cycle),
(RDF.type, PROV.Activity)
]})
# print("{} {}".format(nb, float_info["latestCycle"]["id"]))
if nb == float_info["latestCycle"]["id"]:
print("TEST")
argo_graph.add_triple(activity_uri, ARGO.lastCycle, cycle_uri)
for file in this_cycle_files:
map_file_to_prov(file, cycle_uri, argo_graph, files_info, dac_uri)
profile_get_netcdf_info(args.netCDF+dac+"/"+afloat+"/profiles/"+file)
if not args.noprov:
prov = profile_get_netcdf_info(args.netCDF+dac+"/"+afloat+"/profiles/"+file,
files_info["infos"][file]["datamode"])
argo_graph.add_triple(cycle_uri, ARGO.number, Literal(nb, datatype=XSD.int))
argo_graph.add_triple(cycle_uri, ARGO.startDate, Literal(cycle["startDate"]))
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment