Commit 1ecfde6d authored by BODERE
Browse files

feat: dataset configuration

Refs: #6
parent 08ddbdce
......@@ -2,6 +2,7 @@
# -*- coding: utf-8 -*
"""Opensearx configuration"""
from typing import List
import importlib
from pydantic import BaseSettings, BaseModel, HttpUrl
from pydantic.tools import lru_cache
......@@ -14,14 +15,39 @@ from opensearx_ws.opensearch.engine import (IfremerOpensearchEngine,
class OpensearhEngine(BaseModel):
    """Configuration of one remote opensearch engine instance."""
    # NOTE(review): class name is missing a "c" ("OpensearchEngine") — renaming
    # would touch every reference in the project, so it is only flagged here.

    # Display name; used as the key of the instantiated-engines dict built at
    # the bottom of this module.
    name: str
    # Name of the class to instantiate, resolved by getattr() inside
    # engine_module (see opensearch_engine_factory).
    engine_class: str
    # Dotted module path where engine_class is looked up.
    engine_module: str = "opensearx_ws.opensearch.engine"
    # Base URL of the remote opensearch endpoint (validated by pydantic).
    root_path: HttpUrl
    # HTTP timeout in seconds for requests to this engine.
    timeout: float = 10.0
    # Maps public dataset identifiers to this engine's internal dataset ids.
    datasets_mapping: dict
class Settings(BaseSettings):
    """Application settings, overridable through environment variables
    (pydantic BaseSettings behaviour)."""
    # Note: the stale `opensearch_engines: List[OpensearhEngine] = list()`
    # left over from the previous revision was removed; the field is declared
    # once below with its full default value.

    context_path: str = ""
    templates_path: str = "templates"
    # Public dataset identifiers exposed by this service.
    datasets: List[str] = [
        "AVHRR_SST_METOP_A-OSISAF-L2P-v1.0"
    ]
    # Default engine configurations; each entry is turned into a live engine
    # instance by opensearch_engine_factory at import time.
    opensearch_engines: List[OpensearhEngine] = [
        OpensearhEngine(
            name="Opensearch Ifremer",
            engine_class="IfremerOpensearchEngine",
            root_path='https://opensearch.ifremer.fr/granules.atom',
            timeout=30.0,
            datasets_mapping={
                # NOTE(review): a METOP_A key maps to a metop_c value —
                # confirm this is intentional and not a copy/paste slip.
                "AVHRR_SST_METOP_A-OSISAF-L2P-v1.0": "avhrr_sst_metop_c-osisaf-l2p-v1.0"
            }
        ),
        OpensearhEngine(
            name="PODAAC engine",
            engine_class="JPLOpensearchEngine",
            root_path='https://cmr.earthdata.nasa.gov/opensearch/granules.atom',
            timeout=30.0,
            datasets_mapping={
                "AVHRR_SST_METOP_A-OSISAF-L2P-v1.0": "C1664387028-PODAAC"
            }
        )
    ]
@lru_cache()
def get_settings():
    """Build the application Settings once and return the cached instance.

    The lru_cache makes this a cheap module-wide singleton accessor.
    """
    return Settings()
def opensearch_engine_factory(engine_conf: "OpensearhEngine"):
    """Instantiate an opensearch engine from its configuration.

    Resolves ``engine_conf.engine_class`` by name inside
    ``engine_conf.engine_module``, so new engine implementations can be
    plugged in purely through configuration.

    :param engine_conf: engine configuration (annotation quoted so the
        function object can be created without the config class in scope).
    :return: an instance of the resolved engine class.
    :raises ModuleNotFoundError: if engine_module cannot be imported.
    :raises AttributeError: if engine_class is not defined in that module.
    """
    module = importlib.import_module(engine_conf.engine_module)
    engine_class = getattr(module, engine_conf.engine_class)
    return engine_class(
        root_path=engine_conf.root_path,
        timeout=engine_conf.timeout,
        datasets_mapping=engine_conf.datasets_mapping,
    )
# Application settings singleton (cached by get_settings' lru_cache).
settings = get_settings()
# jinja2 templates utility — Jinja2Templates comes from the web-framework
# import at the top of the file.
j2_templates = Jinja2Templates(directory=settings.templates_path)
# opensearch engines instances: display name -> instantiated engine, built
# from the configured engine list. (The previous hand-written literal dict,
# including its commented-out experimental engines, was removed in favour of
# configuration-driven construction.)
opensearch_engines = {
    engine_conf.name: opensearch_engine_factory(engine_conf)
    for engine_conf in settings.opensearch_engines
}
......
def home(request: "Request"):
    """Home endpoint

    Returns a dict of example/self-documenting URLs for this deployment:
    sample granule searches for the configured dataset plus links to the
    engine configuration and the API docs. (``Request`` annotation is quoted
    so the function object builds without the framework class in scope.)
    """
    return {
        "AVHRR_SST_METOP_A-OSISAF-L2P-v1.0 (atom)":
            f"{request.url}granules.atom?datasetId=AVHRR_SST_METOP_A-OSISAF-L2P-v1.0&startPage=0&count=100"
            "&timeStart=2000-01-01T00:00:00Z&timeEnd=2021-12-31T23:59:59Z&geoBox=-180.0,-90.0,180.0,90.0",
        # NOTE(review): the raw example ends in 2100 while the atom example
        # ends in 2021 — confirm the asymmetry is intentional.
        "AVHRR_SST_METOP_A-OSISAF-L2P-v1.0 (raw)":
            f"{request.url}granules.raw?datasetId=AVHRR_SST_METOP_A-OSISAF-L2P-v1.0&startPage=0&count=100"
            "&timeStart=2000-01-01T00:00:00Z&timeEnd=2100-12-31T23:59:59Z&geoBox=-180.0,-90.0,180.0,90.0",
        "Configuration": f"{request.url}engines",
        "Documentation": f"{request.url}docs"
    }
......
......@@ -3,7 +3,7 @@
"""Opensearch engines"""
import json
import logging
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, Any
from urllib.parse import parse_qs, quote, urlencode, urlsplit, urlunsplit
from xml.etree import ElementTree as Element
......@@ -21,15 +21,19 @@ from opensearx_ws.opensearch.model import (OpensearchQuery,
class OpensearchEngine:
def __init__(self, root_path: str, timeout: float = 10.0):
def __init__(self, root_path: str, timeout: float = 10.0, datasets_mapping: dict = None):
self.root_path = root_path
self.timeout = timeout
self.datasets_mapping = datasets_mapping if datasets_mapping is not None else dict()
self._log = logging.getLogger(__name__)
def to_dict(self):
return {
"engine_class": self.__class__.__name__,
"engine_module": self.__class__.__module__,
"root_path": self.root_path,
"timeout": self.timeout
"timeout": self.timeout,
"datasets_mapping": self.datasets_mapping
}
def __repr__(self):
......@@ -38,6 +42,9 @@ class OpensearchEngine:
    def __str__(self):
        # str() intentionally mirrors repr() (__repr__ is defined just above,
        # outside this diff hunk).
        return repr(self)
    def is_dataset_available(self, dataset: str) -> bool:
        """Return True if *dataset* (public id) has a mapping for this engine."""
        return dataset in self.datasets_mapping
    async def request(self, params: OpensearchQueryParameters) -> Optional[OpensearchResponse]:
        """Query the remote engine and wrap the outcome in an OpensearchResponse.

        Returns None when the dataset is not served by this engine; timeout,
        validation and empty-response failures are collected in
        ``opensearch_response.errors`` rather than raised.
        """
        async def request_hook(request):
            # NOTE(review): print() does not interpolate %-style placeholders;
            # these hook messages look meant for self._log — confirm.
            print("Request event hook: %s %s - Waiting for response", request.method, request.url)
......@@ -46,9 +53,18 @@ class OpensearchEngine:
        # NOTE(review): diff residue above — the `async def response_hook(...)`
        # header falls outside this hunk.
            request = response.request
            print(f"Response event hook: - Status %d", request.method, request.url, response.status_code)
        url = self._build_search_url(params)
        opensearch_response = OpensearchResponse(query=OpensearchQuery(params=params, url=url))
        # NOTE(review): the two lines above are the *removed* (pre-commit)
        # side of the diff; the added replacement starts below.
        # check if dataset is available
        # FIXME(review): `self.is_dataset_available` is a bound method and is
        # therefore always truthy, so `not ...` is always False and this guard
        # never fires. It must be *called*:
        #     if not self.is_dataset_available(params.datasetId): return None
        # Without the call, _prepare_query_parameters raises KeyError for an
        # unmapped datasetId instead of returning None.
        if not self.is_dataset_available:
            return None
        # build url
        effective_params = self._prepare_query_parameters(params)
        url = self._build_search_url(effective_params)
        # initialize response
        opensearch_response = OpensearchResponse(query=OpensearchQuery(params=effective_params, url=url))
        # execute url
        try:
            async with httpx.AsyncClient(
                event_hooks={'request': [request_hook], 'response': [response_hook]}
......@@ -56,6 +72,7 @@ class OpensearchEngine:
            # NOTE(review): lines between the hunks (e.g. the `) as client:`
            # continuation) are not shown in this view.
                raw_response = await client.get(url=url, timeout=self.timeout)
                opensearch_response.header, opensearch_response.entries = self._process_response(raw_response)
        except httpx.TimeoutException:
            print("Timeout occurred")
            opensearch_response.errors.append("Timeout occurred")
        except ValidationError as e:
            print(e.json())
......@@ -64,23 +81,24 @@ class OpensearchEngine:
        # NOTE(review): the `except` header for the branch below is outside
        # the hunk.
            msg = str(e).strip()
            if not msg:
                msg = "Empty response"
            print(msg)
            opensearch_response.errors.append(msg)
        finally:
            # Guarantee at least one error message when nothing was parsed.
            if opensearch_response.header is None and not opensearch_response.errors:
                opensearch_response.errors.append("Empty response")
        return opensearch_response
def _build_search_url(self, params: OpensearchQueryParameters) -> str:
def _build_search_url(self, params: Dict[str, str]) -> str:
scheme, netloc, path, query_string, fragment = urlsplit(self.root_path)
query_params = parse_qs(query_string)
for k, v in self._prepare_query_parameters(params).items():
for k, v in params.items():
query_params[k] = v
new_query_string = urlencode(query_params, doseq=False, quote_via=quote)
return urlunsplit((scheme, netloc, path, new_query_string, fragment))
    def _prepare_query_parameters(self, params: OpensearchQueryParameters) -> Dict:
    # NOTE(review): the signature above is the removed (pre-commit) diff line;
    # the one below is its replacement — this view interleaves both sides.
    def _prepare_query_parameters(self, params: OpensearchQueryParameters) -> Dict[str, Any]:
        # Translate the public query parameters into this engine's request
        # parameters, mapping the public dataset id to the engine-local id.
        return {
            "datasetId": params.datasetId,
            # NOTE(review): removed diff line above; replaced by the mapped
            # lookup below, which raises KeyError when datasetId has no entry
            # (see is_dataset_available and the guard bug flagged in request()).
            "datasetId": self.datasets_mapping[params.datasetId],
            "timeStart": params.timeStart.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "timeEnd": params.timeEnd.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "geoBox": params.geoBox,
            # NOTE(review): dict truncated here in this diff view — the
            # closing brace and any remaining entries are outside the hunk.
......@@ -181,7 +199,7 @@ class JPLOpensearchEngine(OpensearchAtomEngine):
    def _prepare_query_parameters(self, params: OpensearchQueryParameters) -> Dict:
        # JPL/CMR override: same contract as the base implementation but with
        # CMR parameter names (parentIdentifier, startTime/endTime, bbox).
        return {
            "parentIdentifier": params.datasetId,
            # NOTE(review): removed diff line above; replaced by the mapped
            # lookup below (KeyError for unmapped ids, as in the base class).
            "parentIdentifier": self.datasets_mapping[params.datasetId],
            "startTime": params.timeStart.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "endTime": params.timeEnd.strftime("%Y-%m-%dT%H:%M:%SZ"),
            "spatial_type": "bbox",
            # NOTE(review): dict truncated here in this diff view.
......
......@@ -2,16 +2,11 @@
# -*- coding: utf-8 -*-
"""Opensearch model"""
import xml.etree.ElementTree as Element
from datetime import datetime
from enum import Enum
from typing import Dict, List, Optional, Tuple
from urllib.parse import parse_qs, quote, urlencode, urlsplit, urlunsplit
from typing import Dict, List, Any
import httpx
from httpx import Response
from pydantic.main import BaseModel
from starlette.datastructures import URL
class OpensearchResponseFormat(str, Enum):
......@@ -58,7 +53,7 @@ class OpensearchResponseHeader(BaseModel):
class OpensearchQuery(BaseModel):
    """Query actually issued to an engine: the final URL plus the effective
    (engine-specific) parameters.

    ``params`` is a plain dict in this revision, replacing the previous typed
    ``OpensearchQueryParameters`` field.
    """
    url: str
    params: Dict[str, Any]
class OpensearchResponse(BaseModel):
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment