Commit 290fee10 authored by BODERE
Browse files

feat: parse atom with ElementTree

parent 040ea0fa
......@@ -4,6 +4,6 @@ from opensearx_ws.opensearch import IfremerOpenSearchEngine
opensearch_engines = {
"Ifremer": IfremerOpenSearchEngine(root_path='https://opensearch.ifremer.fr/granules.atom', timeout=30.0),
"Nasa": IfremerOpenSearchEngine(root_path='https://opensearch.ifremer.fr/granules.atom', timeout=50.0)
"Nasa": IfremerOpenSearchEngine(root_path='https://opensearch.ifremer.fr/granules.atom', timeout=2.0)
}
......@@ -21,6 +21,7 @@ def conf():
@app.get("/granules")
async def granule(params: OpensearchQueryParameters = Depends()):
# @see : https://docs.python.org/fr/3/library/asyncio-task.html#running-tasks-concurrently
tasks = [asyncio.create_task(engine.request(params)) for engine in opensearch_engines.values()]
responses = await asyncio.gather(*tasks)
# TODO merge responses
......
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import sys
import xml.etree.ElementTree as Element
from urllib.parse import urlencode, urlsplit, parse_qs, urlunsplit, quote
import atoma
from datetime import datetime
from enum import Enum
from typing import List, Dict, Optional
from typing import List, Dict, Optional, Tuple
import httpx
from atoma.atom import AtomEntry
from httpx import Response
from pydantic.main import BaseModel
......@@ -40,26 +39,32 @@ class OpensearchResponseEntryLink(BaseModel):
class OpensearchResponseEntry(BaseModel):
title: str
# id: str
updated: datetime
# dc_date: str
# geobox: str
# geobox_where: str
id: str = None
title: str = None
summary: str = None
updated: datetime = None
dc_date: str = None
geobox: str = None
geobox_where: str = None
links: List[OpensearchResponseEntryLink] = list()
# summary: str
class OpensearchResponseHeader(BaseModel):
title: str
id: str
total_results: int
title: str
total_results: int = 0
start_index: int = 0
items_per_page: int = 0
# Describes the query that was sent to an OpenSearch engine: the URL that was
# requested together with the parameters that URL was built from.
class OpensearchQuery(BaseModel):
# Search URL that was requested.
url: str
# Query parameters the search URL was built from.
params: OpensearchQueryParameters
class OpensearchResponse(BaseModel):
query: OpensearchQueryParameters = None
query: OpensearchQuery = None
error: str = None
header: OpensearchResponseHeader = None
entries: List[OpensearchResponseEntry] = list()
......@@ -79,9 +84,22 @@ class OpensearchEngine:
request = response.request
print(f"Response event hook: {request.method} {request.url} - Status {response.status_code}")
async with httpx.AsyncClient(event_hooks={'request': [request_hook], 'response': [response_hook]}) as client:
response = await client.get(url=self._build_search_url(params), timeout=self.timeout)
return self._process_response(response)
url = self._build_search_url(params)
opensearch_response = OpensearchResponse(query=OpensearchQuery(params=params, url=url))
try:
async with httpx.AsyncClient(
event_hooks={'request': [request_hook], 'response': [response_hook]}
) as client:
raw_response = await client.get(url=url, timeout=self.timeout)
opensearch_response.header, opensearch_response.entries = self._process_response(raw_response)
except httpx.ReadTimeout:
opensearch_response.error = "Timeout occurred"
except Exception as e:
print(str(e))
opensearch_response.error = str(e)
finally:
return opensearch_response
def _build_search_url(self, params: OpensearchQueryParameters) -> str:
scheme, netloc, path, query_string, fragment = urlsplit(self.root_path)
......@@ -96,35 +114,82 @@ class OpensearchEngine:
"datasetId": params.datasetId
}
def _process_response(self, http_response: Response) -> Optional[OpensearchResponse]:
def _process_response(self, http_response: Response) -> Tuple[OpensearchResponseHeader, List[OpensearchResponseEntry]]:
raise NotImplementedError()
class IfremerOpenSearchEngine(OpensearchEngine):
def _parse_entry(self, entry: AtomEntry) -> OpensearchResponseEntry:
opensearch_entry = OpensearchResponseEntry(
title=entry.title.value,
updated=entry.updated,
summary=entry.summary
class OpensearchAtomEngine(OpensearchEngine):
# Prefix -> namespace URI map handed to ElementTree's find()/findall().
# ElementTree compares namespace URIs as exact strings, so every URI here must
# match the feed's xmlns declarations character for character.
namespaces = {
    "feed": "http://www.w3.org/2005/Atom",
    "opensearch": "http://a9.com/-/spec/opensearch/1.1/",
    "geo": "http://a9.com/-/opensearch/extensions/geo/1.0/",
    "time": "http://a9.com/-/opensearch/extensions/time/1.0/",
    "georss": "http://www.georss.org/georss/10",
    # The Dublin Core element set namespace ends with a slash; without it
    # "dc:*" lookups never match feeds that declare the standard URI.
    "dc": "http://purl.org/dc/elements/1.1/",
    "gml": "http://www.opengis.net/gml",
    "cwic": "http://cwic.wgiss.ceos.org/opensearch/extensions/1.0/",
}
def _process_response(self, http_response: Response) -> Tuple[OpensearchResponseHeader, List[OpensearchResponseEntry]]:
    """Parse an Atom feed response into a header and its list of entries.

    :param http_response: HTTP response whose body is an Atom XML document.
    :return: ``(header, entries)`` built from the feed root element and from
        each of its ``feed:entry`` children.
    :raises xml.etree.ElementTree.ParseError: if the body is not well-formed XML.
    """
    # Hand the raw bytes to the parser so that it honours the encoding declared
    # in the XML prolog instead of assuming the body is always UTF-8.
    # NOTE(review): the stdlib parser is not hardened against maliciously
    # crafted XML; confirm the queried endpoints are trusted.
    feed = Element.fromstring(http_response.content)
    return (
        self._parse_header(feed),
        [self._parse_entry(entry) for entry in self.get_children(feed, "feed:entry")],
    )
for link in entry.links:
opensearch_entry.links.append(OpensearchResponseEntryLink(
title=link.title,
href=link.href,
rel=link.rel,
type=link.type_
))
def _parse_header(self, feed: Element.Element) -> OpensearchResponseHeader:
    """Build the response header from the feed-level elements.

    :param feed: root element of the Atom feed.
    :return: header filled with the feed title, id, update date and the
        OpenSearch paging counters found in the feed.
    """
    values = {
        "title": self.get_text(feed, "feed:title"),
        "id": self.get_text(feed, "feed:id"),
        "updated": self.get_text(feed, "feed:updated"),
        "total_results": self.get_text(feed, "opensearch:totalResults"),
        "start_index": self.get_text(feed, "opensearch:startIndex"),
        "items_per_page": self.get_text(feed, "opensearch:itemsPerPage"),
    }
    # Leave out elements that are absent from the feed: passing None explicitly
    # to a non-optional field (e.g. ``total_results: int = 0``) is rejected by
    # validation, whereas omitting the key lets the declared default apply.
    present = {key: value for key, value in values.items() if value is not None}
    return OpensearchResponseHeader(**present)
def _parse_entry(self, entry: Element.Element) -> OpensearchResponseEntry:
    """Convert one Atom ``entry`` element into a response entry.

    The entry metadata comes from ``_parse_entry_metadata`` (which subclasses
    may override); every ``feed:link`` child is then appended to its links.

    :param entry: an ``entry`` element of the Atom feed.
    :return: the populated response entry.
    """
    opensearch_entry = self._parse_entry_metadata(entry)
    opensearch_entry.links.extend(
        self._parse_link(link) for link in self.get_children(entry, "feed:link")
    )
    return opensearch_entry
def _process_response(self, http_response: Response) -> Optional[OpensearchResponse]:
opensearch_response = OpensearchResponse()
def _parse_entry_metadata(self, entry: Element.Element) -> OpensearchResponseEntry:
    """Build a response entry from the standard Atom fields of ``entry``.

    Only the id, title, update date and summary are read here; links are
    added by the caller.

    :param entry: an ``entry`` element of the Atom feed.
    :return: response entry carrying the Atom metadata; a field is ``None``
        when the matching element is missing.
    """
    return OpensearchResponseEntry(
        id=self.get_text(entry, "feed:id"),
        title=self.get_text(entry, "feed:title"),
        updated=self.get_text(entry, "feed:updated"),
        summary=self.get_text(entry, "feed:summary"),
    )
def _parse_link(self, link: Element.Element) -> OpensearchResponseEntryLink:
    """Convert an Atom ``link`` element into a response link.

    :param link: a ``link`` element; its ``title``, ``href``, ``rel`` and
        ``type`` XML attributes are copied (``None`` when an attribute is absent).
    :return: the response link.
    """
    return OpensearchResponseEntryLink(
        title=link.get("title"),
        href=link.get("href"),
        rel=link.get("rel"),
        type=link.get("type"),
    )
feed = atoma.parse_atom_bytes(http_response.content)
def get_text(self, elt: Element.Element, name: str) -> Optional[str]:
    """Return the text of the first sub-element of ``elt`` matching ``name``.

    :param elt: element to search in.
    :param name: prefixed tag name or path (e.g. ``"feed:title"``), resolved
        with ``self.namespaces``.
    :return: the sub-element text, or ``None`` when no sub-element matches
        (or when the matching element has no text).
    """
    sub_elt = elt.find(name, namespaces=self.namespaces)
    if sub_elt is None:
        return None
    return sub_elt.text
for entry in feed.entries:
opensearch_response.entries.append(self._parse_entry(entry))
def get_children(self, elt: Element.Element, name: str) -> List[Element.Element]:
    """Return every sub-element of ``elt`` matching ``name``.

    :param elt: element to search in.
    :param name: prefixed tag name or path (e.g. ``"feed:entry"``), resolved
        with ``self.namespaces``.
    :return: matching elements in document order; an empty list when none match.
    """
    return elt.findall(name, namespaces=self.namespaces)
return opensearch_response
class IfremerOpenSearchEngine(OpensearchAtomEngine):
    """Atom OpenSearch engine that also reads date and bounding-box elements from each entry."""

    def _parse_entry_metadata(self, entry: Element.Element) -> OpensearchResponseEntry:
        """Build a response entry from the Atom fields plus ``dc:date`` and ``georss:box``.

        :param entry: an ``entry`` element of the Atom feed.
        :return: response entry; a field is ``None`` when the matching element
            is missing.
        """
        return OpensearchResponseEntry(
            id=self.get_text(entry, "feed:id"),
            title=self.get_text(entry, "feed:title"),
            updated=self.get_text(entry, "feed:updated"),
            summary=self.get_text(entry, "feed:summary"),
            dc_date=self.get_text(entry, "dc:date"),
            geobox=self.get_text(entry, "georss:box"),
            # NOTE(review): "geobox_where" carries no namespace prefix, so it only
            # matches an un-namespaced <geobox_where> element - presumably a
            # prefixed name such as "georss:where" was intended; confirm against
            # an actual feed.
            geobox_where=self.get_text(entry, "geobox_where"),
        )
[[package]]
name = "atoma"
version = "0.0.17"
description = "Atom, RSS and JSON feed parser for Python 3"
category = "main"
optional = false
python-versions = "*"
[package.dependencies]
attrs = "*"
defusedxml = "*"
python-dateutil = "*"
[package.extras]
tests = ["pytest", "pytest-cov", "python-coveralls", "pycodestyle"]
[[package]]
name = "attrs"
version = "20.3.0"
description = "Classes Without Boilerplate"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*"
[package.extras]
dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface", "furo", "sphinx", "pre-commit"]
docs = ["furo", "sphinx", "zope.interface"]
tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "zope.interface"]
tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six"]
[[package]]
name = "certifi"
version = "2020.12.5"
......@@ -44,22 +14,6 @@ category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "colorama"
version = "0.4.4"
description = "Cross-platform colored terminal text."
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "defusedxml"
version = "0.7.1"
description = "XML bomb protection for Python stdlib modules"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
[[package]]
name = "fastapi"
version = "0.63.0"
......@@ -127,21 +81,6 @@ category = "main"
optional = false
python-versions = ">=3.4"
[[package]]
name = "loguru"
version = "0.5.3"
description = "Python logging made (stupidly) simple"
category = "main"
optional = false
python-versions = ">=3.5"
[package.dependencies]
colorama = {version = ">=0.3.4", markers = "sys_platform == \"win32\""}
win32-setctime = {version = ">=1.0.0", markers = "sys_platform == \"win32\""}
[package.extras]
dev = ["codecov (>=2.0.15)", "colorama (>=0.3.4)", "flake8 (>=3.7.7)", "tox (>=3.9.0)", "tox-travis (>=0.12)", "pytest (>=4.6.2)", "pytest-cov (>=2.7.1)", "Sphinx (>=2.2.1)", "sphinx-autobuild (>=0.7.1)", "sphinx-rtd-theme (>=0.4.3)", "black (>=19.10b0)", "isort (>=5.1.1)"]
[[package]]
name = "pydantic"
version = "1.8.1"
......@@ -157,17 +96,6 @@ typing-extensions = ">=3.7.4.3"
dotenv = ["python-dotenv (>=0.10.4)"]
email = ["email-validator (>=1.0.3)"]
[[package]]
name = "python-dateutil"
version = "2.8.1"
description = "Extensions to the standard Python datetime module"
category = "main"
optional = false
python-versions = "!=3.0.*,!=3.1.*,!=3.2.*,>=2.7"
[package.dependencies]
six = ">=1.5"
[[package]]
name = "rfc3986"
version = "1.4.0"
......@@ -182,14 +110,6 @@ idna = {version = "*", optional = true, markers = "extra == \"idna2008\""}
[package.extras]
idna2008 = ["idna"]
[[package]]
name = "six"
version = "1.15.0"
description = "Python 2 and 3 compatibility utilities"
category = "main"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*"
[[package]]
name = "sniffio"
version = "1.2.0"
......@@ -233,31 +153,12 @@ typing-extensions = {version = "*", markers = "python_version < \"3.8\""}
[package.extras]
standard = ["websockets (>=8.0.0,<9.0.0)", "watchgod (>=0.6)", "python-dotenv (>=0.13)", "PyYAML (>=5.1)", "httptools (>=0.1.0,<0.2.0)", "uvloop (>=0.14.0,!=0.15.0,!=0.15.1)", "colorama (>=0.4)"]
[[package]]
name = "win32-setctime"
version = "1.0.3"
description = "A small Python utility to set file creation time on Windows"
category = "main"
optional = false
python-versions = ">=3.5"
[package.extras]
dev = ["pytest (>=4.6.2)", "black (>=19.3b0)"]
[metadata]
lock-version = "1.1"
python-versions = "^3.7"
content-hash = "b4212bb4ff6100637725acbb6b2f87fb94f845a9cb0456014813a99617ed6dc0"
content-hash = "901bff720b66b10f60635c68121249b29a217c7087100114efc402925c16b9e2"
[metadata.files]
atoma = [
{file = "atoma-0.0.17-py3-none-any.whl", hash = "sha256:aef9c9fa2c3f7e3f721ae0502e60846853cc7f5c3cf37576336bc1fa2b314c4c"},
{file = "atoma-0.0.17.tar.gz", hash = "sha256:110677804e0b3dda6993fccaee44eb2929b9ee9deff3e7e434bcc3bddad6dcac"},
]
attrs = [
{file = "attrs-20.3.0-py2.py3-none-any.whl", hash = "sha256:31b2eced602aa8423c2aea9c76a724617ed67cf9513173fd3a4f03e3a929c7e6"},
{file = "attrs-20.3.0.tar.gz", hash = "sha256:832aa3cde19744e49938b91fea06d69ecb9e649c93ba974535d08ad92164f700"},
]
certifi = [
{file = "certifi-2020.12.5-py2.py3-none-any.whl", hash = "sha256:719a74fb9e33b9bd44cc7f3a8d94bc35e4049deebe19ba7d8e108280cfd59830"},
{file = "certifi-2020.12.5.tar.gz", hash = "sha256:1a4995114262bffbc2413b159f2a1a480c969de6e6eb13ee966d470af86af59c"},
......@@ -266,14 +167,6 @@ click = [
{file = "click-7.1.2-py2.py3-none-any.whl", hash = "sha256:dacca89f4bfadd5de3d7489b7c8a566eee0d3676333fbb50030263894c38c0dc"},
{file = "click-7.1.2.tar.gz", hash = "sha256:d2b5255c7c6349bc1bd1e59e08cd12acbbd63ce649f2588755783aa94dfb6b1a"},
]
colorama = [
{file = "colorama-0.4.4-py2.py3-none-any.whl", hash = "sha256:9f47eda37229f68eee03b24b9748937c7dc3868f906e8ba69fbcbdd3bc5dc3e2"},
{file = "colorama-0.4.4.tar.gz", hash = "sha256:5941b2b48a20143d2267e95b1c2a7603ce057ee39fd88e7329b0c292aa16869b"},
]
defusedxml = [
{file = "defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61"},
{file = "defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69"},
]
fastapi = [
{file = "fastapi-0.63.0-py3-none-any.whl", hash = "sha256:98d8ea9591d8512fdadf255d2a8fa56515cdd8624dca4af369da73727409508e"},
{file = "fastapi-0.63.0.tar.gz", hash = "sha256:63c4592f5ef3edf30afa9a44fa7c6b7ccb20e0d3f68cd9eba07b44d552058dcb"},
......@@ -294,10 +187,6 @@ idna = [
{file = "idna-3.1-py3-none-any.whl", hash = "sha256:5205d03e7bcbb919cc9c19885f9920d622ca52448306f2377daede5cf3faac16"},
{file = "idna-3.1.tar.gz", hash = "sha256:c5b02147e01ea9920e6b0a3f1f7bb833612d507592c837a6c49552768f4054e1"},
]
loguru = [
{file = "loguru-0.5.3-py3-none-any.whl", hash = "sha256:f8087ac396b5ee5f67c963b495d615ebbceac2796379599820e324419d53667c"},
{file = "loguru-0.5.3.tar.gz", hash = "sha256:b28e72ac7a98be3d28ad28570299a393dfcd32e5e3f6a353dec94675767b6319"},
]
pydantic = [
{file = "pydantic-1.8.1-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:0c40162796fc8d0aa744875b60e4dc36834db9f2a25dbf9ba9664b1915a23850"},
{file = "pydantic-1.8.1-cp36-cp36m-manylinux1_i686.whl", hash = "sha256:fff29fe54ec419338c522b908154a2efabeee4f483e48990f87e189661f31ce3"},
......@@ -322,18 +211,10 @@ pydantic = [
{file = "pydantic-1.8.1-py3-none-any.whl", hash = "sha256:e3f8790c47ac42549dc8b045a67b0ca371c7f66e73040d0197ce6172b385e520"},
{file = "pydantic-1.8.1.tar.gz", hash = "sha256:26cf3cb2e68ec6c0cfcb6293e69fb3450c5fd1ace87f46b64f678b0d29eac4c3"},
]
python-dateutil = [
{file = "python-dateutil-2.8.1.tar.gz", hash = "sha256:73ebfe9dbf22e832286dafa60473e4cd239f8592f699aa5adaf10050e6e1823c"},
{file = "python_dateutil-2.8.1-py2.py3-none-any.whl", hash = "sha256:75bb3f31ea686f1197762692a9ee6a7550b59fc6ca3a1f4b5d7e32fb98e2da2a"},
]
rfc3986 = [
{file = "rfc3986-1.4.0-py2.py3-none-any.whl", hash = "sha256:af9147e9aceda37c91a05f4deb128d4b4b49d6b199775fd2d2927768abdc8f50"},
{file = "rfc3986-1.4.0.tar.gz", hash = "sha256:112398da31a3344dc25dbf477d8df6cb34f9278a94fee2625d89e4514be8bb9d"},
]
six = [
{file = "six-1.15.0-py2.py3-none-any.whl", hash = "sha256:8b74bedcbbbaca38ff6d7491d76f2b06b3592611af620f8426e82dddb04a5ced"},
{file = "six-1.15.0.tar.gz", hash = "sha256:30639c035cdb23534cd4aa2dd52c3bf48f06e5f4a941509c8bafd8ce11080259"},
]
sniffio = [
{file = "sniffio-1.2.0-py3-none-any.whl", hash = "sha256:471b71698eac1c2112a40ce2752bb2f4a4814c22a54a3eed3676bc0f5ca9f663"},
{file = "sniffio-1.2.0.tar.gz", hash = "sha256:c4666eecec1d3f50960c6bdf61ab7bc350648da6c126e3cf6898d8cd4ddcd3de"},
......@@ -351,7 +232,3 @@ uvicorn = [
{file = "uvicorn-0.13.4-py3-none-any.whl", hash = "sha256:7587f7b08bd1efd2b9bad809a3d333e972f1d11af8a5e52a9371ee3a5de71524"},
{file = "uvicorn-0.13.4.tar.gz", hash = "sha256:3292251b3c7978e8e4a7868f4baf7f7f7bb7e40c759ecc125c37e99cdea34202"},
]
win32-setctime = [
{file = "win32_setctime-1.0.3-py3-none-any.whl", hash = "sha256:dc925662de0a6eb987f0b01f599c01a8236cb8c62831c22d9cada09ad958243e"},
{file = "win32_setctime-1.0.3.tar.gz", hash = "sha256:4e88556c32fdf47f64165a2180ba4552f8bb32c1103a2fafd05723a0bd42bd4b"},
]
......@@ -38,8 +38,5 @@ fastapi = "^0.63" # MIT License
httpx = "^0.17" # BSD License
uvicorn = "^0.13.4" # BSD License
atoma = "^0.0.17" # MIT License
loguru = "^0.5.3" # MIT License
[tool.poetry.scripts]
run = "cops_ws.main:main"
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment