Commit da483dce authored by pm22d12's avatar pm22d12
Browse files

Add #61 - unit test Downloader FileExtractor

parent 5954da26
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of data/fileextractor</title>
</head>
<body>
<h1>Index of /data/fileextractor</h1>
<pre>
<img src="/icons/blank.gif" alt="Icon "> <a href="?C=N;O=D">Name</a> <a href="?C=M;O=A">Last modified</a> <a href="?C=S;O=A">Size</a> <a href="?C=D;O=A">Description</a><hr>
<img src="/icons/back.gif" alt="[DIR]"> <a href="folder_1/">folder_1/</a> 29-Mar-2020 23:00 -
<img src="/icons/unknown.gif" alt="[ ]"> <a href="file_1.dat">file_1.dat</a> 29-Mar-2020 23:50 12.4M
<img src="/icons/unknown.gif" alt="[ ]"> <a href="file_2.dat">file_2.dat</a> 30-Mar-2020 04:52 2.4K
<img src="/icons/unknown.gif" alt="[ ]"> <a href="file_3.dat">file_3.dat</a> 30-Mar-2020 04:52 253
<hr></pre>
</body></html>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="https://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
</head>
<body>
<table><tr><th>Name</th><th>Last modified</th><th>Size</th></tr><tr><th colspan="4"></th></tr>
<tr><td valign="top"><a href="folder_1/"><img src="/ico_folder.gif" alt="[DIR]"></a></td><td><a href="folder_1/">FOLDER_1</a></td><td align="right">08-Apr-2019 19:33 </td><td align="right"> - </td></tr>
<tr><td valign="top"><a href="file_1.hdf"><img src="/icons/unknown.gif" alt="[ ]"></a></td><td><a href="file_1.hdf">file_1.hdf</a></td><td align="right">05-Mar-2020 18:43 </td><td align="right">1.5G</td></tr>
<tr><td valign="top"><a href="file_2.hdf.gz"><img src="/icons/compressed.gif" alt="[ ]"></a></td><td><a href="file_2.hdf.gz">file_2.hdf.GZ</a></td><td align="right">05-Mar-2020 18:43 </td><td align="right">65M</td></tr>
</table>
</body>
</html>
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">
<html>
<head>
<title>Index of /data/UniBremen</title>
</head>
<body>
<table>
<tr><th valign="top"><img src="/icons/blank.gif" alt="[ICO]"></th><th><a href="?C=N;O=D">Name</a></th><th><a href="?C=M;O=A">Last modified</a></th><th><a href="?C=S;O=A">Size</a></th><th><a href="?C=D;O=A">Description</a></th></tr>
<tr><th colspan="5"><hr></th></tr>
<tr><td valign="top"><img src="/icons/folder.gif" alt="[DIR]"></td><td><a href="folder_1/">folder_1/</a></td><td align="right">2020-03-30 05:35 </td><td align="right"> - </td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/unknown.gif" alt="[ ]"></td><td><a href="file_1.hdf">file_1.hdf</a></td><td align="right">2020-03-02 04:51 </td><td align="right">1.0M</td><td>&nbsp;</td></tr>
<tr><td valign="top"><img src="/icons/image2.gif" alt="[IMG]"></td><td><a href="file_2.tif">file_2.tif</a></td><td align="right">2020-03-02 05:32 </td><td align="right">242K</td><td>&nbsp;</td></tr>
</table>
</body></html>
<html>
<body><h1>data/UniHamburg/catalog.html</h1>
<table>
<tr>
<th align='left'><font size='+1'>Dataset</font></th>
<th align='center'><font size='+1'>Size</font></th>
<th align='right'><font size='+1'>Last Modified</font></th>
</tr>
<tr>
<td align='left'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<img src='/thredds/folder.gif' alt='Folder'> &nbsp;<a href='folder_1/catalog.html'><tt>folder_1/</tt></a></td>
<td align='right'><tt>&nbsp;</tt></td>
<td align='right'><tt>--</tt></td>
</tr>
<tr bgcolor='#eeeeee'>
<td align='left'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href='catalog.html?data/file_1.nc'><tt>file_1.nc</tt></a></td>
<td align='right'><tt>22.34 Mbytes</tt></td>
<td align='right'><tt>2020-02-01T18:39:22Z</tt></td>
</tr>
<tr>
<td align='left'>&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;
<a href='catalog.html?data/file_2.nc'><tt>file_2.nc</tt></a></td>
<td align='right'><tt>349.4 Kbytes</tt></td>
<td align='right'><tt>2020-02-01T00:04:28Z</tt></td>
</tr>
</table>
</body>
</html>
{
"test_FileExtractor":
[
{
"filepath": "data/fileextractor/fileextractor1.html",
"files_extracted": "{'folder_1': (1, '29-Mar-2020 23:00', '0'), 'file_1.dat': (0, '29-Mar-2020 23:50', '12.4M'), 'file_2.dat': (0, '30-Mar-2020 04:52', '2.4K'), 'file_3.dat': (0, '30-Mar-2020 04:52', '253')}",
}
],
"test_Nomads":
[
{
"filepath": "data/fileextractor/nomads1.html",
"files_extracted": "{'folder_1': (1, '08-Apr-2019 19:33', '0'), 'file_1.hdf': (0, '05-Mar-2020 18:43', '1.5G'), 'file_2.hdf.gz': (0, '05-Mar-2020 18:43', '65M')}",
}
],
"test_UniBremen":
[
{
"filepath": "data/fileextractor/unibremen1.html",
"files_extracted": "{'folder_1': (1, '2020-03-30 05:35', '0'), 'file_1.hdf': (0, '2020-03-02 04:51', '1.0M'), 'file_2.tif': (0, '2020-03-02 05:32', '242K')}",
}
],
"test_UniHamburg":
[
{
"filepath": "data/fileextractor/unihamburg1.html",
"files_extracted": "{'folder_1/': (1, '', '0'), 'file_1.nc': (0, '2020-02-01T18:39:22Z', '22.34 Mbytes'), 'file_2.nc': (0, '2020-02-01T00:04:28Z', '349.4 Kbytes')}",
}
],
}
import unittest
import os
import re
import ast
import shutil
from dchecktools.protocols.https_directorylist import FileExtractor
from dchecktools.protocols.file_extractor import *
class PluginsFileExtractorTestCase(unittest.TestCase):
set_of_tests = {}
def run_Extractor(self, extractor, set_of_test):
html = ""
with open(set_of_test['filepath'], 'r') as f:
for line in f.readlines():
html += line.rstrip("\n")
extractor.feed(html)
files = extractor.get_files()
self.assertEqual(str(files), set_of_test['files_extracted'])
def setUp(self) -> None:
if PluginsFileExtractorTestCase.set_of_tests == {}:
cfg = ""
cfg_file = os.path.splitext(__file__)[0] + '.cfg'
with open(cfg_file, 'r') as f:
for line in f.readlines():
cfg += line.rstrip("\n")
PluginsFileExtractorTestCase.set_of_tests = ast.literal_eval(cfg)
if self._testMethodName in PluginsFileExtractorTestCase.set_of_tests.keys():
self.set_of_test = PluginsFileExtractorTestCase.set_of_tests[self._testMethodName]
else:
self.skipTest(f"No set of test for {self._testMethodName} in {os.path.splitext(__file__)[0] + '.cfg'}")
# timeout in minutes
self.__timeout = 60
def test_FileExtractor(self) -> None:
for set_of_test in self.set_of_test:
extractor = FileExtractor()
self.run_Extractor(extractor, set_of_test)
def test_UniBremen(self) -> None:
for set_of_test in self.set_of_test:
extractor = FileExtractor_UniBremen()
self.run_Extractor(extractor, set_of_test)
def test_UniHamburg(self) -> None:
for set_of_test in self.set_of_test:
extractor = FileExtractor_UniHamburg()
self.run_Extractor(extractor, set_of_test)
def test_Nomads(self) -> None:
for set_of_test in self.set_of_test:
extractor = FileExtractor_Nomads()
self.run_Extractor(extractor, set_of_test)
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment