Unverified Commit a5fdaa68 authored by CHARLES's avatar CHARLES 🐧
Browse files

StringFilters rewrite

parent ca9ac663
......@@ -48,4 +48,10 @@ The code has been automatically formatted to follow PEP8 standard.
autopep8 --in-place --recursive -j8 lib/
```
There are also plugins for many editors and IDEs.
\ No newline at end of file
There are also plugins for many editors and IDEs.
### Unit testing
```
pytest lib/
```
\ No newline at end of file
......@@ -3,7 +3,6 @@
import os
import unittest
import doctest
import glob
import sys
from optparse import OptionParser
......@@ -12,7 +11,7 @@ suite = unittest.TestSuite()
EXCLUDES = ['.svn']
def test_directory(directory):
def check_directory(directory):
sys_path_ori = sys.path
sys.path.insert(0, os.path.abspath(directory))
os.chdir(directory)
......@@ -37,31 +36,31 @@ def test_directory(directory):
sys.path = sys_path_ori
def recursiv_test_directory(directory):
def recursiv_check_directory(directory):
lstdir = sorted(os.listdir(directory))
test_directory(directory)
check_directory(directory)
for file in lstdir:
curdir = os.getcwd()
if os.path.isdir(file):
if file not in EXCLUDES:
recursiv_test_directory(file)
recursiv_check_directory(file)
os.chdir(curdir)
def run(directory, recursive=True, verbose=False):
# lstdir = os.listdir(directory)
# lstdir.sort()
# test_directory(directory)
# check_directory(directory)
if recursive:
recursiv_test_directory(directory)
recursiv_check_directory(directory)
else:
test_directory(directory)
check_directory(directory)
# for file in lstdir:
# curdir = os.getcwd()
# if os.path.isdir(file):
# if not file in EXCLUDES:
# print "TESTING DIRECTORY : ", file
# test_directory(file)
# check_directory(file)
# os.chdir(curdir)
test_res = None
......
......@@ -20,7 +20,6 @@ from dchecktools.protocols import http_stdmet
from dchecktools.protocols import https_opensearch
from dchecktools.protocols import sftp
from dchecktools.filters.StringFilters import FileFilter
from dchecktools.filters.StringFilters import DirectoryFilter
from dchecktools.common.errors import DC_Error, DC_ConfigError, DC_DbError
import logging
......@@ -272,7 +271,7 @@ class Config:
if 'DATE_INDEX' in module_attr:
self.date_index = module.DATE_INDEX
# 04/04/2018 PMT ID#34 ajout pour OpenSearch
# 04/04/2018 PMT ID#34 ajout pour OpenSearch
if 'RESULT_FORMAT' in module_attr:
self.resultFormat = module.RESULT_FORMAT
if 'SELECT_GEO' in module_attr:
......@@ -572,26 +571,22 @@ class DCheck(object):
proto.setFileInfoDatabase(bfi)
dirFilter = DirectoryFilter()
dirFilter.forceDirectories = config.forceDirectories
dirFilter.forceDirectoriesRegexp = config.forceDirectoriesRegexp
dirFilter.ignoreDirectories = config.ignoreDirectories
dirFilter.ignoreDirectoriesRegexp = config.ignoreDirectoriesRegexp
dirFilter.interestingByDefault = config.default_directory_accept
# 2018/01/24 PMT ID#33 prise en compte de ignoreDirectoriesOlderThan
dirFilter.ignoreDirectoriesOlderThan = config.ignoreDirectoriesOlderThan
# 2018/01/24 PMT ID#33 prise en compte de ignoreDirectoriesChangedSince
dirFilter.ignoreDirectoriesChangedSince = config.ignoreDirectoriesChangedSince
dirFilter = FileFilter(
forceRegexp=list(config.forceDirectories) + list(config.forceDirectoriesRegexp),
ignoreRegexp=list(config.ignoreDirectories) + list(config.ignoreDirectoriesRegexp),
ignoreOlderThan=config.ignoreDirectoriesOlderThan,
ignoreNewerThan=config.ignoreDirectoriesChangedSince,
interestingByDefault=config.default_directory_accept,
)
proto.setDirectoryFilter(dirFilter)
fileFilter = FileFilter()
fileFilter.forceFiles = config.forceFiles
fileFilter.forceFilesRegexp = config.forceFilesRegexp
fileFilter.ignoreFiles = config.ignoreFiles
fileFilter.ignoreFilesRegexp = config.ignoreFilesRegexp
fileFilter.interestingByDefault = config.default_file_accept
# fcad: added modification time filter
fileFilter.ignoreFilesOlderThan = config.ignoreFilesOlderThan
fileFilter = FileFilter(
forceRegexp=list(config.forceFiles) + list(config.forceFilesRegexp),
ignoreRegexp=list(config.ignoreFiles) + list(config.ignoreFilesRegexp),
ignoreOlderThan=config.ignoreFilesOlderThan,
ignoreNewerThan=None,
interestingByDefault=config.default_file_accept,
)
proto.setFileFilter(fileFilter)
if options.purge_older_than is not None:
......
......@@ -5,7 +5,6 @@ import os
import time
from optparse import OptionParser
import logging
from dchecktools.filters.StringFilters import DirectoryFilter
from dchecktools.filters.StringFilters import FileFilter
from dchecktools.common.errors import DC_ConfigError
from dchecktools.reports import Report_ListFiles
......@@ -301,20 +300,22 @@ class DReport(object):
bfi.createTables()
report.setFileInfoDatabase(bfi)
dirFilter = DirectoryFilter()
dirFilter.forceDirectories = config.forceDirectories
dirFilter.forceDirectoriesRegexp = config.forceDirectoriesRegexp
dirFilter.ignoreDirectories = config.ignoreDirectories
dirFilter.ignoreDirectoriesRegexp = config.ignoreDirectoriesRegexp
dirFilter.interestingByDefault = config.default_directory_accept
dirFilter = FileFilter(
forceRegexp=list(config.forceDirectories) + list(config.forceDirectoriesRegexp),
ignoreRegexp=list(config.ignoreDirectories) + list(config.ignoreDirectoriesRegexp),
ignoreOlderThan=None,
ignoreNewerThan=None,
interestingByDefault=config.default_directory_accept,
)
report.setDirectoryFilter(dirFilter)
fileFilter = FileFilter()
fileFilter.forceFiles = config.forceFiles
fileFilter.forceFilesRegexp = config.forceFilesRegexp
fileFilter.ignoreFiles = config.ignoreFiles
fileFilter.ignoreFilesRegexp = config.ignoreFilesRegexp
fileFilter.interestingByDefault = config.default_file_accept
fileFilter = FileFilter(
forceRegexp=list(config.forceFiles) + list(config.forceFilesRegexp),
ignoreRegexp=list(config.ignoreFiles) + list(config.ignoreFilesRegexp),
ignoreOlderThan=None,
ignoreNewerThan=None,
interestingByDefault=config.default_file_accept,
)
report.setFileFilter(fileFilter)
log.debug("Starting...")
......
......@@ -14,23 +14,30 @@ class FileFilter(object):
based on reference regexp or string
"""
def __init__(self):
# Filters configuration
self.forceFiles = []
self.forceFilesRegexp = []
self.ignoreFiles = []
self.ignoreFilesRegexp = []
self.ignoreFilesOlderThan = None # fcad: specify modification datetime
self.interestingByDefault = True
def __init__(self, forceRegexp=None, ignoreRegexp=None, ignoreOlderThan=None,
             ignoreNewerThan=None, interestingByDefault=True):
    """
    Build a filter from its complete configuration.

    forceRegexp          : pattern or list of patterns handed to
                           setForceInterestingRegex (None means no pattern)
    ignoreRegexp         : pattern or list of patterns handed to
                           setIgnoreRegex (None means no pattern)
    ignoreOlderThan      : threshold handed to setIgnoreOlderThan
    ignoreNewerThan      : threshold handed to setIgnoreNewerThan
    interestingByDefault : value handed to setDefault
    """
    # None stands in for "no pattern": a literal [] default would be a
    # single list object shared by every call of this constructor.
    self.setForceInterestingRegex([] if forceRegexp is None else forceRegexp)
    self.setIgnoreRegex([] if ignoreRegexp is None else ignoreRegexp)
    self.setIgnoreOlderThan(ignoreOlderThan)
    self.setIgnoreNewerThan(ignoreNewerThan)
    self.setDefault(interestingByDefault)
# private var, to store compiledRegexp to improve performances
self.__compiledForceFilesRegexp = None
self.__compiledIgnoreFilesRegexp = None
def setForceInterestingRegex(self, rgx):
    """
    Replace the "force interesting" patterns with compiled versions of rgx.

    rgx : a single pattern, a list/tuple/set/frozenset of patterns, or
          None for no pattern at all. Each pattern goes through
          re.compile.
    """
    if rgx is None:
        patterns = []
    elif isinstance(rgx, (list, tuple, set, frozenset)):
        patterns = rgx
    else:
        # Anything else is taken as one single pattern.
        patterns = [rgx]
    # Compile once here so matching never has to recompile.
    self.__forceRegexp = [re.compile(pattern) for pattern in patterns]
self.ignoreDirectoriesChangedSince = None
def setIgnoreRegex(self, rgx):
    """
    Replace the "ignore" patterns with compiled versions of rgx.

    rgx : a single pattern, a list/tuple/set/frozenset of patterns, or
          None for no pattern at all. Each pattern goes through
          re.compile.
    """
    if rgx is None:
        patterns = []
    elif isinstance(rgx, (list, tuple, set, frozenset)):
        patterns = rgx
    else:
        # Anything else is taken as one single pattern.
        patterns = [rgx]
    # Compile once here so matching never has to recompile.
    self.__ignoreRegexp = [re.compile(pattern) for pattern in patterns]
def setIgnoreOlderThan(self, date):
    """Store date in the private "ignore older than" threshold attribute."""
    # NOTE(review): presumably a datetime or None, since isInteresting
    # compares its thresholds with datetime.fromtimestamp(mtime) - confirm.
    self.__ignoreOlderThan = date
def setIgnoreNewerThan(self, date):
    """Store date in the private "ignore newer than" threshold attribute."""
    # NOTE(review): presumably a datetime or None, since isInteresting
    # compares its thresholds with datetime.fromtimestamp(mtime) - confirm.
    self.__ignoreNewerThan = date
def setDefault(self, interesting):
    """Store the value isInteresting returns when no other rule decides."""
    self.__interestingByDefault = interesting
def isInteresting(self, file, mtime=None):
"""
......@@ -38,134 +45,25 @@ class FileFilter(object):
Output : Boolean
Test whether a file string is interesting or not, according to
current object configuration. Priority order :
forceFiles(Regexp) > ignoreFiles(Regexp) > interestingByDefault
Example :
>>> from dchecktools.filters.StringFilters import FileFilter
>>> filter = FileFilter()
>>> filter.isInteresting("myfile.txt")
True
>>> filter.interestingByDefault = False
>>> filter.isInteresting("myfile.txt")
False
>>> filter.forceFilesRegexp = [ ".*\.txt" ]
>>> filter.isInteresting("myfile.txt")
True
forceFiles(Regexp) > ignoreFiles(Regexp) > date related checks > interestingByDefault
"""
"""
# check force file
if file in self.forceFiles:
return True
"""
interest = self.interestingByDefault
# check force file regexp
if len(self.forceFilesRegexp) > 0:
if self.__compiledForceFilesRegexp is None:
self.__compiledForceFilesRegexp = []
for fileRegexp in self.forceFilesRegexp:
#print("regexp add : '%s'"%fileRegexp)
self.__compiledForceFilesRegexp += [re.compile(fileRegexp)]
#print("regexp res : '%s'"%self.__compiledForceFilesRegexp)
for regexp in self.__compiledForceFilesRegexp:
if regexp.match(file):
interest = True
"""
# check ignore file
if file in self.ignoreFiles:
return False
"""
for regexp in self.__forceRegexp:
if regexp.match(file):
return True
# check ignore file regexp
if len(self.ignoreFilesRegexp) > 0:
if self.__compiledIgnoreFilesRegexp is None:
self.__compiledIgnoreFilesRegexp = []
for fileRegexp in self.ignoreFilesRegexp:
self.__compiledIgnoreFilesRegexp += [
re.compile(fileRegexp)]
for regexp in self.__compiledIgnoreFilesRegexp:
if regexp.match(file):
interest = False
# fcad: check ignore oldest files
# 2018/01/24 PMT ID#33 mtime time -> datetime
if (self.ignoreFilesOlderThan is not None) and (mtime is not None) and (
datetime.fromtimestamp(mtime) < self.ignoreFilesOlderThan):
interest = False
# 2018/03/06 PMT ID#33 ajout du test
if (self.ignoreDirectoriesChangedSince is not None) and (mtime is not None) and (
datetime.fromtimestamp(mtime) > self.ignoreDirectoriesChangedSince):
interest = False
return interest
class DirectoryFilter(FileFilter):
"""
A DirectoryFilter object just says if a directory is interesting or not,
based on reference regexp or string. It is just a 'semantic' object,
entirely based on FileFilter
"""
def __init__(self):
FileFilter.__init__(self)
def setForceDirectories(self, forceDirectories):
self.forceFiles = forceDirectories
def getForceDirectories(self):
return self.forceFiles
def setForceDirectoriesRegexp(self, forceDirectoriesRegexp):
self.forceFilesRegexp = forceDirectoriesRegexp
def getForceDirectoriesRegexp(self):
return self.forceFilesRegexp
def setIgnoreDirectories(self, ignoreDirectories):
self.ignoreFiles = ignoreDirectories
def getIgnoreDirectories(self):
return self.ignoreFiles
for regexp in self.__ignoreRegexp:
if regexp.match(file):
return False
def setIgnoreDirectoriesRegexp(self, ignoreDirectoriesRegexp):
self.ignoreFilesRegexp = ignoreDirectoriesRegexp
def getIgnoreDirectoriesRegexp(self):
return self.ignoreFilesRegexp
forceDirectories = property(getForceDirectories, setForceDirectories)
forceDirectoriesRegexp = property(
getForceDirectoriesRegexp,
setForceDirectoriesRegexp)
ignoreDirectories = property(getIgnoreDirectories, setIgnoreDirectories)
ignoreDirectoriesRegexp = property(
getIgnoreDirectoriesRegexp,
setIgnoreDirectoriesRegexp)
# 2018/01/24 PMT ID#33 prise en compte de ignoreDirectoriesOlderThan
def setIgnoreDirectoriesOlderThan(self, ignoreDirectoriesOlderThan):
self.ignoreFilesOlderThan = ignoreDirectoriesOlderThan
def getIgnoreDirectoriesOlderThan(self):
return self.ignoreFilesOlderThan
ignoreDirectoriesOlderThan = property(
getIgnoreDirectoriesOlderThan,
setIgnoreDirectoriesOlderThan)
# 2018/03/06 PMT ID#33 prise en compte de ignoreDirectoriesChangedSince
def setIgnoreDirectoriesChangedSince(self, ignoreDirectoriesChangedSince):
self.ignoreDirectoriesChangedSince = ignoreDirectoriesChangedSince
if (self.setIgnoreOlderThan is not None) and (mtime is not None) and (
datetime.fromtimestamp(mtime) < self.setIgnoreOlderThan):
return False
def getIgnoreDirectoriesChangedSince(self):
return self.ignoreDirectoriesChangedSince
if (self.setIgnoreNewerThan is not None) and (mtime is not None) and (
datetime.fromtimestamp(mtime) > self.setIgnoreNewerThan):
return False
# ignoreDirectoriesChangedSince = property(getIgnoreDirectoriesChangedSince, setIgnoreDirectoriesChangedSince)
return self.__interestingByDefault
......@@ -46,19 +46,9 @@ class Protocol_localpath(AbstractProtocol):
def setDirectoryFilter(self, directoryFilter):
self.directoryFilter = directoryFilter
log.debug("directory filter force : %s" %
(directoryFilter.forceDirectoriesRegexp))
log.debug("directory filter ignore : %s" %
(directoryFilter.ignoreDirectoriesRegexp))
log.debug("directory filter default : %s" %
(directoryFilter.interestingByDefault))
def setFileFilter(self, fileFilter):
self.fileFilter = fileFilter
log.debug("File filter force : %s" % (fileFilter.forceFilesRegexp))
log.debug("File filter ignore : %s" % (fileFilter.ignoreFilesRegexp))
log.debug("File filter default : %s" %
(fileFilter.interestingByDefault))
def getFileInfoList(self):
if os.path.exists(self.path):
......
......@@ -26,10 +26,10 @@ class testStringFilters(unittest.TestCase):
self.assertEqual(self.filter.isInteresting(file), True)
# ignoring file, interestingByDefault = True
self.filter.ignoreFiles = ["ignoredfile.txt", "otherignored.txt"]
self.filter.setIgnoreRegex(["ignoredfile.txt", "otherignored.txt"])
for file in files_listed:
interesting = self.filter.isInteresting(file)
if file not in self.filter.ignoreFiles:
if file not in ["ignoredfile.txt", "otherignored.txt"]:
self.assertEqual(
interesting,
True,
......@@ -43,7 +43,7 @@ class testStringFilters(unittest.TestCase):
file)
# forcing file, interestingByDefault = True
self.filter.forceFiles = ['forcedfile.txt', 'ignoredfile.txt']
self.filter.setForceInterestingRegex(["forcedfile.txt", "ignoredfile.txt"])
for file in files_listed:
interesting = self.filter.isInteresting(file)
self.assertEqual(
......@@ -53,10 +53,10 @@ class testStringFilters(unittest.TestCase):
file)
# set interestingByDefault = False
self.filter.interestingByDefault = False
self.filter.setDefault(False)
for file in files_listed:
interesting = self.filter.isInteresting(file)
if file in self.filter.forceFiles:
if file in ["forcedfile.txt", "ignoredfile.txt"]:
self.assertEqual(
interesting,
True,
......@@ -70,7 +70,7 @@ class testStringFilters(unittest.TestCase):
file)
# forcing file regexp, with interestingByDefault == False
self.filter.forceFilesRegexp = [r".*\.txt"]
self.filter.setForceInterestingRegex([r".*\.txt"])
for file in files_listed:
interesting = self.filter.isInteresting(file)
if file.endswith('.txt'):
......@@ -86,16 +86,15 @@ class testStringFilters(unittest.TestCase):
"file %s should not be interesting" %
file)
self.filter.forceFiles = ["forcedfile.txt", "ignoredfile.txt"]
self.filter.setForceInterestingRegex(["forcedfile.txt", "ignoredfile.txt"])
self.filter.setDefault(True)
self.filter.interestingByDefault = True
self.assertEqual(self.filter.isInteresting('standardfile.ext'), True)
self.assertEqual(self.filter.isInteresting('forcedfile.txt'), True)
self.assertEqual(self.filter.isInteresting('ignoredfile.txt'), True)
self.filter.forceFiles = ["forcedfile.txt"]
self.filter.forceFilesRegexp = []
self.filter.interestingByDefault = False
self.filter.setForceInterestingRegex(["forcedfile.txt"])
self.filter.setDefault(False)
self.assertEqual(self.filter.isInteresting('standardfile.ext'), False)
self.assertEqual(self.filter.isInteresting('forcedfile.txt'), True)
self.assertEqual(self.filter.isInteresting('ignoredfile.txt'), False)
......
pylint
autopep8
\ No newline at end of file
autopep8
pytest
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment