Commit 6544672f authored by Laure QUINTRIC
Browse files

Handle both Genoscope-style and classic ("classique") R1/R2 fastq.gz file names

parent c04856e5
......@@ -56,10 +56,8 @@ if __name__ == '__main__':
output = config["common"]["output"]
properties = config["common"]["properties"]
barcode = config["common"]["barcode"]
samplename = config["common"]["samplename"]
directory = config["common"]["directory"]
trimreads = config["common"]["trimreads"]
rename = config["common"]["rename"]
# logging config
if not os.path.isdir(output):
......@@ -88,27 +86,25 @@ if __name__ == '__main__':
# Keep only fastq files type
readsfiles=[reads for reads in readsfiles if "fastq.gz" in reads]
treated=[]
pattern = re.compile("(.)+_[1-6]_1_(.)+fastq.gz")
patternGenoscope = re.compile("(.)+_[1-9]_1_(.)+fastq.gz")
patternClassique = re.compile("(.)+R1.fastq.gz")
counter=0
for rf in readsfiles :
if rf not in treated and pattern.match(rf):
if rf not in treated :
counter=counter+1
R1 = rf
if '_2_1_' in str(rf):
R2 = rf.replace('_2_1_', '_2_2_')
elif '_1_1_' in str(rf) :
R2 = rf.replace('_1_1_', '_1_2_')
elif '_3_1_' in str(rf) :
R2 = rf.replace('_3_1_', '_3_2_')
elif '_4_1_' in str(rf) :
R2 = rf.replace('_4_1_', '_4_2_')
elif '_5_1_' in str(rf) :
R2 = rf.replace('_5_1_', '_5_2_')
elif '_6_1_' in str(rf) :
R2 = rf.replace('_6_1_', '_6_2_')
if patternGenoscope.match(rf):
if '_1_1_' in str(rf):
R2 = rf.replace('_1_1_', '_1_2_')
else :
R2 = rf.replace('_1_', '_2_')
else :
logger.debug("No pair found for sample {}, exiting program...".format(R1))
sys.exit(1)
elif patternClassique.match(rf):
R2 = rf.replace('R1', 'R2')
else :
logger.debug("No pair found for sample {}, exiting program...".format(R1))
sys.exit(1)
logger.debug("File {} does not match patterns .R[1-2].fastq.gz or _[1-9]_[1-2]XXX.fastq.gz".format(rf))
logger.debug("R1 = {}".format(R1))
logger.debug("R2 = {}".format(R2))
......@@ -122,10 +118,8 @@ if __name__ == '__main__':
os.environ['output']=output
os.environ['barcode']=barcode
os.environ['properties']=properties
os.environ['samplename']=samplename
os.environ['tmpdir']=tmpdir
os.environ['trimreads']=trimreads
os.environ['rename']=rename
cmd="qsub -o {}/logs -V {}/extractR1R2.pbs".format(output, directory)
......@@ -134,9 +128,8 @@ if __name__ == '__main__':
logger.debug("job number : {}".format(out))
#check if number of generated files is ok
if not rename :
cmd="qsub -o {}/logs -V -W depend=afterok:{} {}/check.pbs".format(output,":".join(jobs), directory)
check = runcmd(cmd).strip().split(".")[0]
cmd="qsub -o {}/logs -V -W depend=afterok:{} {}/check.pbs".format(output,":".join(jobs), directory)
check = runcmd(cmd).strip().split(".")[0]
else :
logging.debug("No fastq.gz files found in {}, exiting program".format(indir))
sys.exit(1)
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment