Commit 95ea5e09 authored by Laure QUINTRIC

merge R1F/R2F and R2R/R1R to create samples.tar archive for frogs

parent 7612faab
......@@ -103,38 +103,43 @@ def extractReads(inR1, inR2, outdir, forward, reverse, mismatch, tmpdir, trimrea
logging.debug("Couple name : {}".format(couplename))
samplename="{}_{}".format(couplename, R1F)
R1cutadaptoutname="{}/{}-cutadapt.fastq.gz".format(outdir, samplename)
runCutadapt(forward, mismatch, R1cutadaptoutname, samplename, inR1, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/2$/s/\/2/\/1/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R1cutadaptoutname, R1cutadaptoutname, R1cutadaptoutname, R1cutadaptoutname)
R1cutadaptoutnameF="{}/{}-cutadapt.fastq.gz".format(outdir, samplename)
runCutadapt(forward, mismatch, R1cutadaptoutnameF, samplename, inR1, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/2$/s/\/2/\/1/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R1cutadaptoutnameF, R1cutadaptoutnameF, R1cutadaptoutnameF, R1cutadaptoutnameF)
out = runcmd(cmd)
samplename="{}_{}".format(couplename, R2R)
R2cutadaptoutname="{}/{}-cutadapt.fastq.gz".format(outdir, samplename)
runCutadapt(reverse, mismatch, R2cutadaptoutname, samplename, inR2, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/1$/s/\/1/\/2/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R2cutadaptoutname, R2cutadaptoutname, R2cutadaptoutname, R2cutadaptoutname)
R2cutadaptoutnameR="{}/{}-cutadapt.fastq.gz".format(outdir, samplename)
runCutadapt(reverse, mismatch, R2cutadaptoutnameR, samplename, inR2, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/1$/s/\/1/\/2/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R2cutadaptoutnameR, R2cutadaptoutnameR, R2cutadaptoutnameR, R2cutadaptoutnameR)
out = runcmd(cmd)
singletons="{}/{}_R1F-R2Rsingletons.fastq.gz".format(outdir, couplename)
R1repaired= R1cutadaptoutname.replace("-cutadapt", "")
R2repaired= R2cutadaptoutname.replace("-cutadapt", "")
rePairReads(outdir, R1repaired, R2repaired , singletons, R1cutadaptoutname, R2cutadaptoutname)
R1repairedF= R1cutadaptoutnameF.replace("-cutadapt", "")
R2repairedR= R2cutadaptoutnameR.replace("-cutadapt", "")
rePairReads(outdir, R1repairedF, R2repairedR, singletons, R1cutadaptoutnameF, R2cutadaptoutnameR)
samplename="{}_{}".format(couplename, R1R)
R1cutadaptoutname="{}/{}-cutadapt.fastq.gz".format(outdir,samplename)
runCutadapt(reverse, mismatch, R1cutadaptoutname, samplename, inR1, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/2$/s/\/2/\/1/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R1cutadaptoutname, R1cutadaptoutname, R1cutadaptoutname, R1cutadaptoutname)
R1cutadaptoutnameR="{}/{}-cutadapt.fastq.gz".format(outdir,samplename)
runCutadapt(reverse, mismatch, R1cutadaptoutnameR, samplename, inR1, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/2$/s/\/2/\/1/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R1cutadaptoutnameR, R1cutadaptoutnameR, R1cutadaptoutnameR, R1cutadaptoutnameR)
out = runcmd(cmd)
samplename="{}_{}".format(couplename, R2F)
R2cutadaptoutname="{}/{}-cutadapt.fastq.gz".format(outdir,samplename)
runCutadapt(forward, mismatch, R2cutadaptoutname, samplename, inR2, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/1$/s/\/1/\/2/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R2cutadaptoutname, R2cutadaptoutname, R2cutadaptoutname, R2cutadaptoutname)
R2cutadaptoutnameF="{}/{}-cutadapt.fastq.gz".format(outdir,samplename)
runCutadapt(forward, mismatch, R2cutadaptoutnameF, samplename, inR2, tmpdir, trimreads)
cmd = "gunzip < {} | sed -e '/@[A-Z][0-9]:.*\/1$/s/\/1/\/2/g' | gzip -c > {}.tempo.gz ; mv {}.tempo.gz {}".format(R2cutadaptoutnameF, R2cutadaptoutnameF, R2cutadaptoutnameF, R2cutadaptoutnameF)
out = runcmd(cmd)
singletons="{}/{}_R1R-R2Fsingletons.fastq.gz".format(outdir, couplename)
R1repaired= R1cutadaptoutname.replace("-cutadapt", "")
R2repaired= R2cutadaptoutname.replace("-cutadapt", "")
rePairReads(outdir, R1repaired, R2repaired , singletons, R1cutadaptoutname, R2cutadaptoutname)
R1repairedR= R1cutadaptoutnameR.replace("-cutadapt", "")
R2repairedF= R2cutadaptoutnameF.replace("-cutadapt", "")
rePairReads(outdir, R1repaired, R2repaired , singletons, R1cutadaptoutnameR, R2cutadaptoutnameF)
cmd = "zcat {} {} > {} ; gzip {}".format(R1repairedF, R2repairedF, R1repairedF.replace("_R1F", "R1"), R1repairedF.replace("_R1F", "R1"))
out = runcmd(cmd)
cmd = "zcat {} {} > {} ; gzip {}".format(R1repairedF, R2repairedF, R1repairedF.replace("_R1F", "R1"), R1repairedF.replace("_R1F", "R1"))
out = runcmd(cmd)
def runCutadapt(primerseq, mismatch, outfile, samplename, infile, tmpdir, trimreads) :
logging.debug("Going to run cutadapt on file {}".format(infile))
......@@ -222,6 +227,6 @@ if __name__ == '__main__':
extractReads(inR1, inR2, outdir, forward, reverse, mismatch, tmpdir, trimreads)
#create frogs archive
os.system("mkdir -p {}/frogs; cd {}; tar cvf frogs/samples.tar *_{{R1F,R2R,R2F,R1R}}.fastq.gz".format(outdir,outdir))
os.system("mkdir -p {}/frogs; cd {}; tar cvf frogs/samples.tar *_{{R1,R2}}.fastq.gz".format(outdir,outdir))
logging.debug("End of processing")
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment