Skip to content

Commit dd39d4b

Browse files
committed
modified: INSTALL.sh
modified: README.md modified: assemblatron.py
1 parent 09c34c3 commit dd39d4b

File tree

3 files changed

+58
-9
lines changed

3 files changed

+58
-9
lines changed

INSTALL.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,9 @@ pip install BESST
88

99
cd scripts
1010
python setup.py build_ext --inplace
11+
12+
cd ..
13+
14+
git clone https://github.com/ablab/quast.git
15+
cd quast
16+
pip install -e .

README.md

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ The full workflow involves:
1515

1616
3: alignment of the contigs
1717

18-
4: compute assembly statistics
18+
4: Quality control, using assemblatron stats or quast
1919

2020
5: variant calling
2121

@@ -26,10 +26,9 @@ The assemblatron workflow is run through the following commands:
2626

2727
run the install script:
2828
./INSTALL.sh
29-
The install script will download and install BESST, fermikit, and tiddit.
29+
The install script will download and install BESST, fermikit, and quast.
3030
Dependencies:
3131

32-
vcftools
3332
samtools
3433
python 2.7
3534
bwa
@@ -71,12 +70,23 @@ Assemblatron performs alignment using bwa mem. The output is printed to a file n
7170
A bam file named <prefix>.bam will be produced
7271

7372
# Stats
74-
compute various statistics of an assembly. THe output is printed to stdout.
73+
compute various statistics of an assembly. The output is printed to stdout.
7574

76-
python assemblatron.py --stats <contigs_bam>
75+
python assemblatron.py --stats --bam <contigs_bam>
7776

7877
The statistics include N50, L50, assembly size, and the number of contigs.
7978

79+
# Quast
80+
81+
The quality control may be performed using quast. Quast performs a more in-depth but slower analysis.
82+
83+
python assemblatron.py --quast --contigs <contigs_fasta> --output <output_folder>
84+
85+
python assemblatron.py --quast --contigs <contigs_fasta> --ref <reference.fasta> --output <output_folder>
86+
87+
The quast module supports any number of fasta files (list them after --contigs). Type --help for more information.
88+
NOTE: use an absolute path for the output directory.
89+
8090
# SV
8191
Call SV. Assemblatron will classify variants as DEL, INV, BND (translocation or complex), INS, and TDUP.
8292

@@ -102,7 +112,7 @@ Call indels and SNV using htsbox pileup (same as fermikit).
102112

103113
the output is printed to stdout
104114

105-
other options:
115+
options:
106116

107117
-h, --help show this help message and exit
108118
--bam BAM input bam (contigs)
@@ -140,3 +150,7 @@ Cite the components that you used, as well as the Assemblatron git hub page.
140150
If you performed scaffolding, please cite the BESST paper:
141151

142152
https://github.com/ksahlin/BESST
153+
154+
For more info on QUAST:
155+
156+
https://github.com/ablab/quast

assemblatron.py

Lines changed: 32 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ def assemble(args,wd):
3535
parser.add_argument('--align' , help="align contigs to reference using bwa mem", required=False, action="store_true")
3636
parser.add_argument('--fasta' , help="convert aligned contigs bam file to fasta", required=False, action="store_true")
3737
parser.add_argument('--fastq' , help="convert bam to fastq", required=False, action="store_true")
38+
parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true")
3839

3940
args, unknown = parser.parse_known_args()
4041

@@ -79,6 +80,25 @@ def assemble(args,wd):
7980
args= parser.parse_args()
8081

8182
stats.assembly_stats(args)
83+
elif args.quast:
84+
85+
parser = argparse.ArgumentParser("""QUAST - quality control""")
86+
parser.add_argument('--quast' , help="compute assembly stats using quast", required=False, action="store_true")
87+
parser.add_argument('--contigs', nargs='*', help="input contigs (multiple assemblies are allowed)", required=True)
88+
parser.add_argument('--ref',required = False,type=str, help="reference fasta")
89+
parser.add_argument('--output',required = True,type=str, help="output folder")
90+
parser.add_argument('--features',nargs='*',required = False,type=str, help="Feature BED/GFF file")
91+
parser.add_argument('--len',default=100,type=int, help="minimum contig length (default= 100 bp)")
92+
args= parser.parse_args()
93+
94+
quast="quast.py {} --output-dir {} --min-contig {}".format(" ".join(args.contigs),args.output,args.len)
95+
96+
if args.ref:
97+
quast+=" -r {}".format(args.ref)
98+
99+
if args.features:
100+
quast+=" -g {}".format(" ".join(args.features))
101+
os.system(quast)
82102

83103
elif args.align:
84104
parser = argparse.ArgumentParser("""Assemblatron align - align contigs to the reference using bwa mem""")
@@ -115,6 +135,8 @@ def assemble(args,wd):
115135
parser.add_argument('--mem' , help="maximum mempry per thread (gigabytes)", type=int, default=4)
116136
parser.add_argument('--iter' , help="Number of itterations (default = 500000)", type=int, default=500000)
117137
parser.add_argument('--cores' ,type=int, default = 8, help="number of cores (default = 2)", required=False)
138+
parser.add_argument('-q' ,type=int, help="minimum mapping quality for scaffolding", required=False)
139+
parser.add_argument('-p' ,type=int, help="minimum number of read-pairs to create edge", required=False)
118140
args= parser.parse_args()
119141

120142
args.prefix=args.filename
@@ -132,10 +154,17 @@ def assemble(args,wd):
132154
os.system("samtools index {}".format(args.bam))
133155

134156
if args.rf:
135-
os.system("runBESST -c {} -f {} -orientation rf -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter))
157+
besst="runBESST -c {} -f {} -orientation rf -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter)
136158
else:
137-
os.system("runBESST -c {} -f {} -orientation fr -o {} --iter {}".format(args.contigs,args.bam,args.output,args.iter))
138-
159+
besst="runBESST -c {} -f {} -orientation fr -o {} -plots --iter {}".format(args.contigs,args.bam,args.output,args.iter)
160+
161+
if args.q:
162+
besst+= " --min_mapq {}".format(args.q)
163+
164+
if args.p:
165+
besst+= " -e {}".format(args.p)
166+
167+
os.system(besst)
139168
elif args.fastq:
140169

141170
parser = argparse.ArgumentParser("""Assemblatron fastq - converts bam to fastq using samtools""")

0 commit comments

Comments
 (0)