From fc7a50ab864439a24a2704209255e8506173abaf Mon Sep 17 00:00:00 2001 From: Helena Winata Date: Sat, 19 Jun 2021 17:48:52 -0700 Subject: [PATCH 1/4] small edits to work with delly vcf --- svtools/cli.py | 2 +- svtools/vcftobedpe.py | 5 ++++- svtools/vcftobedpeconverter.py | 9 ++++++--- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/svtools/cli.py b/svtools/cli.py index 9e3bb26f..74bd4c75 100644 --- a/svtools/cli.py +++ b/svtools/cli.py @@ -18,7 +18,7 @@ class SupportAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): support_string = 'For further help or to report a bug, please open an issue on the svtools repository: https://github.com/hall-lab/svtools/issues' - print support_string + print(support_string) sys.exit() def svtools_cli_parser(): diff --git a/svtools/vcftobedpe.py b/svtools/vcftobedpe.py index c83c211c..ac2bc06c 100755 --- a/svtools/vcftobedpe.py +++ b/svtools/vcftobedpe.py @@ -65,12 +65,15 @@ def vcfToBedpe(vcf_file, bedpe_out): v = line.rstrip().split('\t') var = svtools.vcf.variant.Variant(v, vcf) - var.set_info("POS", var.pos) + var["POS"] = var.pos # If there is no MATEID then assume this is a single-ended BND and simply output if var.info['SVTYPE'] != 'BND' or 'MATEID' not in var.info: bedpe_out.write(str(converter.convert(var)) + '\n') else: mate_id = var.info['MATEID'] + if "_" in mate_id: + mate_id = mate_id.split('_')[0] + if 'SECONDARY' in var.info: if mate_id in bnds: #primary diff --git a/svtools/vcftobedpeconverter.py b/svtools/vcftobedpeconverter.py index 8908fd70..f45bad2c 100644 --- a/svtools/vcftobedpeconverter.py +++ b/svtools/vcftobedpeconverter.py @@ -74,7 +74,7 @@ def adjust_coordinate(vcf_variant, info_tag, start, end): of the tag (if it exists) ''' if info_tag in vcf_variant.info: - span = map(int, vcf_variant.info[info_tag].split(',')) + span = list(map(int, vcf_variant.info[info_tag].split(','))) if len(span) != 2: raise ValueError('Invalid value for tag {0}. Require 2 values to adjust coordinates.'.format(info_tag)) return (start + span[0], end + span[1]) @@ -137,7 +137,7 @@ def convert(self, primary_variant, secondary_variant=None): - fields = map(str, [ + fields = list(map(str, [ c1, max(s1, 0), max(e1, 0), @@ -158,8 +158,11 @@ def convert(self, primary_variant, secondary_variant=None): orig_alt_b, info_a, info_b, - ]) + ])) if vcf_variant.get_format_string() is not None: fields += [vcf_variant.get_format_string(), vcf_variant.get_gt_string()] + print(fields) + print('VARIANT', vcf_variant) + print(vcf_variant.get_format_string()) return Bedpe(fields) From c289f4868984bf27278dee3ff8e8a033dd55797a Mon Sep 17 00:00:00 2001 From: Helena Winata Date: Sat, 19 Jun 2021 18:02:25 -0700 Subject: [PATCH 2/4] minor edits --- svtools/breakpoint.py | 4 ++-- svtools/vcfpaste.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/svtools/breakpoint.py b/svtools/breakpoint.py index 689e2ba6..010301b5 100644 --- a/svtools/breakpoint.py +++ b/svtools/breakpoint.py @@ -1,7 +1,7 @@ import sys -import l_bp -from exceptions import MissingProbabilitiesException +import svtools.l_bp +from svtools.exceptions import MissingProbabilitiesException class BreakpointInterval(object): ''' diff --git a/svtools/vcfpaste.py b/svtools/vcfpaste.py index ca43b545..5100d063 100644 --- a/svtools/vcfpaste.py +++ b/svtools/vcfpaste.py @@ -34,7 +34,7 @@ def open_files(self): self.vcf_files = [] # parse the vcf files to paste for path in self.vcf_file_names: - self.vcf_files.append(InputStream(path, self.tempdir)) + self.vcf_files.append(InputStream(path, self.tempdir)) def write_header(self, output_handle=sys.stdout): master = self.vcf_files[0] From 8d3a10f816ce300b8c265a5e54eeb4a01b89b196 Mon Sep 17 00:00:00 2001 From: Helena Winata Date: Sun, 20 Jun 2021 14:28:39 -0700 Subject: [PATCH 3/4] tested to work with delly vcfs --- svtools/utils.py | 2 +- svtools/vcftobedpe.py | 6 +++--- svtools/vcftobedpeconverter.py | 30 ++++++++++++++++++------------ 3 files changed, 22 insertions(+), 16 deletions(-) diff --git a/svtools/utils.py b/svtools/utils.py index 366a438e..a5a34b27 100644 --- a/svtools/utils.py +++ b/svtools/utils.py @@ -124,7 +124,7 @@ def parse_bnd_alt_string(alt_string): # NOTE The below is ugly but intended to match things like [2:222[ and capture the brackets result = re.findall(r'([][])(.+?)([][])', alt_string) assert result, "%s\n" % alt_string - #sys.stderr.write("%s\n" % alt_string) + sys.stderr.write("%s\n" % alt_string) sep1, region, sep2 = result[0] assert sep1 == sep2 chrom2, breakpoint2 = region.rsplit(':', 1) diff --git a/svtools/vcftobedpe.py b/svtools/vcftobedpe.py index ac2bc06c..06aa7ec2 100755 --- a/svtools/vcftobedpe.py +++ b/svtools/vcftobedpe.py @@ -65,7 +65,7 @@ def vcfToBedpe(vcf_file, bedpe_out): v = line.rstrip().split('\t') var = svtools.vcf.variant.Variant(v, vcf) - var["POS"] = var.pos + var.set_info("POS", var.pos) # If there is no MATEID then assume this is a single-ended BND and simply output if var.info['SVTYPE'] != 'BND' or 'MATEID' not in var.info: bedpe_out.write(str(converter.convert(var)) + '\n') @@ -73,7 +73,7 @@ def vcfToBedpe(vcf_file, bedpe_out): mate_id = var.info['MATEID'] if "_" in mate_id: mate_id = mate_id.split('_')[0] - + if 'SECONDARY' in var.info: if mate_id in bnds: #primary @@ -92,7 +92,7 @@ def vcfToBedpe(vcf_file, bedpe_out): bedpe_out.write(str(converter.convert(var1, var)) + '\n') del bnds[mate_id] else: - bnds.update({var.var_id:var}) + bnds.update({mate_id:var}) if bnds is not None: for bnd in bnds: sys.stderr.write('Warning: missing secondary multiline variant at ID:' + bnd + '\n') diff --git a/svtools/vcftobedpeconverter.py b/svtools/vcftobedpeconverter.py index f45bad2c..56c9e0a5 100644 --- a/svtools/vcftobedpeconverter.py +++ b/svtools/vcftobedpeconverter.py @@ -19,17 +19,24 @@ def bnd_breakpoints(self, vcf_variant): ''' chrom1 = vcf_variant.chrom breakpoint1 = vcf_variant.pos - orientation1 = orientation2 = '+' - sep, chrom2, breakpoint2 = parse_bnd_alt_string(vcf_variant.alt) - breakpoint2 = int(breakpoint2) + if 'MATECHROM' in vcf_variant.info: + chrom2 = vcf_variant.info['MATECHROM'] + breakpoint2 = int(vcf_variant.info['MATEPOS']) + orientation1 = vcf_variant.info['STRAND'] + orientation2 = vcf_variant.info['MATESTRAND'] - if vcf_variant.alt.startswith(sep): - orientation1 = '-' - breakpoint1 -= 1 + else: + orientation1 = orientation2 = '+' + sep, chrom2, breakpoint2 = parse_bnd_alt_string(vcf_variant.alt) + breakpoint2 = int(breakpoint2) + + if vcf_variant.alt.startswith(sep): + orientation1 = '-' + breakpoint1 -= 1 - if sep == '[': - orientation2 = '-' - breakpoint2 -= 1 + if sep == '[': + orientation2 = '-' + breakpoint2 -= 1 return (chrom1, breakpoint1, @@ -129,6 +136,8 @@ def convert(self, primary_variant, secondary_variant=None): # XXX This has probably already been calculated outside of this method. May be a candidate to memoize or otherwise cache? # By adding to the variant class, perhaps? name = vcf_variant.var_id + if '_' in name: + name = name.split('_')[0] if 'EVENT' in vcf_variant.info: name = vcf_variant.info['EVENT'] elif 'MATEID' in vcf_variant.info and vcf_variant.var_id.startswith('Manta'): @@ -161,8 +170,5 @@ def convert(self, primary_variant, secondary_variant=None): ])) if vcf_variant.get_format_string() is not None: fields += [vcf_variant.get_format_string(), vcf_variant.get_gt_string()] - print(fields) - print('VARIANT', vcf_variant) - print(vcf_variant.get_format_string()) return Bedpe(fields) From 23ce5a35e56e980f0ceca3a39ba91c5a69df0398 Mon Sep 17 00:00:00 2001 From: Helena Winata Date: Sun, 20 Jun 2021 14:36:05 -0700 Subject: [PATCH 4/4] revert some changes --- svtools/cli.py | 2 +- svtools/utils.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/svtools/cli.py b/svtools/cli.py index 74bd4c75..9e3bb26f 100644 --- a/svtools/cli.py +++ b/svtools/cli.py @@ -18,7 +18,7 @@ class SupportAction(argparse.Action): def __call__(self, parser, namespace, values, option_string=None): support_string = 'For further help or to report a bug, please open an issue on the svtools repository: https://github.com/hall-lab/svtools/issues' - print(support_string) + print support_string sys.exit() def svtools_cli_parser(): diff --git a/svtools/utils.py b/svtools/utils.py index a5a34b27..366a438e 100644 --- a/svtools/utils.py +++ b/svtools/utils.py @@ -124,7 +124,7 @@ def parse_bnd_alt_string(alt_string): # NOTE The below is ugly but intended to match things like [2:222[ and capture the brackets result = re.findall(r'([][])(.+?)([][])', alt_string) assert result, "%s\n" % alt_string - sys.stderr.write("%s\n" % alt_string) + #sys.stderr.write("%s\n" % alt_string) sep1, region, sep2 = result[0] assert sep1 == sep2 chrom2, breakpoint2 = region.rsplit(':', 1)