-
Notifications
You must be signed in to change notification settings - Fork 11
Open
Description
I have been using TxDb to parse GFF and obtain transcripts, but I have been getting these errors intermittently, which seem to be gc related and hard to reproduce:
Enter a frame number, or 0 to exit
1: run_FLAMES(y[[i]])
2: run_FLAMES(y[[i]])
3: tryCatch(run_step(pipeline, step, disable_controller = FALSE), error = func
4: tryCatchList(expr, classes, parentenv, handlers)
5: tryCatchOne(expr, names, parentenv, handlers[[1]])
6: doTryCatch(return(expr), name, parentenv, handler)
7: run_step(pipeline, step, disable_controller = FALSE)
8: run_step(pipeline, step, disable_controller = FALSE)
9: transcript_quantification(pipeline)
10: transcript_quantification(pipeline)
11: quantify_transcript(annotation = annotation, outdir = pipeline@outdir, conf
12: quantify_transcript_oarfish(annotation, outdir, config, pipeline, ...)
13: parse_oarfish_sc_output(oarfish_out, annotation, outdir)
14: tryCatch(GenomicRanges::mcols(annotation_grl)[, "gene_id"], error = functio
15: tryCatchList(expr, classes, parentenv, handlers)
16: tryCatchOne(expr, names, parentenv, handlers[[1]])
17: value[[3]](cond)
18: tryCatch({
txdb_list <- lapply(lapply(c(annotation, novel_annotation), f
19: tryCatchList(expr, classes, parentenv, handlers)
20: lapply(lapply(c(annotation, novel_annotation), fake_stranded_gff), txdbmake
21: FUN(X[[i]], ...)
22: import(file, format = format, colnames = colnames, feature.type = GFF_FEATU
23: import(file, format = format, colnames = colnames, feature.type = GFF_FEATU
24: import(FileForFormat(con, format), ...)
25: import(FileForFormat(con, format), ...)
26: .local(con, format, text, ...)
27: readGFFAsGRanges(resource, version = version, colnames = colnames, filter =
28: makeGRangesFromDataFrame(df, seqnames.field = "seqid")
29: df[[granges_cols[["strand"]]]]
30: df[[granges_cols[["strand"]]]]
31: callNextMethod()
32: .nextMethod(x = x, i = i)
33: selectMethod("[[", "SimpleList")
34: .findInheritedMethods(signature, fdef, mtable = allmethods, table = mlist,
35: .inheritedArgsExpression(m@target, m@defined, body(m))
36: (function (x)
x$.self$finalize())(<environment>)
Selection: 36
Called from: top level
Browse[1]> ls()
class(x)
ls(envir = x, all.names = TRUE)
[1] "x"
[1] "environment"
[1] ".->conn" ".->isActiveSeq" ".->packageName"
[4] ".->user_genome" ".->user_seqlevels" ".->user2seqlevels0"
[7] ".refClassDef" ".self" "conn"
[10] "initFields" "initialize" "isActiveSeq"
[13] "packageName" "user_genome" "user_seqlevels"
[16] "user2seqlevels0"
Browse[1]> get(".self", envir = x)
TxDb object:
# Db type: TxDb
# Supporting package: GenomicFeatures
# Data source: /tmp/RtmpxSqkAN/file305cb064043f29/isoform_annotated.gff3
# Organism: NA
# Taxonomy ID: NA
# miRBase build ID: NA
# Genome: NA
# Nb of transcripts: 14
# Db created by: txdbmaker package from Bioconductor
# Creation time: 2025-07-18 15:40:24 +1000 (Fri, 18 Jul 2025)
# txdbmaker version at creation time: 1.0.1
# RSQLite version at creation time: 2.3.7
# DBSCHEMAVERSION: 1.2
Browse[1]> get(".self", envir = x)$.self
TxDb object:
# Db type: TxDb
# Supporting package: GenomicFeatures
# Data source: /tmp/RtmpxSqkAN/file305cb064043f29/isoform_annotated.gff3
# Organism: NA
# Taxonomy ID: NA
# miRBase build ID: NA
# Genome: NA
# Nb of transcripts: 14
# Db created by: txdbmaker package from Bioconductor
# Creation time: 2025-07-18 15:40:24 +1000 (Fri, 18 Jul 2025)
# txdbmaker version at creation time: 1.0.1
# RSQLite version at creation time: 2.3.7
# DBSCHEMAVERSION: 1.2
Browse[1]> str(get(".self", envir = x)$.self)
Prototypical reference class 'TxDb' [package "GenomicFeatures"]
Browse[1]>
Code chunk for frame 14:
SummarizedExperiment::rowData(sce)$gene_id <- tryCatch(
GenomicRanges::mcols(annotation_grl)[, "gene_id"],
error = function(e) {
sprintf("Error when adding gene ID: %s, trying again with txdbmaker...", e$message)
gene_id_tb <- tryCatch({
txdb_list <- c(annotation, novel_annotation) |>
lapply(fake_stranded_gff) |>
lapply(txdbmaker::makeTxDbFromGFF)
gene_id_tb <- txdb_list |>
lapply(\(x) GenomicFeatures::transcripts(x, columns = c("gene_id", "tx_name"))) |>
lapply(\(x) GenomicRanges::mcols(x)) |>
do.call(rbind, args = _)
gene_id_tb
})
gene_id_tb[match(rownames(sce), gene_id_tb$tx_name), "gene_id"] |>
as.character()
}
)
#' Fake stranded GFF file
#' @description Check if all the transcript in the annotation is stranded. If not, convert to '+'.
#' @return Path to the temporary file with unstranded transcripts converted to '+'.
#' @keywords internal
fake_stranded_gff <- function(gff_file) {
# check if all the transcript in the annotation is stranded
annotation_d <- read.csv(gff_file, sep = "\t",
header = FALSE, stringsAsFactors = FALSE,
comment.char = "#")
strands <- annotation_d[, 7]
if (any(strands == '.')) {
modified_gtf <- paste0(tempfile(), '/tmp.gtf')
dir.create(dirname(modified_gtf))
warning(sprintf("Some transcripts in the annotation file %s are not stranded. Converting to '+' in temporary file %s", gff_file, modified_gtf))
strands[strands == '.'] <- '+'
annotation_d[, 7] <- strands
write.table(annotation_d, modified_gtf, sep = "\t",
row.names = FALSE, quote = FALSE, col.names = FALSE)
return(modified_gtf)
# file will get deleted after quitting R
}
return(gff_file)
}
Metadata
Metadata
Assignees
Labels
No labels