### =========================================================================
### The transcripts(), exons(), cds() and promoters() extractors
### -------------------------------------------------------------------------


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### A high-level representation of the db relational model used for
### generating SQL queries.
###

### Note that we omit the *_start and *_end cols.
.ALLCOLS <- c("gene_id",
              "tx_id", "tx_name", "tx_chrom", "tx_strand",
              "exon_id", "exon_name", "exon_chrom", "exon_strand",
              "cds_id", "cds_name", "cds_chrom", "cds_strand",
              "exon_rank")

.CORETAGS <- c("id", "chrom", "strand", "start", "end")

### THE TRANSCRIPT CENTRIC POINT OF VIEW:
### If we look at the db from a transcript centric point of view, then the
### graph of relations between the tables becomes a tree where the 'transcript'
### table is the root:
###                            transcript
###                             /      \
###                          gene    splicing
###                                  /      \
###                                exon     cds
###
.TRANSCRIPT_CENTRIC_DBDESC <- list(
    CORECOLS=structure(paste("tx", .CORETAGS, sep="_"), names=.CORETAGS),
    ### Each col defined in the db is assigned to the closest table (starting
    ### from the 'transcript' table) where it can be found. The 'transcript'
    ### element must be the 1st in the list:
    COLMAP=list(
        transcript=c("tx_id", makeFeatureColnames("tx")),
        gene="gene_id",
        splicing=c("exon_rank", "exon_id", "_exon_id", "cds_id", "_cds_id"),
        exon=c("exon_name", "exon_chrom", "exon_strand",
               "exon_start", "exon_end"),
        cds=c("cds_name", "cds_chrom", "cds_strand", "cds_start", "cds_end")
    ),
    ### For each table that is not the root or a leaf table in the above tree,
    ### we list the tables that are below it:
    CHILDTABLES=list(
        splicing=c("exon", "cds")
    ),
    ### For each table that is not the root table in the above tree, we specify
    ### the join condition with the parent table:
    JOINS=c(
        gene="transcript._tx_id=gene._tx_id",
        splicing="transcript._tx_id=splicing._tx_id",
        exon="splicing._exon_id=exon._exon_id",
        cds="splicing._cds_id=cds._cds_id"
    )
)

### THE EXON CENTRIC POINT OF VIEW:
### If we look at the db from an exon centric point of view, then the
### graph of relations between the tables becomes a tree where the 'exon'
### table is the root:
###                               exon
###                                |
###                             splicing
###                            /   |    \
###                   transcript  gene  cds
###
.EXON_CENTRIC_DBDESC <- list(
    CORECOLS=structure(paste("exon", .CORETAGS, sep="_"), names=.CORETAGS),
    ### Each col defined in the db is assigned to the closest table (starting
    ### from the 'exon' table) where it can be found. The 'exon' element must
    ### be the 1st in the list:
    COLMAP=list(
        exon=c("exon_id", makeFeatureColnames("exon")),
        splicing=c("tx_id", "_tx_id", "exon_rank", "cds_id", "_cds_id"),
        transcript=c("tx_name", "tx_chrom", "tx_strand", "tx_start", "tx_end"),
        gene="gene_id",
        cds=c("cds_name", "cds_chrom", "cds_strand", "cds_start", "cds_end")
    ),
    ### For each table that is not the root or a leaf table in the above tree,
    ### we list the tables that are below it:
    CHILDTABLES=list(
        splicing=c("transcript", "gene", "cds")
    ),
    ### For each table that is not the root table in the above tree, we specify
    ### the join condition with the parent table:
    JOINS=c(
        splicing="exon._exon_id=splicing._exon_id",
        transcript="splicing._tx_id=transcript._tx_id",
        gene="splicing._tx_id=gene._tx_id",
        cds="splicing._cds_id=cds._cds_id"
    )
)

### THE CDS CENTRIC POINT OF VIEW:
### If we look at the db from a cds centric point of view, then the
### graph of relations between the tables becomes a tree where the 'cds'
### table is the root:
###                               cds
###                                |
###                             splicing
###                            /   |    \
###                   transcript  gene  exon
###
.CDS_CENTRIC_DBDESC <- list(
    CORECOLS=structure(paste("cds", .CORETAGS, sep="_"), names=.CORETAGS),
    ### Each col defined in the db is assigned to the closest table (starting
    ### from the 'cds' table) where it can be found. The 'cds`' element must
    ### be the 1st in the list:
    COLMAP=list(
        cds=c("cds_id", makeFeatureColnames("cds")),
        splicing=c("tx_id", "_tx_id", "exon_rank", "exon_id", "_exon_id"),
        transcript=c("tx_name", "tx_chrom", "tx_strand", "tx_start", "tx_end"),
        gene="gene_id",
        exon=c("exon_name", "exon_chrom", "exon_strand",
               "exon_start", "exon_end")
    ),
    ### For each table that is not the root or a leaf table in the above tree,
    ### we list the tables that are below it:
    CHILDTABLES=list(
        splicing=c("transcript", "gene", "exon")
    ),
    ### For each table that is not the root table in the above tree, we specify
    ### the join condition with the parent table:
    JOINS=c(
        splicing="cds._cds_id=splicing._cds_id",
        transcript="splicing._tx_id=transcript._tx_id",
        gene="splicing._tx_id=gene._tx_id",
        exon="splicing._exon_id=exon._exon_id"
    )
)

.DBDESC <- list(
    transcript=.TRANSCRIPT_CENTRIC_DBDESC,
    exon=.EXON_CENTRIC_DBDESC,
    cds=.CDS_CENTRIC_DBDESC
)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Some utility functions.
###


## For converting user arguments FROM the UC style down to what we use
## internally
translateCols <- function(columns, txdb){
    ## preserve any names
    oriColNames <- names(columns)
    ## and save the original column strings
    oriCols <- columns
    
    oriLen <- length(columns) ## not always the same as length(oriCols)
    ## get the available abbreviations as a translation vector (exp)
    names <- .makeColAbbreviations(txdb)
    exp <- sub("^_","", names(names))
    names(exp) <- names

    ## Then replace only those IDs that match the UC names
    m <- match(oriCols, names(exp))
    idx = which(!is.na(m))
    columns[idx] <- exp[m[idx]]
    
    if(length(columns) == oriLen && is.null(oriColNames)){
        names(columns) <- oriCols 
    }else if(length(columns) == oriLen && !is.null(oriColNames)){
        names(columns) <- oriColNames         
    }else if(length(columns) != oriLen){
        stop("names were lost in translateCols() helper")
    }    
    columns
}




.getClosestTable <- function(root_table, colnames)
{
    COLMAP <- .DBDESC[[root_table]]$COLMAP
    ans <- character(length(colnames))
    ans[] <- NA_character_
    for (tablename in names(COLMAP))
        ans[colnames %in% COLMAP[[tablename]]] <- tablename
    ans
}

.assignColToClosestTable <- function(root_table, colnames)
{
    COLMAP <- .DBDESC[[root_table]]$COLMAP
    lapply(COLMAP, function(cols) intersect(cols, colnames))
}

.asQualifiedColnames <- function(root_table, colnames)
{
    colnames[colnames == "tx_id"] <- "_tx_id"
    colnames[colnames == "exon_id"] <- "_exon_id"
    colnames[colnames == "cds_id"] <- "_cds_id"
    paste(.getClosestTable(root_table, colnames), colnames, sep=".")
}

.joinRootToChildTables <- function(root_table, child_tables)
{
    COLMAP <- .DBDESC[[root_table]]$COLMAP
    CHILDTABLES <- .DBDESC[[root_table]]$CHILDTABLES
    JOINS <- .DBDESC[[root_table]]$JOINS
    all_tables <- names(COLMAP)
    ans <- all_tables[1L]
    for (i in seq_len(length(all_tables))[-1L]) {
        right_table <- all_tables[i]
        right_children <- c(right_table, CHILDTABLES[[right_table]])
        if (length(intersect(child_tables, right_children)) != 0L)
            ans <- paste(ans, "LEFT JOIN", right_table,
                              "ON", JOINS[[right_table]])
    }
    ans
}

### In the case of TranscriptDb objects, the distance between 'root_table'
### and 'child_table' is always <= 2.
### TODO: Revisit this. Not sure it would be guaranteed to work correctly if
### the distance between 'root_table' and 'child_table' was >= 3.
.joinPrimaryKeyToChildTable <- function(root_table, child_table)
{
    COLMAP <- .DBDESC[[root_table]]$COLMAP
    CHILDTABLES <- .DBDESC[[root_table]]$CHILDTABLES
    JOINS <- .DBDESC[[root_table]]$JOINS
    all_tables <- names(COLMAP)
    ans <- ""
    for (i in seq_len(length(all_tables))[-1L]) {
        right_table <- all_tables[i]
        right_children <- c(right_table, CHILDTABLES[[right_table]])
        if (length(intersect(child_table, right_children)) != 0L) {
            if (ans == "") {
                ans <- right_table
                next
            }
            ans <- paste(ans, "LEFT JOIN", right_table,
                              "ON", JOINS[[right_table]])
        }
    }
    ans
}

### convert a named list into an SQL where condition
.sqlWhereIn <- function(vals)
{
    if (length(vals) == 0L)
        return("")
    sql <-
      lapply(seq_len(length(vals)), function(i) {
               v <- vals[[i]]
               if (!is.numeric(v))
                 v <- paste0("'", v, "'")
               v <- paste0("(", paste(v, collapse=","), ")")
               v <- paste0(names(vals)[i], " IN ", v)
               paste0("(", v, ")")
            })
    paste("WHERE", paste(unlist(sql), collapse = " AND "))
}

.extractData <- function(root_table, txdb, what_cols, child_tables, vals,
                         orderby_cols=NULL)
{
    SQL_what <- paste(.asQualifiedColnames(root_table, what_cols),
                      collapse=", ")
    SQL_from <- .joinRootToChildTables(root_table, child_tables)
    SQL_where <- .sqlWhereIn(vals)
    if (length(orderby_cols) == 0L)
        SQL_orderby <- ""
    else
        SQL_orderby <- paste("ORDER BY", paste(orderby_cols, collapse=", "))
    SQL <- paste("SELECT DISTINCT", SQL_what, "FROM", SQL_from,
                 SQL_where, SQL_orderby)
    ans <- queryAnnotationDb(txdb, SQL)
    names(ans) <- what_cols
    ans
}

.getWhereCols <- function(vals)
{
    if (is.null(vals))
        return(character(0))
    ans <- NULL
    if (is.list(vals)) {
        if (length(vals) == 0L)
            return(character(0))
        ans <- names(vals)
    }
    if (is.null(ans))
        stop("'vals' must be NULL or a named list")
    #valid_columns <- setdiff(.ALLCOLS, "exon_rank")
    valid_columns <- .ALLCOLS
    if (!all(ans %in% valid_columns)) {
        valid_columns <- paste0("'", valid_columns, "'", collapse = ", ")
        stop("'vals' must be NULL or a list with names ",
             "in ", valid_columns)
    }
    ans
}

.extractRootData <- function(root_table, txdb, vals, root_columns)
{
    CORECOLS <- .DBDESC[[root_table]]$CORECOLS
    orderby_cols <- CORECOLS[c("chrom", "strand", "start", "end")]
    what_cols <- unique(c(CORECOLS, root_columns))
    where_cols <- .getWhereCols(vals)
    if (is.list(vals))
        names(vals) <- .asQualifiedColnames(root_table, where_cols)
    where_tables <- unique(.getClosestTable(root_table, where_cols))
    .extractData(root_table, txdb, what_cols, where_tables, vals, orderby_cols)
}

.extractDataFromChildTable <- function(root_table, txdb,
                                       primary_key, ids,
                                       child_table, child_columns)
{
    ans_names <- c(primary_key, child_columns)
    primary_key <- paste0("_", primary_key)
    what_cols <- c(primary_key,
                   .asQualifiedColnames(root_table, child_columns))
    SQL_what <- paste(what_cols, collapse=", ")
    SQL_from <- .joinPrimaryKeyToChildTable(root_table, child_table)
    vals <- list(ids)
    names(vals) <- primary_key
    SQL_where <- .sqlWhereIn(vals)
    SQL_orderby <- SQL_what
    SQL <- paste("SELECT DISTINCT", SQL_what, "FROM", SQL_from,
                 SQL_where, "ORDER BY", SQL_orderby)
    ans <- dbEasyQuery(AnnotationDbi:::dbConn(txdb), SQL)
    names(ans) <- ans_names
    ans
}

.extractChildData <- function(root_table, txdb, ids, assigned_columns)
{
    primary_key <- .DBDESC[[root_table]]$CORECOLS["id"]
    ans <- NULL
    all_tables <- names(assigned_columns)
    for (i in seq_len(length(all_tables))[-1L]) {
        child_columns <- assigned_columns[[i]]
        if (length(child_columns) == 0L)
            next
        child_table <- all_tables[i]
        data0 <- .extractDataFromChildTable(root_table, txdb,
                                            primary_key, ids,
                                            child_table, child_columns)
        data <- lapply(data0[ , -1L, drop=FALSE],
                       function(col0)
                       {
                           col <- split(col0, data0[[1L]])
                           col <- col[as.character(ids)]
                           class0 <- class(col0)
                           class <- paste0(toupper(substr(class0, 1L, 1L)),
                                           substr(class0, 2L, nchar(class0)),
                                           "List")
                           get(class)(unname(col))
                       })
        data <- DataFrame(data)
        if (is.null(ans))
            ans <- data
        else
            ans <- c(ans, data)
    }
    ans
}

## make a named list from the metadata data.frame
.makeMetadataList <- function(meta){
    lst <- as.list(meta[,2])
    names(lst) <- meta[,1]
    lst
}

## assign this to the metadata list in relevant object
.assignMetadataList <- function(obj, txdb){
    metadata(obj)[[1]] <- .makeMetadataList(metadata(txdb))
    names(metadata(obj))[[1]] <- "genomeInfo"
    obj
}

## helper to translate back to what is expected from seqinfo()
.translateToSeqInfo <- function(txdb, x){
    tr <- load_chrominfo(txdb, set.col.class=TRUE)$chrom[txdb$new2old]
    names(tr) <- txdb$.chrom    
    idx <- match(x, tr)
    names(tr)[idx]
}

.extractFeatureRowsAsGRanges <- function(root_table, txdb, vals, columns)
{
    ## 1st translate columns from UC format to LC format
    columns <- translateCols(columns, txdb)
    ## Then proceed with checking
    CORECOLS <- .DBDESC[[root_table]]$CORECOLS
    assigned_columns <- .assignColToClosestTable(root_table, columns)
    root_columns <- assigned_columns[[root_table]]
    ## Extract the data from the db.
    root_data <- .extractRootData(root_table, txdb, vals, root_columns)
    ## seqnames may be out of sync with expected results.  Massage back.
    root_data[[CORECOLS["chrom"]]] <- .translateToSeqInfo(txdb, 
                                          root_data[[CORECOLS["chrom"]]])
    child_data <- .extractChildData(root_table, txdb,
                          root_data[[CORECOLS["id"]]], assigned_columns)
    ## Construct the GRanges object and return it.
    ans_seqinfo <- seqinfo(txdb)
    ans_seqnames <- factor(root_data[[CORECOLS["chrom"]]],
                           levels=seqlevels(ans_seqinfo))
    ans_ranges <- IRanges(start=root_data[[CORECOLS["start"]]],
                          end=root_data[[CORECOLS["end"]]])
    ans_strand <- strand(root_data[[CORECOLS["strand"]]])

    activeNames <- names(.isActiveSeq(txdb))[.isActiveSeq(txdb)]
    seqinfo <- seqinfo(txdb)[activeNames]
    ans <- GRanges(seqnames = ans_seqnames,  
                   ranges = ans_ranges,
                   strand = ans_strand,
                   seqinfo = seqinfo)

    ans_values <- c(DataFrame(root_data[root_columns]), child_data)
    if (is.null(names(columns)))
      names(columns) <- columns
    mcols(ans)[names(columns)] <- ans_values[columns]
    .assignMetadataList(ans, txdb)
}



## this helper is just to get the .isActiveSeq vector, but to have it
## named based on the original database seqnames...
## This is important for places where .isActiveSeq() needs to be used
## as part of a database query instead of as part of a external
## representation.
.baseNamedActiveSeqs <- function(txdb){
    trueNames <- load_chrominfo(txdb, set.col.class=TRUE)$chrom
    actSqs <- .isActiveSeq(txdb)
    names(actSqs) <- trueNames[txdb$new2old] ## limit result to these.
    actSqs
}

## This is used to create a list from the .isActiveSeq slot
## !!!
## TODO: I think that this helper is screwing up the vals values in the methods
.makeActiveSeqsList <- function(type, txdb){
    actSqs <- .baseNamedActiveSeqs(txdb)
    keepSeqs <- names(actSqs)[actSqs]
    res <- list(keepSeqs)
    names(res) <- type
    res
}


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Primary extractors: transcripts(), exons(), cds(), and genes().
###
### Proposal:
###   - rename the 'vals' arg -> 'filter'
###   - rename the 'columns' arg -> 'colnames'
###

setGeneric("transcripts", function(x, ...) standardGeneric("transcripts"))

setMethod("transcripts", "data.frame",
    function(x, vals=NULL, columns=c("tx_id", "tx_name"))
        stop("Please use 'transcripts_deprecated' for older ",
             "data.frame-based transcript metadata.")
)

## TODOS: change defaults (WILL break many examples!)
## TODO: change defaults from c("tx_id", "tx_name") to: c("TXID", "TXNAME") 
setMethod("transcripts", "TranscriptDb",
    function(x, vals=NULL, columns=c("tx_id", "tx_name")){
        vals = c(vals, .makeActiveSeqsList("tx_chrom", x))
        .extractFeatureRowsAsGRanges("transcript", x, vals, columns)
      }
)

setGeneric("exons", function(x, ...) standardGeneric("exons"))

setMethod("exons", "data.frame",
    function(x, vals=NULL, columns="exon_id")
        stop("Please use 'exons_deprecated' for older ",
             "data.frame-based transcript metadata.")
)

## TODO: change defaults from c("exon_id") to: c("EXONID") 
setMethod("exons", "TranscriptDb",
    function(x, vals=NULL, columns="exon_id"){
        vals = c(vals, .makeActiveSeqsList("exon_chrom", x))
        .extractFeatureRowsAsGRanges("exon", x, vals, columns)
        }
)

setGeneric("cds", function(x, ...) standardGeneric("cds"))

## TODO: change defaults from c("cds_id") to: c("CDSID") 
setMethod("cds", "TranscriptDb",
    function(x, vals=NULL, columns="cds_id"){
        vals = c(vals, .makeActiveSeqsList("cds_chrom", x))
        .extractFeatureRowsAsGRanges("cds", x, vals, columns)
        }
)

setGeneric("genes", function(x, ...) standardGeneric("genes"))

.regroup <- function(x, new_breakpoints)
{
   if (is.list(x) || is(x, "List")) {
       new_breakpoints <- end(PartitioningByEnd(x))[new_breakpoints]
       x <- unlist(x, use.names=FALSE)
   }
   relist(x, PartitioningByEnd(new_breakpoints))
}

.regroup_rows <- function(df, new_breakpoints)
{
    ## FIXME: endoapply() on a DataFrame object is broken when applying
    ## a function 'FUN' that modifies the nb of rows. Furthermore, the
    ## returned object passes validation despite being broken! Fix it
    ## in IRanges.
    ans <- endoapply(df, function(x) unique(.regroup(x, new_breakpoints)))
    ## Fix the broken DataFrame returned by endoapply().
    ans@nrows <- length(new_breakpoints)
    ans@rownames <- NULL
    ans
}

### If 'single.strand.genes.only' is TRUE (the default), then genes that
### have exons located on both strands of the same chromosome, or on 2
### different chromosomes are dropped. In that case, the genes are returned
### in a GRanges object. Otherwise, they're returned in a GRangesList object
### with the metadata columns requested thru 'columns' set at the top level.
.TranscriptDb.genes <- function(x, vals=NULL, columns="gene_id",
                                single.strand.genes.only=TRUE)
{
    if (!is.character(columns))
        stop("'columns' must be a character vector")
    if (!isTRUEorFALSE(single.strand.genes.only))
        stop("'single.strand.genes.only' must be TRUE or FALSE")
    columns2 <- union(columns, "gene_id")
    tx <- transcripts(x, vals=vals, columns=columns2)

    ## Unroll 'tx' along the 'gene_id' metadata column.
    ## Note that the number of genes per transcript will generally be 1 or 0.
    ## But we also want to handle the situation where it's > 1 which happens
    ## when the same transcript is linked to more than 1 gene (because this
    ## may happen one day and is the reason behind the choice to represent
    ## the 'gene_id' as a CharacterList object instead of a character vector).
    gene_id <- mcols(tx)$gene_id
    ngene_per_tx <- elementLengths(gene_id)
    tx <- tx[rep.int(seq_along(ngene_per_tx), ngene_per_tx)]
    mcols(tx)$gene_id <- unlist(gene_id, use.names=FALSE)

    ## Split 'tx' by gene.
    tx_by_gene <- split(tx, mcols(tx)$gene_id)

    ## Turn inner mcols into outter mcols by reducing them.
    inner_mcols <- mcols(tx_by_gene@unlistData)[columns]
    mcols(tx_by_gene@unlistData) <- NULL
    new_breakpoints <- end(PartitioningByEnd(tx_by_gene))
    mcols(tx_by_gene) <- .regroup_rows(inner_mcols, new_breakpoints)
    ## Compute the gene ranges.
    genes <- range(tx_by_gene)

    if (!single.strand.genes.only)
        return(genes)

    keep_idx <- which(elementLengths(genes) == 1L)
    genes <- genes[keep_idx]
    ans <- unlist(genes, use.names=FALSE)
    mcols(ans) <- mcols(genes)
    names(ans) <- names(genes)
    ans
}

setMethod("genes", "TranscriptDb", .TranscriptDb.genes)


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### "promoters" method
###
### generic is in IRanges
###

setMethod("promoters", "TranscriptDb",
    function(x, upstream=2000, downstream=200, ...)
    {
        gr <- transcripts(x, ...)
        promoters(gr, upstream=upstream, downstream=downstream)
    }
) 


### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### disjointExons()
###

setGeneric("disjointExons", 
    function(x, ...) 
             standardGeneric("disjointExons")
)

setMethod("disjointExons", "TranscriptDb", 
    function(x, aggregateGenes=FALSE, includeTranscripts=TRUE, ...) 
    {
        exonsByGene <- exonsBy(x, by="gene")
        exonicParts <- disjoin(unlist(exonsByGene, use.names=FALSE))

        if (aggregateGenes) {
            foGG <- findOverlaps(exonsByGene, exonsByGene)
            aggregateNames <- .listNames(names(exonsByGene), as.list(foGG)) 
            foEG <- findOverlaps(exonicParts, exonsByGene, select="first")
            gene_id <- aggregateNames[foEG]
            pasteNames <- .pasteNames(names(exonsByGene), as.list(foGG))[foEG]
            orderByGeneName <- order(pasteNames) 
            exonic_rle <- runLength(Rle(pasteNames[orderByGeneName])) 
        } else {
            ## drop exonic parts that overlap > 1 gene
            foEG <- findOverlaps(exonicParts, exonsByGene)
            idxList <- as.list(foEG)
            if (any(keep <- countQueryHits(foEG) == 1)) {
                idxList <- idxList[keep]
                exonicParts <- exonicParts[keep]
            }
            gene_id <- .listNames(names(exonsByGene), idxList)
            orderByGeneName <- order(unlist(gene_id, use.names=FALSE)) 
            exonic_rle <- runLength(Rle(unlist(gene_id[orderByGeneName],
                                               use.names=FALSE))) 
        }
        values <- DataFrame(gene_id)

        if (includeTranscripts) {
           exonsByTx <- exonsBy(x, by="tx", use.names=TRUE )
           foET <- findOverlaps(exonicParts, exonsByTx)
           values$tx_name <- .listNames(names(exonsByTx), as.list(foET))
        } 
        mcols(exonicParts) <- values 
        exonicParts <- exonicParts[orderByGeneName]
        exonic_part <- unlist(lapply(exonic_rle, seq_len), use.names=FALSE)
        exonicParts$exonic_part <- exonic_part
        exonicParts
    }
)

## returns a character vector the same length as indexList
.pasteNames <- function(names, indexList)
 {
    nm <- names[unlist(indexList, use.names=FALSE)]
    rl <- relist(nm, indexList)
    el <- elementLengths(indexList) > 1
    rl[el] <- base::lapply(rl[el], base::paste, collapse="+")
    unlist(rl, use.names=FALSE)
}

## returns a CharacterList the same length as indexList
.listNames <- function(names, indexList)
{
    nm <- names[unlist(indexList, use.names=FALSE)]
    unname(CharacterList(relist(nm, indexList)))
}

 
### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Extractors for features in other databases.
###
### This is for extractors that do NOT point to the TranscriptDb proper.
### Such extractors can point to other databases (mirbase.db) OR they can
### point to other FeatureDbs within the same package.

## helpers for microRNAs

## helpers for for translating chromosomes. Now we have to assume a universal
## translator.  It seems that the chroms are in biomaRt style for mirbase.  So
## for biomaRt, return them as is, but for UCSC, add "chr" prefix.
.translateChromsForUCSC <- function(csomes){
  ## paste0("chr", csomes)
    csomes
}

.translateChromsForBiomaRt <- function(csomes){
  csomes
}

.syncSeqlevel <- function(txdb, ans){
  isActSeq <- .isActiveSeq(txdb)
  n2oNames <- levels(seqnames(ans))
  n2o <- match(seqnames(seqinfo(txdb)), n2oNames)
  seqinfo(ans, new2old=n2o) <- seqinfo(txdb)
  seqlevels(ans, force=TRUE) <- names(isActSeq)[isActSeq]
  ans
}

## main function
.microRNAs <- function(txdb){
  ## get the data about whether or not we have any info.
  con <- AnnotationDbi:::dbConn(txdb)
  bld <- dbGetQuery(con,
           "SELECT value FROM metadata WHERE name='miRBase build ID'")
  src <- DBI::dbGetQuery(con,
           "SELECT value FROM metadata WHERE name='Data source'")[[1]]

  ## And if not - bail out with message
  if(is.na(bld) || dim(bld)[1]==0){
    stop("this TranscriptDb does not have a miRBase build ID specified")}
  ## now connect to mirbase
  require(mirbase.db) ## strictly required

  ## What I need is the join of mirna with mirna_chromosome_build (via _id),
  ## that is then filtered to only have rows that match the species which goes
  ## with the build.
  
  ## connection
  mcon <- mirbase_dbconn()
  ## 1st lets get the organism abbreviation
  sql <- paste0("SELECT organism FROM mirna_species WHERE genome_assembly='",
                bld, "'")
  organism <- dbGetQuery(mcon, sql)[[1]]
  ## now get data and make a GRanges from it
  sql <- paste0("SELECT * from mirna_chromosome_build AS csome INNER JOIN ",
                "(SELECT _id,mirna_id,organism from mirna) AS mirna ",
                "WHERE mirna._id=csome._id AND organism='", organism, "' ")
  data <- dbGetQuery(mcon, sql)

  ## convert chromosomes
  csomes <- switch(src,
                   BioMart=.translateChromsForBiomaRt(data$xsome),
                   UCSC=.translateChromsForUCSC(data$xsome),
                   data$xsome)
  ## build GRanges
  ans <- GRanges(seqnames=csomes,
                 ranges=IRanges( ## sign may be reversed
                   start=abs(data$contig_start),
                   end=abs(data$contig_end)),
                 mirna_id = data$mirna_id,
                 strand=data$strand)
  
  ## Filter seqinfo
  .syncSeqlevel(txdb, ans)
}

setGeneric("microRNAs", function(x) standardGeneric("microRNAs"))

## Then set our method
setMethod("microRNAs", "TranscriptDb", function(x){.microRNAs(x)} )



## main function
.tRNAs <- function(txdb){
  require(FDb.UCSC.tRNAs)
  ## get the current package name
  pkgName <- .makePackageName(txdb)
  ## from here we know what the FDB should MUST look like
  fdbName <- sub("TxDb","FDb",pkgName)
  fdbName <- unlist(strsplit(fdbName,"\\."))
  fdbName[5] <- "tRNAs"
  fdbString <- paste(fdbName,collapse=".")
  if(!exists(fdbString)){
    stop("there is no tRNA data available for this organism/source")
  }else{
    ans <- features(eval(parse(text=fdbString)))
  }
  ## Now check active seqs and set the seqlevels
  ans <- .syncSeqlevel(txdb,ans)
  ## now return
  ans
}

setGeneric("tRNAs", function(x) standardGeneric("tRNAs"))

setMethod("tRNAs", "TranscriptDb", function(x){.tRNAs(x)} )

