################################################################
##
## Prediction of MET and PHO regulation in the whole yeast genome
##
################################################################

## source('~/research/discriminant_analysis/met_pho/R-files/met_pho_analysis.R')
source("~/research/discriminant_analysis/met_pho/R-files/met_pho_config.R")

    
################################################################
## Discriminant analysis on the basis of upstream motifs
##

## Test matrix score and oligonucleotide counts

## for (data.type in c( "matrix_scores", "oligo_counts")) {
## TEMPORARILY inactivated because there is a problem with the labels of oligo counts
## Stepwise discriminant analysis for every (data type, group set) combination.
## Only "matrix_scores" is listed for now; add further data types to the vector
## to process them as well.
for (data.type in c("matrix_scores")) {

  ## Two-group (MET, PHO) and three-group classifications
  for (discrim.group in c("MET", "PHO", "3groups")) {

    verbose(paste("Data type", data.type), 1)
    verbose(paste("Discrimination group", discrim.group), 1)

    ## ------------------------------------------------------------
    ## Data loading: the loader script is sourced from the R-files directory.
    ## NOTE(review): `data` and the group.labels.* / prior.all.* objects used
    ## below are presumably defined by the loader or by the config file.
    setwd(file.path(dir.main, "R-files"))
    source("load_met_pho_data.R")

    ## Labels, titles and priors for the current group set
    group.labels <- get(paste("group.labels", discrim.group, sep = "."))
    data.prefix <- paste(data.type, discrim.group, sep = "_")
    data.title <- data.prefix
    groups <- names(table(group.labels))
    prior.all <- get(paste("prior.all", discrim.group, sep = "."))
    ## Training priors = relative frequency of each label
    prior.training <- prop.table(table(group.labels))

    ## Output directories (one results folder per data prefix)
    dir.results <- file.path(dir.main, "results/discrim_results", data.prefix)
    dir.figures <- file.path(dir.results, "figures")
    dir.create.check(dir.results)
    dir.create.check(dir.figures)

    ## ------------------------------------------------------------
    ## Evaluate the different discriminant analysis methods
    pda.eval.matrix.scores <- compare.stepwise.pda.methods(
      data,
      group.labels,
      data.title = data.title,
      qda = do.qda,
      with.pc = do.pc,
      max.p = max.p,
      prior = prior.selection,
      test.number = test.number
    )

    ## Export the error profile plot
    setwd(dir.figures)
    export.plot(
      file.prefix = paste(data.prefix, "stepwise_PDA_error_profiles", sep = "_"),
      export.formats = export.formats.plots,
      width = 10,
      height = 7
    )
    ## Alternative export, kept for reference:
    ##  setwd(dir.figures); dev.print(file=paste(data.prefix, "stepwise_PDA_error_profiles.pdf", sep="_"), device=pdf)

    ## Export the full evaluation object, then the error profiles as a table
    setwd(dir.results)
    export.object(
      pda.eval.matrix.scores,
      file = paste(data.prefix, "stepwise_PDA", sep = "_"),
      export.formats = export.formats.obj
    )
    setwd(dir.results)
    export.object(
      pda.eval.matrix.scores$errors,
      file = paste(data.prefix, "stepwise_PDA", "error_profiles", sep = "_"),
      export.formats = "table"
    )

    ## Workaround, kept for reference: reload previously exported error
    ## profiles instead of recomputing them.
    ##    pda.eval.matrix.scores <- list(); setwd(dir.results);
    ##    pda.eval.matrix.scores$errors <- read.table(paste(data.prefix, "_stepwise_PDA", "_error_profiles", ".tab", sep=""), header=1,row.names=1,sep="\t")

    ## Article figure: error profiles without title and without legend
    plot.error.profiles(
      pda.eval.matrix.scores$errors,
      main = "",
      plot.legend = FALSE
    )
    setwd(dir.figures)
    export.plot(
      file.prefix = paste(data.prefix, "stepwise_PDA_error_profiles", "nolegend", sep = "_"),
      export.formats = export.formats.plots,
      width = 7,
      height = 5
    )

    ## Article figure, monochrome variant (grey-level palette)
    plot.error.profiles(
      pda.eval.matrix.scores$errors,
      main = "",
      plot.legend = FALSE,
      col = c(
        "#000000", "#333333", "#000000", "#333333",
        "#666666", "#999999", "#666666", "#999999"
      )
    )
    setwd(dir.figures)
    export.plot(
      file.prefix = paste(data.prefix, "stepwise_PDA_error_profiles", "nolegend", "bw", sep = "_"),
      export.formats = export.formats.plots,
      width = 7,
      height = 5
    )

    ## ------------------------------------------------------------
    ## Select the optimal linear discriminant function and analyse confusion
    predict.best.pda(pda.eval.matrix.scores, group.labels, data, data.prefix)

    ## Plot the genes along the first two components
    plot.prcomp.groups(data, group.labels)
    setwd(dir.figures)
    export.plot(
      file.prefix = paste(data.prefix, "prcomp", sep = "_"),
      export.formats = export.formats.plots,
      width = 11,
      height = 11
    )

  }

}


## ##############################################################
## LOO outside of the variable selection loop

## for (data.type in c( "matrix_scores", "oligo_counts")) {
## TEMPORARILY inactivated because there is a problem with the labels of oligo counts

## LOO evaluation run outside of the variable selection loop, once with the
## real labels and once with randomly permuted labels (permutation test).
##
## The loop variables are set by the loops themselves. To step through the
## loop body interactively, initialise them by hand first, e.g.:
##   data.type <- "matrix_scores"; discrim.group <- "PHO"
##   perm.test <- TRUE; pda.method <- "lda"
for (data.type in c("matrix_scores")) {
  ## for (pda.method in c("lda", "qda")) {
  for (pda.method in c("qda")) {

    ## test 3-group and 2-group classifications
    for (discrim.group in c("MET", "PHO", "3groups")) {

      verbose(paste("Data type", data.type), 1)
      verbose(paste("Discrimination group", discrim.group), 1)

      ## ##############################################################
      ## load the data
      ## NOTE(review): `data` and the group.labels.* / prior.all.* objects used
      ## below are presumably defined by the loader or by the config file.
      setwd(paste(dir.main, "R-files", sep = "/"))
      source("load_met_pho_data.R")

      ## select the group labels
      group.labels <- get(paste("group.labels", discrim.group, sep = "."))
      data.prefix <- paste(data.type, discrim.group, sep = "_")
      data.title <- data.prefix
      groups <- names(table(group.labels))
      prior.all <- get(paste("prior.all", discrim.group, sep = "."))
      prior.training <- table(group.labels) / sum(table(group.labels))

      ## ##############################################################
      ## LOO outside of the variable selection loop
      dir.results <- paste(dir.main, "results/discrim_results", data.prefix, "LOO_outside", sep = "/")
      dir.figures <- paste(dir.results, "figures", sep = "/")
      dir.create.check(dir.results)
      dir.create.check(dir.figures)
      setwd(dir.results)

      ## Keep only the units that have a label. The mask is computed once and
      ## reused so that rows and labels are guaranteed to stay aligned.
      ## drop = FALSE keeps the two-dimensional structure even when `data`
      ## has a single column.
      is.labelled <- !is.na(group.labels)
      x <- data[is.labelled, , drop = FALSE]
      known.labels <- group.labels[is.labelled]

      ## FALSE: real labels; TRUE: randomly permuted labels
      for (perm.test in c(FALSE, TRUE)) {

        if (perm.test) {
          grouping <- sample(known.labels)
          ## Compare permuted labels with the original labels.
          ## NOTE(review): inside a loop the returned value is not
          ## auto-printed; wrap this call in print() if confusion() does not
          ## display its result itself.
          confusion(grouping, known.labels)
        } else {
          grouping <- known.labels
        }

        ## Quick test
        ## test <- c(1,2,3,7,8)
        ## outside.result <- loo.outside.stepwise.pda(x[test,],grouping[test],max.p=2)
        ## print(outside.result)

        outside.result <- loo.outside.stepwise.pda(
          x,
          grouping,
          export.interm.result = TRUE,
          pda.method = pda.method
        )
        ## perm.test is pasted as "FALSE"/"TRUE" into the file name
        outside.file <- paste(data.prefix, "stepwise_PDA", pda.method, "LOO_outside", "permut", perm.test, sep = "_")
        setwd(dir.results)
        export.object(outside.result, file = outside.file, export.formats = export.formats.obj)

      }
    }
  }
}


## The rest is not working for the moment. I have to debug it.
## Hard stop: nothing below this line is executed until the remaining code
## has been debugged.
stop("THERE IS A BUG BELOW, THE REST IS TEMPORARILY INACTIVATED")


## ##############################################################
## Analysis of gene expression data

data.type <- "expression"

## Load the data (the loader script is sourced from the R-files directory)
setwd(file.path(dir.main, "R-files"))
source("load_met_pho_data.R")

## Group labels and naming for the PHO discrimination
discrim.group <- "PHO"
group.labels <- get(paste("group.labels", discrim.group, sep = "."))
data.prefix <- paste(data.type, discrim.group, sep = "_")
data.title <- data.prefix

## Output directories
dir.results <- file.path(dir.main, "results/discrim_results", data.prefix)
dir.figures <- file.path(dir.results, "figures")
dir.create.check(dir.results)
dir.create.check(dir.figures)

## ##############################################################
## Evaluate the different discriminant analysis methods
pda.eval.expression <- compare.stepwise.pda.methods(
  data,
  group.labels,
  data.title = data.title,
  qda = do.qda,
  with.pc = do.pc,
  max.p = max.p,
  prior = prior.selection
)

## Export the error profile plot, then the evaluation object
setwd(dir.figures)
export.plot(
  file.prefix = paste(data.prefix, "stepwise_PDA_error_profiles", sep = "_"),
  export.formats = export.formats.plots,
  width = 10,
  height = 7
)
setwd(dir.results)
export.object(
  pda.eval.expression,
  file = paste(data.prefix, "stepwise_PDA", sep = "_"),
  export.formats = export.formats.obj
)

################################################################
## predict class for the whole data set
## take the whole data set
## Train a predictive LDA on all variables of the whole data set, then rank
## every unit by its posterior probability of belonging to the PHO group.
## NOTE(review): `p` is not defined in this file; presumably the number of
## variables, set by the loader script -- confirm.
best.vars <- seq_len(p)  # seq_len() stays empty when p == 0, unlike 1:p
best.data <- data
pred.lda <- pda(best.data, group.labels, loo = FALSE, pda.method = "lda") ### predictive LDA
misclassified <- plot.misclassified.units(data, group.labels, pred.lda)

## One row per unit: original label followed by the prediction columns
predictions.expression <- cbind(
  group.labels,
  as.data.frame(predict(pred.lda, best.data, prior = prior.all.PHO))
)

## Sort by decreasing posterior probability of the PHO group
predictions.expression <- predictions.expression[
  order(predictions.expression$posterior.PHO, decreasing = TRUE),
]

## Export the ranked predictions together with gene name and description.
## The `file` argument name is used as in the other export.object() calls of
## this script.
setwd(dir.results)
export.object(
  cbind(
    predictions.expression,
    x.desc[row.names(predictions.expression), c("NAME", "DESCR")]
  ),
  file = paste(data.prefix, "stepwise_PDA", "all_pred", sep = "_"),
  export.formats = "table"
)

################################################################
## select genes expressed differently between PHO4c mutant and WT
##    pho4c.vs.wt <- data[,"PHO4c.vs.WT"]
## Normalize the chips and display the distribution of the values obtained
## for the PHO4c versus wild-type comparison
expression.normalized <- normalize.chips(data)
hist(expression.normalized$E.value[, "PHO4c.vs.WT"], breaks = 100)

## Z-score threshold: negated standard normal quantile at probability 1/n
## (qnorm defaults: mean 0, sd 1, lower tail, no log scale).
## NOTE(review): `n` is not defined in this file -- presumably set by the
## loader script; confirm.
z.thresh <- -qnorm(1 / n)

################################################################
## Discriminant analysis on normalized data
pda.eval.expression.z <- compare.stepwise.pda.methods(
  expression.normalized$z,
  group.labels,
  data.title = paste(data.title, "Z-scores"),
  qda = do.qda,
  with.pc = do.pc,
  max.p = max.p,
  prior = prior.selection
)

## Export the error profile plot, then the evaluation object
setwd(dir.figures)
export.plot(
  file.prefix = paste(data.prefix, "zscores", "stepwise_PDA_error_profiles", sep = "_"),
  export.formats = export.formats.plots,
  width = 10,
  height = 7
)
setwd(dir.results)
export.object(
  pda.eval.expression.z,
  file = paste(data.prefix, "zscores", "stepwise_PDA", sep = "_"),
  export.formats = export.formats.obj
)

#    pho4c.vs.wt <- expression.normalized$z[,"PHO4c.vs.WT"]
#    names(pho4c.vs.wt ) <- row.names(expression.normalized$z)
#    (Pho4p.targets <- pho4c.vs.wt[pho4c.vs.wt > z.thresh])
#    Pho4p.targets.names <- features[names(Pho4p.targets),"NAME"]

################################################################
## Select all genes having at least a significant response in one chip
## Select the genes whose smallest E-value is below the threshold, sorted by
## increasing minimal E-value, and export them with their z-scores, extreme
## statistics, name and description.
E.threshold <- 10e-10  # NOTE(review): equals 1e-09 -- confirm this is the intended value

extr.stats <- expression.normalized$extr

## which() discards units whose minimal E-value is NA; a plain logical index
## would turn each of them into an all-NA row.
significant <- which(extr.stats$E.value.min < E.threshold)

## Sort by position rather than by name: indexing the extracted column with
## row names only works if that column happens to carry names, and otherwise
## returns NA everywhere, which leaves the selection unsorted.
significant <- significant[order(extr.stats$E.value.min[significant])]
selected <- row.names(extr.stats)[significant]

Pho4p.targets <- cbind(
  expression.normalized$z[selected, ],
  extr.stats[selected, ],
  x.desc[selected, c("NAME", "DESCR")]
)

## The `file` argument name is used as in the other export.object() calls of
## this script.
## NOTE(review): 'selected_' combined with sep = '_' produces a double
## underscore in the file name; kept unchanged in case the name is relied on.
setwd(dir.results)
export.object(
  Pho4p.targets,
  file = paste(data.prefix, 'selected_', E.threshold, sep = '_'),
  export.formats = 'table'
)

  ## plot profiles of misclassified units
  ##  plot.profiles(data,row.names(failures),legend.labels=x.desc[row.names(failures),"NAME"],lwd=2,xlab="chip",main='"misclassifications" - gene expression data')
  
  ##    predictions.expression <- predict(pred.lda,best.data,prior=c(0.99,0.1))
  ##    predictions.expression <- cbind(group.labels, as.matrix(predictions.expression))
#    table(predictions.expression$class)
#    
#    ## plot profiles of the predicted PHO genes
#    selected.pho <- row.names(predictions.expression$x)[predictions.expression$class=="PHO"]
#    plot.profiles(data,selected.pho,legend.labels=x.desc[selected.pho,"NAME"],lwd=2,xlab="chip",
#                  main="Expression data - Selected genes")



