source("model_choice.R")
source("statistics.R")
source("Calibration.R")
library(lars)
##------------------------------------------------------------------
## Two-sample linear regression :
## ARGUMENTS :
# sample 1 : Y1 vector of size n1 and X1 matrix of dimension (n1,p)
# sample 2 : Y2 vector of size n2 and X2 matrix of dimension (n2,p)
# test whether model (1) and (2) are the same : test of H0 given in (3). The test statistic FS is decomposed into the term FSV which evaluates the discrepancies in term of conditional variances and the terms (FS1, FS2) which adress the comparison of beta1 and beta2.
# by default, test of level alpha=0.05
## OPTIONS :
# -choices of Test collections :
# model.family="size1" or "Lasso"
# by default if model.family="Lasso" : Dmax=floor(min(length(Y1),length(Y2))/2)
# -calibration :
# calibr.meth ="Bonferroni" or "permutation"
# by default if calibr.meth="permutation", B=10000 permutations are computed
# -test statictic :
# Test.variance = TRUE indicates that the model of dimension 0, i.e the comparison of the variances Var(Y1) and Var(Y2), is included in the collection of test.
## OUTPUT : for each calibration method in calibr.meth, for each model family in model.family :
# -decision : decision=1 if the global null hypothesis H0 is rejected.
# -pvalue (see Section 3.4): when calibr.meth="Permutation", an empirical p-value is computed as the fraction of the permuted values of the statistic that are less than the observed test statistic.
# $pvalueV is the p-value associated to the term FSV of the statistic and $pvalue12 is the p-value associated to the terms (FS1, FS2) of the statistic.
# $pvalueV and $pvalue12 have both to be compared to alpha/2.
# -rejected_model (see Section 3.4): if decision=1, it indicates for which model S the null hypothesis H_{0,S} is rejected.
# As we can reject because of the term FSV of the statistic and/or because of the terms (FS1,FS2) of the statistic, it returns rejected_model_V and rejected_model_12 the models responsible for the rejection for each term of the statistic. It returns "no reject" in case of non rejection.
# rejected_model_V = 0 means that the reject in the part FSV (part that evaluates the discrepancies in term of conditional variances) is due to the model of dimension 0 i.e the comparison of the variances Var(Y1) and Var(Y2).
##------------------------------------------------------------------
Test <- function(X1,X2,Y1,Y2,alpha=0.05, model.family, Dmax=NULL, calibr.meth, B=10000 , Test.variance=TRUE)
{ n1 <- length(Y1)
n2 <- length(Y2)
p=ncol(X1)
if (is.null(Dmax)){ Dmax <- floor(min(length(Y1),length(Y2))/2)}
#choices of Test collections
modele <- model.choice(X1,X2,Y1,Y2,n1,n2,p, model.family,Dmax)
#test statistic
statist <- lapply(modele,statistique.globale.CLR,Y1,Y2,X1,X2,alpha,Test.variance)
#calibration
calibration=calibration.level(n1,n2,p,alpha,X1,X2,Y1,Y2,B,model.family,calibr.meth,Dmax,Test.variance)
#decision
decision=getDecision.CLR(calibr.meth, model.family, statist, calibration)
return(decision)
}
##--------------------------------------------------------------------------------
## Application to the comparison of Gaussian graphical model :
## ARGUMENTS :
# expr is a dataset of size (n,p) : n observations (for example n patients) concerning p variables (for example genes expression).
# condition is a vector of size n with two possible values : it indicates from which condition (condition 1 or 2) come the observations.
# a neighborhood test is run at level alpha/p for the variable i. The neighorhood test statistic FS is decomposed into the term FSV which evaluates the discrepancies in term of conditional variances and the terms (FS1, FS2) which adress the comparison of beta1 and beta2.
## OPTIONS :
# -choices of Test collections :
# model.family="size1" or "Lasso"
# by default if model.family="Lasso" : Dmax=floor(min(length(Y1),length(Y2))/2)
# -calibration :
# calibr.meth ="Bonferroni" or "permutation"
# by default if calibr.meth="permutation", B=10000 permutations are computed
# -test statictic :
# Test.variance = TRUE indicates that the model of dimension 0, i.e the comparison of the variances Var(Y1) and Var(Y2), is included in the collection of test.
## OUTPUT : for each calibration method in calibr.meth, for each model family in model.family :
# -decision : decision=1 if the neighborhood test for the variable i is rejected.
# -pvalue (see Section 3.4): when calibr.meth="Permutation", an empirical p-value is computed as the fraction of the permutation values of the statistic that are less than the observed test statistic.
# $pvalueV is the p-value associated to the term FSV of the statistic and $pvalue12 is the p-value associated to the terms (FS1,FS2) of the statistic.
# $pvalueV and $pvalue12 have both to be compared to (alpha/p)/2.
# -rejected_model (see Section 3.4) : if decision=1, it indicates which set of variable(s) S is responsible for the rejection of the neighborhood test.
# If the columm names of the dataset "expr" are the names of the genes, it returns the names of the gene(s) responsible for the rejection; if the columms of the dataset "expr" has no particular names, it returns the number(s) of the variable(s) responsible for the rejection.
# As we can reject because of the part FSV of the statistic and/or because of the part FS1-FS2 of the statistic, it returns rejected_model_V and rejected_model_12 the models responsible for the rejection for each term of the statistic. It returns "no reject" in case of non rejection.
# rejected_model_V = 0 means that the reject in the part FSV (part that evaluates the discrepancies in term of conditional variances) is due to the model of dimension 0 i.e the comparison of the variances Var(Y1) and Var(Y2).
##--------------------------------------------------------------------------------
neigh.test <- function(i,expr,condition,alpha=0.05, model.family, Dmax=NULL, calibr.meth, B=10000, Test.variance=TRUE)
{ # selection of variable i :
x <- expr[,-i]
y <- expr[,i]
# Bonferroni correction :
alpha <- alpha/ncol(expr)
res <- Test(X1=as.matrix(x[condition==1,]),X2=as.matrix(x[condition==2,]),Y1=y[condition==1],Y2=y[condition==2],alpha, model.family, Dmax, calibr.meth, B=B ,Test.variance)
cat("gene ", i , " done \n")
# name of the gene(s) responsible for the rejection in the neighborhood test for the variable i.
if (is.null(colnames(expr))){ colnames(expr)=1:ncol(expr) }
for (meth in calibr.meth) {
for (family in model.family) {
if ((is.numeric(res[[meth]][[family]]$rejected_model_V)==T) & (res[[meth]][[family]]$rejected_model_V > 0))
{res[[meth]][[family]]$rejected_model_V=colnames(expr[,-i])[res[[meth]][[family]]$rejected_model_V]}
if (is.numeric(res[[meth]][[family]]$rejected_model_12)==T)
{ res[[meth]][[family]]$rejected_model_12=colnames(expr[,-i])[res[[meth]][[family]]$rejected_model_12]} }
}
return(res)
}