
## Draw a random subsample from a given sample
random_subsample = function(X=NULL,n=NULL,seedx=NULL){
  if(n<=NROW(X)){
    if(!is.null(seedx)){set.seed(seedx)}
    iid = sample(x = 1:NROW(X),size = n,replace = TRUE)
    Y = X[iid,]
    return(Y)
  }else{
   warning("The size of the subsample n cannot be greater than the size of the original sample X! \n This condition must hold: n <= NROW(X)")
  }
}


## From 'fifer' library
auto.layout = function(n, layout=T){
  ### figure out how many rows
  sq = sqrt(n)
  rws = round(sq)
  
  #### if it's a perfect square, fill the matrix
  if (sqrt(n) == round(sqrt(n))){
    numbs = sort(rep(1:n, times=2))
    m = matrix(numbs, nrow=sq, byrow=T)
  } else {
    
    #### repeat twice the numbers that fit nicely
    topNum = trunc(n/rws)*rws
    numbs = sort(rep(1:topNum, times=2))
    if (topNum==n){
      m = matrix(numbs, nrow=rws, byrow=T)
    } else {
      #### get the rest figured out
      rem = n-topNum  ### remaining numbers
      rest = sort(rep((topNum+1):n, times=2))
      cols = (topNum/rws)*2
      rest = c(rep(0, times=(cols-length(rest))/2), rest, rep(0, times=(cols-length(rest))/2))
      m = matrix(c(numbs, rest), nrow=rws+1, byrow=T)
    }
  }
  
  if (layout){
    layout(m)
  } else {
    m
  }
}

diffbeta_plot = function(fitted_model=NULL,new_window=FALSE,qs=0.98,adjText=-0.85){
  if(new_window==TRUE && !grepl('Mac',Sys.info()['sysname'])){x11()}
  if(new_window==TRUE && grepl('Mac',Sys.info()['sysname'])){quartz()}
  if(new_window==TRUE && grepl('Darwin',Sys.info()['sysname'])){quartz()}
  
  cols = c("goldenrod4","firebrick","dodgerblue4")
  out = dfbetas(fitted_model)
  J=fitted_model$rank-1
  auto.layout(J)
  for(j in 2:(J+1)){
    iid = which(abs(out[,j])>quantile(out[,j],qs))
    plot(out[,j],xlab="index obs",ylab="diff-Beta",bty="n",main=colnames(out)[j]); points(iid,out[iid,j],pch=20,col="red"); text(iid,out[iid,j],label=iid,adj=adjText)
  }
}

influential_plot = function(fitted_model=NULL,new_window=FALSE,d0=0.5,d1=1,qs=0.98,adjText=-0.85){
  if(new_window==TRUE && !grepl('Mac',Sys.info()['sysname'])){x11()}
  if(new_window==TRUE && grepl('Mac',Sys.info()['sysname'])){quartz()}
  if(new_window==TRUE && grepl('Darwin',Sys.info()['sysname'])){quartz()}
  
  cols = c("goldenrod4","firebrick","dodgerblue4")
  x=hatvalues(fitted_model);y=rstudent(fitted_model); n=length(x); J=fitted_model$rank
  plot(x,y,cex=exp(cooks.distance(fitted_model)),ylim=c(-max(abs(y)),max(abs(y))),
       bty="n",col=cols[1],pch=1,lwd=2,ylab="studentized residuals",xlab="leverages")
  points(seq(min(x),max(x),length.out=n),+sqrt(d0*(J+1)*((1-seq(0,1,length.out=n))/seq(0,1,length.out=n))),type="l",col=cols[2],lty=3,lwd=2)
  points(seq(min(x),max(x),length.out=n),+sqrt(d1*(J+1)*((1-seq(0,1,length.out=n))/seq(0,1,length.out=n))),type="l",col=cols[3],lty=3,lwd=2)
  points(seq(min(x),max(x),length.out=n),-sqrt(d0*(J+1)*((1-seq(0,1,length.out=n))/seq(0,1,length.out=n))),type="l",col=cols[2],lty=3,lwd=2)
  points(seq(min(x),max(x),length.out=n),-sqrt(d1*(J+1)*((1-seq(0,1,length.out=n))/seq(0,1,length.out=n))),type="l",col=cols[3],lty=3,lwd=2)
  
  xq=quantile(exp(cooks.distance(fitted_model)),qs)
  iid=which(exp(cooks.distance(fitted_model))>xq)
  text(x[iid],y[iid],labels = names(iid),adj = adjText)
}

posterior_pcheck_Normal = function(fitted_model=NULL,M=150,seed.sx=NULL,new_window=FALSE,overlap=TRUE,titlep=""){
  require(overlapping)
  if(M<1000){message("Warning: M should be usually large (e.g., M>1000) to get reliable results \n")}
  require(overlapping)
  if(is.null(seed.sx)){
    Ypred = simulate(object = fitted_model,nsim = M)
  }else{
    Ypred = simulate(object = fitted_model,nsim = M,seed = seed.sx)
  }
  
  if(class(fitted_model)=="lm"){
    yname = gsub(x = strsplit(x = as.character(fitted_model$call)[2],split = "~")[[1]][1],pattern = " ",replacement = "")
    y = fitted_model$model[,yname]
  }else if(class(fitted_model)=="lmerMod"|class(fitted_model)=="lmerModLmerTest"){
    yname = gsub(x = strsplit(x = as.character(fitted_model@call)[2],split = "~")[[1]][1],pattern = " ",replacement = "")
    y = fitted_model@frame[,yname]
  }
  
  ymax = max(max(apply(Ypred,2,function(x)max(density(x)$y))),max(density(y)$y))
  if(overlap==TRUE){
    ov = mean(apply(Ypred,2,function(x)overlapping::overlap(x = list(y,x),plot = FALSE,partial.plot = FALSE)$OV))
  }
  
  if(new_window==TRUE && !grepl('Mac',Sys.info()['sysname'])){x11()}
  if(new_window==TRUE && grepl('Mac',Sys.info()['sysname'])){quartz()}
  
  plot(density(y),bty="n",xlab="",ylab="",ylim=c(0,ymax),col="dodgerblue4",main=titlep)
  for(m in 1:M){lines(density(Ypred[,m]),col="gray80")} #densities
  lines(density(y),bty="n",main="",xlab="",ylab="",lwd=2.5,col="dodgerblue4")
  
  if(overlap==TRUE){
    text(quantile(density(y)$x,0.7),ymax,paste("OV=",round(ov,3)))
  }
  
  yt_pred = apply(Ypred,2,mean)
  yt_obs = mean(y) 
  delta_t1 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  
  yt_pred = apply(Ypred,2,var) 
  yt_obs = var(y) 
  delta_t2 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  
  yt_pred = apply(Ypred,2,min) 
  yt_obs = min(y) 
  delta_t3 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  
  yt_pred = apply(Ypred,2,max)
  yt_obs = max(y) 
  delta_t4 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  
  # yt_pred = apply(Ypred,2,function(x)psych::skew(x))
  # yt_obs = psych::skew(y)
  # delta_t5 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  # 
  # yt_pred = apply(Ypred,2,function(x)psych::kurtosi(x))
  # yt_obs = psych::kurtosi(y)
  # delta_t6 = min(sum(yt_pred >= yt_obs)/M, sum(yt_pred < yt_obs)/M)
  
  #out = matrix(cbind(delta_t1,delta_t2,delta_t3,delta_t4,delta_t5,delta_t6),nrow = 1,dimnames = list("delta_t",c("mean","var","min","max","skew","kurt")))
  out = matrix(cbind(delta_t1,delta_t2,delta_t3,delta_t4),nrow = 1,dimnames = list("delta_t",c("mean","var","min","max")))
  print(out)
  message("Note: When delta_t approximates zero then the model is not completely adequate to represent the characteristics of the data")
  message(paste0("\nOverlap index (average): ",round(ov,3)))
}

allModels <- function(vars, interactions = FALSE, maxk = NULL) {
  require(gtools)
  
  Mtab <- NULL
  for (i in 1:length(vars)) {
    H <- combinations(length(vars), i, vars)
    
    if (interactions) {
      if (is.null(maxk)) {
        MAXK <- ncol(H)
      } else {
        MAXK <- maxk
      }
    }
    
    for (j in 1:nrow(H)) {
      
      if (ncol(H)==1) {
        MM <- H[j,]
      } else {
        MM <- paste(H[j,],collapse=" + ")
        
        if (interactions) {
          for (k in 2:ncol(H)) {
            
            if ( k <= MAXK ) {
              MM <- rbind(MM,interazioni(H[j,],k))    
            }
            
          }
        }
      }
      Mtab <- rbind(Mtab, MM)
    }
    
  }
  return(Mtab)
}

interazioni <- function(vars,k=2) {
  require(gtools)
  
  if (length(vars)<2) stop("Impossibile gestire interazioni con vars < 2")
  if (k>length(vars)) stop("Impossibile gestire interazioni con k > lenght(vars)")
  
  Itab <- NULL
  H <- combinations(length(vars),k,vars)
  for (i in 1:nrow(H)) {
    altre <- paste(vars[!vars %in% H[i,]],collapse=" + ")
    I <- paste(H[i,],collapse=" * ")
    
    if (nchar(altre)>0) {
      Itab <- rbind(Itab, paste0(I," + ", altre))  
    } else {
      Itab <- rbind(Itab, I)
    }
    
  }
  return(Itab)
}

exploratory_plots = function(y=NULL,X=NULL,new_window=TRUE,plot_type=c("lm","loess")){
  
  cols = c("gray65","dodgerblue4")
  J = NCOL(X)
  
  jjd = which(mapply(function(j)is.factor(X[,j]),1:NCOL(X)))
  categ=FALSE;if(length(jjd)>0){categ=TRUE}
  
  nms = colnames(X); if(is.null(nms)){nms = as.character(1:J)}
  if(plot_type[1]=="loess" & categ==TRUE){
    stop('Method loess cannot be used with categorical variables. Use lm instead.')
  }else{
    if(new_window==TRUE && !grepl('Mac',Sys.info()['sysname'])){x11()}
    if(new_window==TRUE && grepl('Mac',Sys.info()['sysname'])){quartz()}
    if(new_window==TRUE && grepl('Darwin',Sys.info()['sysname'])){quartz()}
    
    auto.layout(J+1)
    for(j in 1:J){
      if(plot_type[1]=="lm"){
        plot(X[,j],y,bty="n",main="",xlab=nms[j],ylab="y",pch=20,cex=1.35,cex.lab=1.45,col=cols[1])
        abline(reg = lm(y~X[,j]),col=cols[2],lty=2,lwd=1.5)
      }else if(plot_type[1]=="loess" & categ!=TRUE){
        plot(X[,j],y,bty="n",main="",xlab=nms[j],ylab="y",pch=20,cex=1.35,cex.lab=1.45,col=cols[1])
        iid = order(X[,j])
        lines(X[iid,j],loess(y~X[,j])$fitted[iid],col=cols[2],lty=2,lwd=2)
      }
    }
  }
  xd=seq(min(y),max(y),length.out=50); yd=dnorm(x = xd,mean = mean(y),sd = sd(y)); hd=hist(y,prob=TRUE,plot=FALSE)
  hist(y,bty="n",main="Response variable",xlab="",col="gray85",prob=TRUE,ylab="",ylim=c(min(yd),max(max(yd),max(hd$density))))
  lines(density(y),lwd=2,col="firebrick",lty=1)
  lines(xd,yd,lwd=2,col="deepskyblue4",lty=2)
}


leaps_r2 = function(y=NULL,X=NULL){
  require(leaps)
  J = NCOL(X)
  nms = colnames(X); if(is.null(nms)){nms = as.character(1:J)}
  
  jjd = which(mapply(function(j)is.factor(X[,j]),1:NCOL(X)))
  if(length(jjd)>0){stop("Categorical predictors cannot be used in leaps::leaps(). Use leaps2_r2() instead.")}
  
  out = leaps::leaps(x = X,y = y,method = "adjr2",int = TRUE,names = nms)
  xvars = out$which[which.max(out$adjr2),]
  vars = names(xvars[xvars==TRUE])
  
  cat("== Results == \n")
  cat(paste0("The total number of submdels is : ",length(out$adjr2),"\n"))
  cat(paste0("The adjusted R2 index for the final model is: ",round(max(out$adjr2),4),"\n"))
  cat("The best subset of predictors is as follows: \n")
  for(j in 1:length(vars)){cat(paste0(j,". ",vars[j]));cat("\n")}
  
  invisible(vars)
}

leaps2_r2 = function(data=NULL,formula=NULL){
  require(leaps)
  J = NCOL(X)
  nms = colnames(X); if(is.null(nms)){nms = as.character(1:J)}
  formula = as.formula(formula)
  
  out = summary(leaps::regsubsets(x = formula,data=data,intercept=TRUE,really.big=TRUE))
  xvars = out$which[which.max(out$adjr2),]
  vars = names(xvars[xvars==TRUE])[-1]
  
  cat("== Results == \n")
  cat(paste0("The total number of submdels is : ",length(out$adjr2),"\n"))
  cat(paste0("The adjusted R2 index for the final model is: ",round(max(out$adjr2),4),"\n"))
  cat("The best subset of predictors is as follows: \n")
  for(j in 1:length(vars)){cat(paste0(j,". ",vars[j]));cat("\n")}
  
  invisible(vars)
}

define_formula = function(xvars=NULL,y=NULL){
  fm = as.formula(paste0(eval(y),"~",paste(xvars,collapse = "+")))
  return(fm)
}

check_unusual_observations = function(fitted_model=NULL,m=10){
  require(MASS);require(car)
  n = length(fitted_model$fitted.values)
  hvals = hatvalues(fitted_model)
  cooksd = cooks.distance(fitted_model)
  rsd = MASS::studres(fitted_model)
  alphas = mapply(function(i)2*min(pt(q = -rsd[i],df = fitted_model$df.residual-1,lower.tail = FALSE),pt(q = rsd[i],df = fitted_model$df.residual-1,lower.tail = FALSE))*n,1:n)
  
  cat("\n")
  cat("Leverage observations (h values) \n")
  cat("====================================== \n")
  D = sort(hvals,decreasing = TRUE)
  D = data.frame(names(D),rsd[as.numeric(names(D))],round(D,3),row.names = NULL); names(D) = c("obs","stud. residuals","h value")
  if(NROW(D)<m){print(D[1:NROW(D),],row.names = FALSE)}else{print(D[1:m,],row.names = FALSE)}
  cat("====================================== \n")
  
  cat("\n\n")
  cat("Outliers (alpha values)\n")
  cat("====================================== \n")
  iid = which(alphas<=0.10)
  D = data.frame(iid,rsd[iid],round(alphas[iid],3),row.names = NULL); names(D) = c("obs","stud. residuals","alpha value")
  if(NROW(D)<m){print(D[1:NROW(D),],row.names = FALSE)}else{print(D[1:m,],row.names = FALSE)}
  cat("====================================== \n")
  
  cat("\n\n")
  cat("Influential observations (Cook's distances)\n")
  cat("================================================= \n")
  D = sort(cooksd,decreasing = TRUE);
  D = data.frame(names(D),rsd[as.numeric(names(D))],round(D,3),row.names = NULL); names(D) = c("obs","stud. residuals","cooks value")
  if(NROW(D)<m){print(D[1:NROW(D),],row.names = FALSE)}else{print(D[1:m,],row.names = FALSE)}
  cat("================================================= \n")
  
  invisible(list(h_values=hvals,alpha_values=alphas,cooks_values=cooksd))
}


white.test <- function(lmobj, squares.only=FALSE)
{
  stopifnot(class(lmobj)=='lm')
  mydata <- lmobj$model
  mydata[,1] <- lmobj$residual^2
  fml <- lmobj$call$formula
  formula1 <- paste(fml[2],fml[1],fml[3])
  pvs <- attr(lmobj$terms,"term.labels")
  k <- length(pvs);
  n <- length(lmobj$fit)
  
  for(i in 1:k){
    tmp <- NULL;
    if(substr(pvs[i],1,2)=="I("){
      tmp2 <- substr(pvs[i],3, nchar(pvs[i])-1);
    }else{
      tmp2 <- pvs[i];
    }
    for(j in 1:nchar(tmp2)){
      tmp1 <- substr(tmp2,j,j)
      if(tmp1 == ":")
        tmp <- paste(tmp, "*", sep='')
      else
        tmp <- paste(tmp, tmp1, sep='')
    }
    pvs[i] <- tmp
  }
  formula2 <- paste(fml[2],fml[1])
  for(i in 1:k){
    if(i>1)
      formula2 <- paste(formula2, "+", sep='')
    formula2 <- paste(formula2, "I(", pvs[i],")",sep='')
    if(squares.only){
      formula2 <- paste(formula2, "+I(", pvs[i], 
                        "*", pvs[i], ")", sep = "")
    }else{
      for(j in i:k)
        formula2 <- paste(formula2,"+I(",pvs[i],
                          "*",pvs[j],")", sep='')
    }
  }
  
  method <- ifelse(squares.only,
                   "White test for constant variance, squares only",
                   "White test for constant variance")
  
  out <- lm(as.formula(formula2),data=mydata)
  if(summary(out)$r.squared == 1.0){
    RVAL <- NULL;
    warning("Test failed.  Possible reasons:\n\t (1) collinearity, or (2) sample size is not big enough for the White's test.");
  }else{
    LM = summary(out)$r.squared * n  
    names(LM) <- "White"
    df <- out$rank - 1
    names(df) <- "df";
    RVAL <- list(statistic = LM,
                 parameter = df,
                 method = method,
                 p.value= pchisq(LM,df,lower.tail=FALSE),
                 data.name=NULL)
    class(RVAL) <- "htest"
  }
  return(RVAL)
}









