library(Matrix)
library(MASS)
library(plspm)
library(lavaan)
library(foreach)
library(iterators)
library(doParallel)
source('generate.R')
source('lavaan2.R')
source('fitlavaan.condition0.R')
source('plspm2.R')
source('fitpls.condition0.R')

message('sources loaded')

run.condition <- function(model, nobs, nind, indcov, errorvar, nfolds=10) {

	data <- generate(model, nobs, nind, indcov, errorvar)

	X.mat <- data[[1]]
	Y.mat <- data[[2]]
	n.comps <- min(ncol(Y.mat), ncol(X.mat))

     results <- NULL

	fold.indices <- sample(1:nfolds, nrow(X.mat), replace=TRUE)

	for (fold in 1:nfolds) {

		X.train <- X.mat[fold.indices != fold,]
		Y.train <- Y.mat[fold.indices != fold,]
		X.test <- X.mat[fold.indices == fold,]
		Y.test <- Y.mat[fold.indices == fold,]

		try( {   
		# #### SEM fit #### #
		sem.fitted <- fit.lavaan(model, nind, cbind(X.train, Y.train))
		if (sem.fitted@Fit@converged) {
		  y.pred <- predict(sem.fitted, as.data.frame(X.test))
            diff <- Y.test - y.pred
            r <- cbind(model, nobs, nind, indcov, errorvar, fold, 'ML', 'ML', 'ML', sqrt(mean((diff)^2)))
            results <- rbind(results, r)
		} else message(paste("Convergence", sem.fitted@Fit@converged))
		} , silent=FALSE)

# inner estimator does not matter at all, no point in simulating them
# (no differences in the 6th decimal place)
		for (scheme in c('centroid', 'factorial', 'path')) {
		  for (mode in c('A', 'B', 'AB', 'BA')) {
    		    try( {
		    # #### PLS PM fit #### #
		    pls.fitted <- fit.pls(model, mode, scheme=scheme, nind, cbind(X.train, Y.train))
		    y.pred <- predict(pls.fitted, X.test)
		    diff <- Y.test - y.pred
              r <- cbind(model, nobs, nind, indcov, errorvar, fold, 'PLS', mode, scheme, sqrt(mean((diff)^2)))
		    results <- rbind(results, r)
		    } , silent=FALSE)
            }
          }

		# #### LM fit #### #
          try( { 
		rhs.names <- colnames(X.train)
          fmla <- as.formula(paste("Y.train ~", paste(paste(rhs.names, sep=""), collapse="+")))
		fit.lm <- lm(fmla, data<-as.data.frame(cbind(X.train, Y.train)))
		y.pred <- predict(fit.lm, as.data.frame(X.test))
		diff <- Y.test - y.pred
          r <- cbind(model, nobs, nind, indcov, errorvar, fold, 'LM', 'LM', 'LM', sqrt(mean((diff)^2)))
		results <- rbind(results, r)
		} , silent=TRUE)

        cat('.')
	}
     cat(' /\n')
	results
}

run <- function(fname.stem, nreps) {

   results <- foreach(repetition=1:nreps, .combine=rbind, .export=c('run.condition', 'generate', 'model1.data', 'model2.data', 'model3.data', 'make.indicators', 'generate.formative', 'rnorm.emp', 'fit.pls', 'plspm2', 'predict.plspm2', 'fit.lavaan', 'lavaan2'), .packages=c('plspm', 'MASS')) %dopar% {

    results <- NULL
      for (model in c(1, 2, 3)) {
#      for (model in c(1)) {
        for (nobs in c(100, 250, 750)) {
#        for (nobs in c(250)) {
          for (nind in c(3, 5, 7)) {
#          for (nind in c(5)) {
            for (indcov in c(0, 0.1, 0.4)) {
              for (errorvar in c(0, 0.1, 0.4)) {
                message(paste('Rep', repetition, 'Model', model, 'NObs', nobs, 'NInd', nind, 'IndCov', indcov, 'ErrorVar', errorvar))
                flush.console()
                r <- run.condition(model, nobs, nind, indcov, errorvar)
                r <- cbind( rep(repetition, nrow(r)), rep(0, nrow(r)), r)
                results <- rbind(results, r)
              }
            }
          }
        }
      }
      colnames(results) <- c('Repetition', 'Condition', 'Model', 'SampleSize', 'NumInd', 'IndCov', 'ErrorVar', 'Fold', 'parameterEstimator', 'outerEstimator', 'innerEstimator', 'RMSE')
      write.csv(results, paste(fname.stem, '.partial.sample', repetition, '.results.csv', sep=''), row.names=F)
      results
    }
    colnames(results) <- c('Repetition', 'Condition', 'Model', 'SampleSize', 'NumInd', 'IndCov', 'ErrorVar', 'Fold', 'parameterEstimator', 'outerEstimator', 'innerEstimator', 'RMSE')
    write.csv(results, paste(fname.stem, '.results.csv', sep=''), row.names=F)
    results.frame <- read.csv(file=paste(fname.stem, '.results.csv', sep=''))

    if (nreps > 1) {
	results.frame <- na.omit(results.frame)

    	a <- aggregate(results.frame[,12], list(Repetition=results.frame$Repetition, Model=results.frame$Model, Condition=results.frame$Condition, SampleSize=results.frame$SampleSize, NumInd=results.frame$NumInd, IndCov=results.frame$IndCov, ErrorVar=results.frame$ErrorVar, PEstimator=results.frame$parameterEstimator, Inner=results.frame$innerEstimator, Outer=results.frame$outerEstimator), mean)
        write.csv(a, file=paste('aggregate.', fname.stem, '.fold.means.csv', sep=''), row.names=F)

	results.frame <- a

    	a <- aggregate(results.frame[,11], list(Model=results.frame$Model, Condition=results.frame$Condition, SampleSize=results.frame$SampleSize, NumInd=results.frame$NumInd, IndCov=results.frame$IndCov, ErrorVar=results.frame$ErrorVar, PEstimator=results.frame$PEstimator, Inner=results.frame$Inner, Outer=results.frame$Outer), length)
        write.csv(a, file=paste('aggregate.', fname.stem, '.count.csv', sep=''), row.names=F)

    	a <- aggregate(results.frame[,11], list(Model=results.frame$Model, Condition=results.frame$Condition, SampleSize=results.frame$SampleSize, NumInd=results.frame$NumInd, IndCov=results.frame$IndCov, ErrorVar=results.frame$ErrorVar, PEstimator=results.frame$PEstimator, Inner=results.frame$Inner, Outer=results.frame$Outer), var)
        write.csv(a, file=paste('aggregate.', fname.stem, '.variances.csv', sep=''), row.names=F)

    	a <- aggregate(results.frame[,11], list(Model=results.frame$Model, Condition=results.frame$Condition, SampleSize=results.frame$SampleSize, NumInd=results.frame$NumInd, IndCov=results.frame$IndCov, ErrorVar=results.frame$ErrorVar, PEstimator=results.frame$PEstimator, Inner=results.frame$Inner, Outer=results.frame$Outer), sd)
        write.csv(a, file=paste('aggregate.', fname.stem, '.sd.csv', sep=''), row.names=F)

    	a <- aggregate(results.frame[,11], list(Model=results.frame$Model, Condition=results.frame$Condition, SampleSize=results.frame$SampleSize, NumInd=results.frame$NumInd, IndCov=results.frame$IndCov, ErrorVar=results.frame$ErrorVar, PEstimator=results.frame$PEstimator, Inner=results.frame$Inner, Outer=results.frame$Outer), mean)
        write.csv(a, file=paste('aggregate.', fname.stem, '.grand.means.csv', sep=''), row.names=F)

	a <- read.csv(file=paste('aggregate.', fname.stem, '.fold.means.csv', sep=''))
	t.test.results <- NULL
	# pairwise t.tests
	for (model in levels(as.factor(a$Model))) {
		for (condition in levels(as.factor(a$Condition))) {
			for (samplesize in levels(as.factor(a$SampleSize))) {
				for (numind in levels(as.factor(a$NumInd))) {
					for (errorvar in levels(as.factor(a$ErrorVar))) {
						a.sub <- a[a$Model == model & a$Condition == condition & a$SampleSize == samplesize & a$NumInd == numind & a$ErrorVar == errorvar,]
						s <- data.frame(cbind(	a.sub[a.sub$Outer == 'LM',]$x, 
											a.sub[a.sub$Outer == 'ML',]$x,
											a.sub[a.sub$Outer == 'A',]$x,
											a.sub[a.sub$Outer == 'B',]$x,
											a.sub[a.sub$Outer == 'AB',]$x,
											a.sub[a.sub$Outer == 'BA',]$x	))
						colnames(s) <- c('LM', 'ML', 'PLS.A', 'PLS.B', 'PLS.AB', 'PLS.BA')

						s <- s[, order(-colMeans(s))]

						for (c in 2:ncol(s) ) {
							test.result <- t.test(s[,c-1], s[,c], paired=TRUE)
							t.test.results <- rbind(t.test.results, cbind(model, condition, samplesize, numind, errorvar, colnames(s)[c-1], colnames(s)[c], test.result$p.value, test.result$parameter, test.result$statistic, test.result$estimate))	
						}

#						d <- list()
#						for ( e in 1:ncol(s)) {
#							d[[e]] <- density(s[, e])
#						}
#						max.x = max(unlist(lapply(d, function(X) { max(X$x) } )))
#						max.y = max(unlist(lapply(d, function(X) { max(X$y) } )))
#						min.x = max(unlist(lapply(d, function(X) { min(X$x) } )))
#						min.y = 0

#						jpeg(filename=paste("RMSE.density.model", model, ".n", samplesize, ".i", numind, ".e", errorvar, '.jpeg', sep=''), width=1000, height=1000, res=150)
#						colors = c("black", "blue", "green", "red", "yellow", "magenta")
#						plot(density(s[, 1]), lwd=2, xlim=c(min.x, max.x), ylim=c(min.y, max.y), col=colors[1], main=paste("RMSE density for model ", model, ", n=", samplesize, ", i=", numind, ", e=", errorvar, sep=''), sub='', ylab='')
							
#						for (e in 2:ncol(s)) {
#							lines(density(s[, e]), lwd=2, col=colors[e])
#						}
#						legend(x="topright", legend=colnames(s), fill=colors, col=colors, border="black")
#						dev.off()
					}
				}
			}
		}
	}

	colnames(t.test.results) <- c('Model', 'Condition', 'SampleSize', 'NumInd', 'ErrorVar', 'M1', 'M2', 'P.Value', 'DF', 'T', 'MeanDiff')

	write.csv(t.test.results, file=paste(fname.stem, '.t.test.results.csv', sep=''), row.names=F)
	t.test.results <- read.csv(file=paste(fname.stem, '.t.test.results.csv', sep=''))
	write.csv(format(t.test.results, scientific=FALSE, digits=5), file=paste(fname.stem, '.t.test.results.csv', sep=''), row.names=F)
  }
}

registerDoParallel(cores=16)
run('setA_prediction.formative.100reps.condition0', 100)

