
# Requires the packages caret, nnet and Metrics to be loaded beforehand.
# Load the data set first, for example: set <- read.csv("data_set.csv", header = TRUE)
# The descriptor combinations must be available as combinations.list after loading 'combinations.RData'.
 
# Row indices into `set` used to train the models. Each fold lists 60 rows;
# together the three folds cover the 90 rows of 1..120 that are not held out
# in `validation`.
train.fold1 <- c(
    4,   5,   8,   9,  11,  13,  18,  20,  24,  25,
   27,  28,  30,  33,  35,  36,  39,  40,  42,  43,
   46,  47,  49,  51,  55,  56,  58,  59,  62,  63,
   66,  67,  69,  71,  73,  75,  79,  80,  83,  84,
   89,  90,  93,  94,  96,  97,  99, 100, 102, 103,
  105, 106, 108, 110, 112, 113, 115, 117, 119, 120
)

train.fold2 <- c(
    3,   5,   6,   9,  10,  13,  14,  20,  21,  25,
   26,  28,  29,  33,  34,  36,  37,  40,  41,  43,
   45,  47,  48,  51,  54,  56,  57,  59,  60,  63,
   64,  67,  68,  71,  72,  75,  78,  80,  82,  84,
   86,  90,  92,  94,  95,  97,  98, 100, 101, 103,
  104, 106, 107, 110, 111, 113, 114, 117, 118, 120
)

train.fold3 <- c(
    3,   4,   6,   8,  10,  11,  14,  18,  21,  24,
   26,  27,  29,  30,  34,  35,  37,  39,  41,  42,
   45,  46,  48,  49,  54,  55,  57,  58,  60,  62,
   64,  66,  68,  69,  72,  73,  78,  79,  82,  83,
   86,  89,  92,  93,  95,  96,  98,  99, 101, 102,
  104, 105, 107, 108, 111, 112, 114, 115, 118, 119
)

# Held-out rows of `set` (30 indices): never used for fitting, only for
# prediction and for the final error metrics.
validation <- c(
    1,   2,   7,  12,  15,  16,  17,  19,  22,  23,
   31,  32,  38,  44,  50,  52,  53,  61,  65,  70,
   74,  76,  77,  81,  85,  87,  88,  91, 109, 116
)

# Column index of the response variable in `set`
# (presumably the in vivo log(dose) -- confirm against the data set).
response <- 441

# method = "none": caret fits one model on the supplied training rows without
# any resampling. predictionBounds = c(TRUE, FALSE) constrains only the lower
# end of the predictions to the range of the training outcomes.
# TRUE/FALSE are spelled out because T and F are ordinary, reassignable variables.
tcontrol <- trainControl(method = "none", predictionBounds = c(TRUE, FALSE))

# Single fixed nnet configuration: 2 hidden units, weight decay 0.001.
ngridnn <- expand.grid(size = 2, decay = 0.001)

# One row per validation lipid, one column per descriptor combination (model).
results.m <- matrix(0, ncol = length(combinations.list), nrow = length(validation))
# Per-lipid ensemble prediction, filled in after all models have been run.
av.predicted <- matrix(0, ncol = 1, nrow = length(validation))

# Fit one neural network per descriptor combination and store its predictions
# for the validation lipids in column i of results.m.
for (i in seq_along(combinations.list)) {
  # Training fold by position in combinations.list: combinations 1-100 use
  # fold 1, 101-200 use fold 2, everything above 200 uses fold 3.
  # `train` the index vector shares its name with caret's train() function;
  # the call below still works because R skips non-function objects when
  # resolving a function call.
  if (i <= 100) {
    train <- train.fold1
  } else if (i <= 200) {
    train <- train.fold2
  } else {
    train <- train.fold3
  }

  new_i <- combinations.list[[i]]

  # Same seed before every fit so each model is reproducible on its own.
  set.seed(5)

  # A failed fit must not leave the model of the previous iteration in
  # `lipids_model`: that model was trained on a different descriptor set and
  # would be silently reused (or make predict() fail obscurely).
  # NOTE(review): `scaled` is not a documented train()/nnet() argument; it is
  # kept exactly as in the original call -- confirm it is intended.
  lipids_model <- tryCatch(
    train(
      set[train, new_i], set[train, response],
      method = "nnet", tuneGrid = ngridnn, trControl = tcontrol,
      metric = "RMSE", maximize = FALSE, scaled = FALSE,
      linout = TRUE, trace = FALSE, maxit = 200
    ),
    error = function(e) {
      warning(
        sprintf(
          "nnet fit failed for descriptor combination %d: %s",
          i, conditionMessage(e)
        ),
        call. = FALSE
      )
      NULL
    }
  )

  if (is.null(lipids_model)) {
    # Mark the whole column as missing so the failure stays visible downstream.
    results.m[, i] <- NA_real_
    next
  }

  predicted <- predict(lipids_model, newdata = set[validation, new_i])
  results.m[, i] <- predicted
}

# Ensemble prediction per validation lipid: the median (not the mean) of that
# lipid's predictions over all models, i.e. over one row of results.m.
av.predicted[seq_len(nrow(results.m)), 1] <- apply(results.m, 1, median)
# RMSE between the median-ensemble predictions and the observed response
rmse(av.predicted, set[validation, response])
# R2 (squared Pearson correlation) between the median-ensemble predictions
# and the observed response
cor(av.predicted, set[validation, response])^2

