### bart16eAll.R
### Version 3.2 2017 Nov 5
### Thomas Bartz-Beielstein and Martin Zaefferer
### Note: The files "bart16e.csv" and "cyclone.R" 
### have to be in the 
### current working directory of R.
### Please set working directory to source file location!

# make sure to install current SPOT, MASS and DEoptim version:
# install.packages("SPOT")
# install.packages("DEoptim")
# install.packages("MASS")
# then load SPOT, MASS  and DEoptim  with:
require("SPOT")
require("DEoptim")
require("MASS")

# load current version of cyclone analytical model functions
source('cyclone.R')

#############################################################
# Data
# The data set "bart16e.csv" contains all data.
# lines | description | where
# 1-17  | Mothes MAM EM  | Table 3, column EM
# 18-26 | CFD | Table 5, column EC
# 27-71 | 3D | Table 5, column EP 
# 72-80 | Mothes | Table 5, column EM
# 81-89 | Barth MAB | Table 5, column EB
# 92-106 | Barth MAB EB | Table 3, column EB
##############################################################


####################################################################
### generate the boxplots (Fig. 4) and to calculate 
### means and standard deviations of the 3D-printing data (used in column $M_P$ in Table 5).

## read and pre-process data
d0 <- read.csv("bart16e.csv", sep = ",")
d0 <- d0[d0$SourceE == "3D",]
d0 <- droplevels(d0)
d0$ht <- as.factor(d0$ht)
d0$Cyclone <- as.factor(d0$Cyclone)
boxplot(d0$E ~d0$ht + d0$Cyclone)

## means and deviations with outliers
aggregate(E~Cyclone+ht,data=d0,FUN=mean)
### Expected results:
# Cyclone ht      E
# 1       L  0 86.800
# 2       M  0 72.534
# 3       S  0 90.532
# 4       L 35 94.500
# 5       M 35 86.834
# 6       S 35 95.434
# 7       L 44 90.836
# 8       M 44 92.866
# 9       S 44 95.498
aggregate(E~Cyclone+ht,data=d0,FUN=sd)
### Expected results:
# Cyclone ht         E
# 1       L  0  4.394536
# 2       M  0 17.792772
# 3       S  0  5.676986
# 4       L 35  6.916788
# 5       M 35  9.546451
# 6       S 35  2.805126
# 7       L 44  3.837679
# 8       M 44  2.938406
# 9       S 44  2.500334

## remove outliers:
d2 <- d0[d0$E < 100 & d0$E > 80,]
d2 <- droplevels(d2)

## plot
boxplot(d2$E ~d2$ht + d2$Cyclone)
boxplot(d2$E ~d2$Cyclone + d2$ht)
pdf(file="Box2.pdf")
par(mfrow=c(1,2))
boxplot(d2$E ~d2$Cyclone, horizontal = FALSE)
boxplot(d2$E ~d2$ht, horizontal = FALSE)
dev.off()

# values for column M-P in Table 5:
aggregate(E~Cyclone+ht,data=d2,FUN=mean)
### Expected results:
# Cyclone ht        E
# 1       L  0 86.80000
# 2       M  0 88.50000
# 3       S  0 90.53200
# 4       L 35 92.12500
# 5       M 35 93.66667
# 6       S 35 94.29250
# 7       L 44 90.83600
# 8       M 44 92.86600
# 9       S 44 95.49800

aggregate(E~Cyclone+ht,data=d2,FUN=sd)
### Expected results:
# Cyclone ht        E
# 1       L  0 4.394536
# 2       M  0 8.244865
# 3       S  0 5.676986
# 4       L 35 5.117138
# 5       M 35 2.165002
# 6       S 35 1.343438
# 7       L 44 3.837679
# 8       M 44 2.938406
# 9       S 44 2.500334

###########################################################
### Standard example from [Loef88a, p. 93]
### Note: funCyclone() returns negative values
### Results from Loeffler:
### Pressure drop: delta_p = 1527 N/m^2
### Oververall collection efficiency: 0.98
### Collection efficiency intlet: 0.86
### Due to rounding errors in [Loef88a], we obtain different results:
### delta_p = 1620.5239150   
### E = 0.9681276
### E_w = 0.8862408
funCyclone(c(1.26,2.5,.42,.65,.6,.2),fluid=list(Mu=1.85e-5,Ve=(50/36)/0.12,lambdag=1/200,Rhop=2000,Rhof=1.2,Croh=0.05))

### Data for the present case study:
### intervals of the particle size distribution
intervals <- c(0,1.,2.7,5.5,8.7,12.7,16.9,21.1,25.4,30.8,63)*1e-6 
## values for each interval
delta <- rep(0.1, length(intervals)-1)

h <- 160/1000
g <- read.csv("bart16e.csv", sep = ",")

## Remove outliers:
g <- g[g$E < 100 & g$E > 80,]
g <- droplevels(g)

res <- c()
for (i in 1:17){
  df <- data.frame(g[i,1:7])
  be <- df$be/1000
  Da <- df$Da/1000
  Dt <- df$Dt/1000
  he <- df$he/1000
  ht <- df$ht/1000
  
  ### Process parameters:
  ## Inlet gas velocity # [m/s]
  ve <- 20
  
  ## Wall friction coefficient   
  lambdag <- 1/200
  ## viscosity
  mu <-  1.8e-5; # [Pa s] = [kg/(m s)] 
  ## gas density
  rhof <- 1.2 # [kg/m^3] 
  ## particle density
  rhop <- 2700 # [kg/m^3] 
  
  ## gas flow rate
  vp <- ve*be*he # [m^3 / s]
  
  ## raw gas concentration: 6g dust particles in 10 seconds
  croh <- 6*10^(-3)/(vp * 10)   # [kg / m^3] 
  
  ##  calculate collection efficiency E
  y <- funCyclone(x=c(Da,h,Dt,ht,he,be),
                  fluid=list(Mu=mu,Ve=ve,lambdag=lambdag,Rhop=rhop,Rhof=rhof,Croh=croh),
                  intervals=intervals,
                  delta=delta)
  
  res <- c(res, -y[3] * 100) 
}

##################################
## print all results (Column E_B in Table 3)
print(res) 
# results are presented in tab:cyclonelit
### Expected results:
### 89.33342 89.91305 89.05085 89.59774 91.40258 86.98972 89.63928 92.47415 90.49295 83.86136 87.76394 89.38059 86.63508 81.21417 92.12717 88.68418 88.86535

##################################
## build linear model
X <- g[1:17,1:7]
df1 <- data.frame(X,res)
names(df1)
lm1 <- lm(res ~ he + be + Dt + ht  + Da , data=df1)
summary(lm1)
### Expected results:
# Coefficients:
#   Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 95.53407    0.73580 129.837  < 2e-16 ***
# he           0.02618    0.03722   0.703    0.496    
# be           0.02788    0.06143   0.454    0.659    
# Dt          -0.59629    0.06517  -9.149 1.78e-06 ***
# ht          -0.02441    0.02253  -1.083    0.302    
# Da           0.11081    0.01195   9.273 1.56e-06 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 0.7723 on 11 degrees of freedom
# Multiple R-squared:  0.9488,	Adjusted R-squared:  0.9256 
# F-statistic: 40.78 on 5 and 11 DF,  p-value: 9.794e-07


## refine linear model with stepwise AIC
lm2 <- stepAIC(lm1, scope= list(upper = ~ he + be + Dt + ht + Da , lower = ~ 1), trace=TRUE)
## this model is shown in the article based on Barth
## The model shown in the article uses the Barth approach, because at this stage of the experimentation,
## problems with the Barth model were unknown 
###################################################################
## This is Eq. 2 (E_S = 95.56 − 0.58Dt + 0.11Da)
summary(lm2) 
#### Expected results:
# Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 95.56037    0.66265 144.209  < 2e-16 ***
#   Dt          -0.57948    0.03891 -14.892 5.60e-10 ***
#   Da           0.10610    0.01072   9.893 1.07e-07 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 0.7372 on 14 degrees of freedom
# Multiple R-squared:  0.9406,	Adjusted R-squared:  0.9322 
# F-statistic: 110.9 on 2 and 14 DF,  p-value: 2.595e-09

library("xtable")
xtable(lm2)


#######################
### Table 5: (M-AB):

res <- c()
for (i in 1:3){  # Loeffler, Muschelknautz, Stairmand
  for(ht in c(0, 35/1000, 44/1000)){
    df <- data.frame(g[i,1:7])
    be <- df$be/1000
    Da <- df$Da/1000
    Dt <- df$Dt/1000
    he <- df$he/1000
    
    ### Process parameters:
    ## Inlet gas velocity # [m/s]
    ve <- 20
    
    ## Wall friction coefficient   
    lambdag <- 1/200
    ## viscosity
    mu <-  1.8e-5; # [Pa s] = [kg/(m s)] 
    ## gas density
    rhof <- 1.2 # [kg/m^3] 
    ## particle density
    rhop <- 2700 # [kg/m^3] 
    
    ## gas flow rate
    vp <- ve*be*he # [m^3 / s]
    
    ## raw gas concentration: 6g dust particles in 10 seconds
    croh <- 6*10^(-3)/(vp * 10)   # [kg / m^3] 
    
    ##  calculate collection efficiency E
    y <- funCyclone(x=c(Da,h,Dt,ht,he,be),
                    fluid=list(Mu=mu,Ve=ve,lambdag=lambdag,Rhop=rhop,Rhof=rhof,Croh=croh),
                    intervals=intervals,
                    delta=delta)
    
    res <- c(res, -y[3] * 100) #Ew.
  }
}
print(res) # Column E_B(M-AB)
### Expected results:
### 90.19061 89.48910 89.27401 91.14847 90.36887 90.15098 89.44092 88.69942 88.45412
##################################
##################################

###  From here, we switch to the Mothes model

### Standard example from loeffler (only testing)
funCyclone(c(1.26,2.5,.42,.65,.6,.2),fluid=list(Mu=1.85e-5,Ve=(50/36)/0.12,lambdag=1/200,Rhop=2000,Rhof=1.2,Croh=0.05))
funCyclone(c(1.26,2.5,.42,.65,.6,.2),fluid=list(Mu=1.85e-5,Ve=(50/36)/0.12,lambdag=1/200,Rhop=2000,Rhof=1.2,Croh=0.05), model = "Mothes")


## Remove outliers:
g <- g[g$E < 100 & g$E > 80,]
g <- droplevels(g)

res <- c()
for (i in 1:17){
  df <- data.frame(g[i,1:7])
  be <- df$be/1000
  Da <- df$Da/1000
  Dt <- df$Dt/1000
  he <- df$he/1000
  ht <- df$ht/1000
  
  ### Process parameters:
  ## Inlet gas velocity # [m/s]
  ve <- 20
  
  ## Wall friction coefficient   
  lambdag <- 1/200
  ## viscosity
  mu <-  1.8e-5; # [Pa s] = [kg/(m s)] 
  ## gas density
  rhof <- 1.2 # [kg/m^3] 
  ## particle density
  rhop <- 2700 # [kg/m^3] 
  
  ## gas flow rate
  vp <- ve*be*he # [m^3 / s]
  
  ## raw gas concentration: 6g dust particles in 10 seconds
  croh <- 6*10^(-3)/(vp * 10)   # [kg / m^3] 
  
  ##  calculate collection efficiency E
  y <- funCyclone(x=c(Da,h,Dt,ht,he,be),
                  fluid=list(Mu=mu,Ve=ve,lambdag=lambdag,Rhop=rhop,Rhof=rhof,Croh=croh),
                  intervals=intervals,
                  delta=delta,
                  model = "Mothes")
  
  res <- c(res, -y[3] * 100) 
}
##################################
###  print all results (Column E_M in Table 3)
### 90.52129 91.10506 87.91805 91.32686 87.03686 88.29026 90.36848 89.48944 92.15930 87.45033 89.79843 90.58145 89.49694 84.33156 91.24533 90.47405 89.00862
print(res) 
# results are presented in tab:cyclonelit


############################################################
### Only testing results from stacking (values are generated to check plausibility)
#he        be        Dt        ht        hz        Da        Du 
#8.233044 19.996112 29.369267 59.049553 29.878185 39.015808  5.288277 
#he        be        Dt        ht        hz        Da        Du 
#8.240264 19.995768 29.394082 59.147213 29.931097 40.004893  5.283654 

Da = 40/1000
h = 160/1000
Dt = 29.40/1000
ht = 59.14/1000
he = 8.24/1000
be = 19/1000

y <- funCyclone(x=c(Da,h,Dt,ht,he,be),
                fluid=list(Mu=mu,Ve=ve,lambdag=lambdag,Rhop=rhop,Rhof=rhof,Croh=croh),
                intervals=intervals,
                delta=delta,
                model = "Mothes")

print(-y[3] * 100)
####################################
### Expected result:
###  90.88384

## build linear model
X <- g[1:17,1:7]
df1 <- data.frame(X,res)
names(df1)
lm1 <- lm(res ~ he + be + Dt + ht  + Da , data=df1)
summary(lm1)
### Expected result:
# Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 89.66336    1.05740  84.796  < 2e-16 ***
#   he          -0.04949    0.05349  -0.925  0.37467    
#   be           0.21919    0.08828   2.483  0.03042 *  
#   Dt          -0.37765    0.09366  -4.032  0.00197 ** 
#   ht           0.07931    0.03238   2.449  0.03231 *  
#   Da           0.07352    0.01717   4.282  0.00130 ** 
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 1.11 on 11 degrees of freedom
# Multiple R-squared:  0.7785,	Adjusted R-squared:  0.6778 
# F-statistic: 7.733 on 5 and 11 DF,  p-value: 0.002421

## refine linear model with stepwise AIC
require(MASS) # if MASS is not available: install.packages("MASS")
lm2 <- stepAIC(lm1, scope= list(upper = ~ he + be + Dt + ht + Da , lower = ~ 1), trace=TRUE)
summary(lm2) 
library("xtable")
xtable(lm2)
######################
## this model is the Mothes equivalent to the eq. 2 in the article, which is based on the Barth model
## this model is NOT shown in the article, because it uses the Mothes approach.
## The model shown in the article uses the Barth approach, because at this stage of the experimentation,
## problems with the Barth model were unknown 
###############################################################
## This is the eqaution in Section 5: 
## E_M = 90.40 − 0.25Dt + 0.08Da.
lm3 <- lm(res ~ Dt +  Da , data=df1)
summary(lm3)
### Expected result:
# Estimate Std. Error t value Pr(>|t|)    
# (Intercept) 90.40157    1.21263  74.550  < 2e-16 ***
#   Dt          -0.25167    0.07121  -3.534 0.003303 ** 
#   Da           0.08484    0.01963   4.323 0.000702 ***
#   ---
#   Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# 
# Residual standard error: 1.349 on 14 degrees of freedom
# Multiple R-squared:  0.5835,	Adjusted R-squared:  0.524 
# F-statistic: 9.808 on 2 and 14 DF,  p-value: 0.002173

#######################
### Table 5: E_M(M_AM), based on mothes:

res <- c()
for (i in 1:3){  # Loeffler, Muschelknautz, Stairmand
  for(ht in c(0, 35/1000, 44/1000)){
    df <- data.frame(g[i,1:7])
    be <- df$be/1000
    Da <- df$Da/1000
    Dt <- df$Dt/1000
    he <- df$he/1000
    
    ### Process parameters:
    ## Inlet gas velocity # [m/s]
    ve <- 20
    
    ## Wall friction coefficient   
    lambdag <- 1/200
    ## viscosity
    mu <-  1.8e-5; # [Pa s] = [kg/(m s)] 
    ## gas density
    rhof <- 1.2 # [kg/m^3] 
    ## particle density
    rhop <- 2700 # [kg/m^3] 
    
    ## gas flow rate
    vp <- ve*be*he # [m^3 / s]
    
    ## raw gas concentration: 6g dust particles in 10 seconds
    croh <- 6*10^(-3)/(vp * 10)   # [kg / m^3] 
    
    ##  calculate collection efficiency E
    y <- funCyclone(x=c(Da,h,Dt,ht,he,be),
                    fluid=list(Mu=mu,Ve=ve,lambdag=lambdag,Rhop=rhop,Rhof=rhof,Croh=croh),
                    intervals=intervals,
                    delta=delta,
                    model = "Mothes")
    
    res <- c(res, -y[3] * 100) #Ew.
  }
}
#########################################
### Note, if h_t == 0, Mothes can not calculate efficiency, because immersion depth must be positive:
print(res) # Column E_M (M_AM) in Table 5 ("outliers" removed)
### -1.303055e+64  9.045156e+01  9.054230e+01 -3.127395e+55  9.101896e+01  9.107228e+01 -9.433329e+89  8.983430e+01  9.057406e+01
#########################################

########################################
### OMSS optimization starts here:
########################################
#initializes RNG seed
set.seed(123)
#read data file
# Analytical values are from the Mothes Models 
df <- read.csv("bart16e.csv")
# use Mothes based data (EM) only:
df <- df[-c(81:106),]

# # ## use openFoam v5.0 improved results (Mail Pham 30.7.2017):
df$E[18] == 88.57
df$E[19] == 90.91
df$E[20] == 90.19
df$E[21] == 82.04
df$E[22] == 82.55
df$E[23] == 82.43
df$E[24] == 91.2
df$E[25] == 95.95
df$E[26] == 95.6

dim(df) == c(80,15)

# Mothes model cannot handle ht==0, so we remove these results:
df <- df[-c(72,75,78),]

dim(df) == c(77,15)

#select relevant columns from data
df <- df[, c("E", "he","be","Dt","ht","hz","Da","Du")]
df$he <- df$he / df$hz
df$be <- df$be / df$Da
df$Dt <- df$Dt / df$Da
df$ht <- df$ht / df$hz
df$Du <- df$Du / df$Da
df$hz <- df$hz / 160
df$Da <- df$Da / 160
#manually remove some outliers
df <- df[ df$E < 100,]
df <- df[ df$E > 80,]
#split into input and output data
x <- as.matrix(df[,c("he","be","Dt","ht","hz","Da","Du")])
y <- as.matrix(df[,"E",drop=F])
#because we will do minimization (and E is maximized):
y <- -y 
#build stacked ensemble
# Note, warnings are caused by an external package (random forest) ond not by OMMS
st <- buildEnsembleStack(x, y)  
#####################################
### The following is Eq. 3 (level1 model)
print(st$fit1$fit)
### Expected result:
# Coefficients:
#   (Intercept)           V1           V2           V3  
# -51.3839       0.4021       0.9243      -0.8950  
########################################

#generate a target function from the fit
fun <- evaluateModel(st)
#repair function (minimal absolute dimensions of inlet)
repfun <- function(x){
  x[2] <- x[6]*x[2]*160
  x[2] <- max(x[2],15)
  x[2] <- x[2]/(x[6]*160)
  x[1] <- x[5]*x[1]*160
  x[1] <- max(x[1],15)
  x[1] <- x[1]/(x[5]*160)
  x
}
#wrapper for optimization algorithm (DE uses vector inputs)
f <- function(x){
  ### inlet pipe dimension correction:
  x <- repfun(x)
  fun(matrix(x,1))
}

#determine search space bounds
lo <- apply(x,2,min) #lower bounds
up <- apply(x,2,max) #upper bounds
up["ht"] <- min(1,up["ht"]) #note: larger ht are possible, but not desirable.
print(lo)

# 0.18236207 0.08532921 0.24997854 0.00000000 0.18525000 0.16087500 0.17531603 
print(up)
# 1.0000000 0.3738202 0.7421759 1.0000000 0.5627500 0.7280625 0.7291971 
#optimize the target function. Note, that this may require some time to compute.
set.seed(12345) #initializes RNG seed
optimum <-  DEoptim(fn = f,lower=lo,upper=up,
                    control=list(trace=TRUE,iter=400,reltol=1e-4,steptol=20))

xbest <-  optimum$optim$bestmem
xbest <- repfun(xbest)
ybest <-  optimum$optim$bestval

xbest["Da"] <- xbest["Da"] * 160
xbest["hz"] <- xbest["hz"] * 160 
xbest["Du"] <- xbest["Du"] * xbest["Da"] 
xbest["ht"] <- xbest["ht"] * xbest["hz"]  
xbest["Dt"] <- xbest["Dt"] * xbest["Da"] 
xbest["be"] <- xbest["be"] * xbest["Da"] 
xbest["he"] <- xbest["he"] * xbest["hz"] 

## These results are mentioned in Section 6:
#print results: best solution
xbest
## Expected: 15.00000 15.00000 28.92582 33.08806 44.77764 38.97725 14.64884 
#best solution scaled
xbest / 160
#fitness of best solution
ybest
### Expected: -93.68777

#########################################
#### Generate Fig. 5 (barplot)
#########################################
#compare resulting optimum with bounds....
res <- rbind(lo,optimum$optim$bestmem,up)
rownames(res) <- c("lower","optimum","upper")
colnames(res) <- c("he/hz","be/Da","Dt/Da","ht/hz","hz/h","Da/h","Du/Da")
pdf(file="de1.pdf")
barplot(res
        ,ylim = c(0, 1)
        ,main=""
        ,xlab="parameter"
        ,col = c(gray(.1), gray(.5), gray(.9))
        ,legend = rownames(res)
        ,args.legend=list(x=10, y= 120)
        ,beside = TRUE
        ,xpd = FALSE) 
dev.off()


###
# If you are interested, you can perform a second optimization step as follows. Note,
# geometry parameters have to be adapted!
# And the result from the CFD simulation or interpolation has to be added!
###

#append new result to data set
xc <- c(9.24,13.49,19.95,21.58,29.70,39.32,14.86) #values from meshed cyclone
xc <- xc / c(xc[5],xc[6],xc[6],xc[5],160,160,xc[6])  #scaled
x <- rbind(x, xc)
y <- rbind(y, -96.5) # result of the CFD simulation
#retrain the model ensemble
set.seed(1)
st <- buildEnsembleStack(x, y)  
fun <- evaluateModel(st)
f <- function(x){
  ### inlet pipe dimension correction:
  x <- repfun(x)
  fun(matrix(x,1))
}
#optimize the target function. Note, that this may require some time to compute.
set.seed(1)
optimum2 <-  DEoptim(fn = f,lower=lo,upper=up,
                     control=list(trace=TRUE,iter=400,reltol=1e-4,steptol=20))
xbest2 <-  optimum2$optim$bestmem
xbest2 <- repfun(xbest2)
xbest2["Da"] <- xbest2["Da"] * 160
xbest2["hz"] <- xbest2["hz"] * 160 
xbest2["Du"] <- xbest2["Du"] * xbest2["Da"] 
xbest2["ht"] <- xbest2["ht"] * xbest2["hz"]  
xbest2["Dt"] <- xbest2["Dt"] * xbest2["Da"] 
xbest2["be"] <- xbest2["be"] * xbest2["Da"] 
xbest2["he"] <- xbest2["he"] * xbest2["hz"] 
xbest2