# -------------------------------------------------------------------------------
#   This file is part of 'unityForest'.
#
# 'unityForest' is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# 'unityForest' is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with 'unityForest'. If not, see <http://www.gnu.org/licenses/>.
#
#  NOTE: 'unityForest' is a fork of the popular R package 'ranger', written by Marvin N. Wright.
#  Most R and C++ code is identical with that of 'ranger'. The package 'unityForest'
#  was written by taking the original 'ranger' code and making any
#  changes necessary to implement unity forests.
#
# -------------------------------------------------------------------------------

##' Constructs a unity forest and computes the unity variable importance measure (VIM), as described in Hornung & Hapfelmeier (2026). Categorical and continuous outcomes are supported.\cr
##' The unity forest algorithm is a tree construction approach for random forests in which the first few splits are optimized jointly in order to more effectively capture interaction effects beyond marginal effects. The unity VIM quantifies the influence of each variable under the conditions in which that influence is strongest, thereby placing a stronger emphasis on interaction effects than conventional variable importance measures.\cr
##' To explore the nature of the effects identified by the unity VIM, it is essential to examine covariate-representative tree roots (CRTRs), which are implemented in \code{\link{reprTrees}}.
##' 
##' There are two reasons why, for datasets with more than 100 variables, the default value of \code{prop.var.root} is set to 0.1 rather than to the square root of the number of variables divided by the total number of variables.
##' 
##' First, as the total number of variables increases, the square-root-based proportion decreases. This makes it less likely that the same pairs of variables are selected together in multiple trees. This can be problematic for the unity VIM, particularly for variables that do not have marginal effects on their own but act only through interactions with one or a few other variables. Such variables are informative in tree roots only when they are used jointly with the covariates they interact with. Setting \code{prop.var.root = 0.1} ensures that interacting covariates are selected together sufficiently often in tree roots.
##' 
##' Second, this choice reflects the fact that in high-dimensional datasets, typically only a small proportion of variables are informative. Applying the square-root rule in such settings may result in too few informative variables being selected, thereby reducing the likelihood of constructing predictive tree roots.
##' 
##' However, note that results obtained from applications of the unity forest framework to high-dimensional datasets should be interpreted with caution. For high-dimensional data, the curse of dimensionality makes the identification of individual interaction effects challenging and increases the risk of false positives. Moreover, the split points identified in the CRTRs (\code{\link{reprTrees}}) may become less precise as the number of covariates considered per tree root increases.
##' 
##' NOTE: The empirical evaluation of the unity forest framework (including the unity forest algorithm, the unity VIM, and covariate-representative tree roots) in Hornung & Hapfelmeier (2026) focused on categorical outcomes. Its performance for continuous outcomes has not been systematically investigated. Results for continuous outcomes should therefore be interpreted with appropriate caution.
##' 
##' @title Construct a unity forest prediction rule and compute the unity VIM.
##' @param formula Object of class \code{formula} or \code{character} describing the model to fit. Interaction terms supported only for numerical variables.
##' @param dependent.variable.name Name of the outcome variable, required if no formula is provided. For categorical outcomes, this variable must be coded as a \code{factor}.
##' @param data Training data of class \code{data.frame}, \code{matrix}, \code{dgCMatrix} (Matrix) or \code{gwaa.data} (GenABEL).
##' @param num.trees Number of trees. Default is 20000.
##' @param num.cand.trees Number of random candidate trees to generate for each tree root. Default is 500.
##' @param probability Grow a probability forest as in Malley et al. (2012). Default is \code{TRUE}. For categorical outcomes only.
##' @param importance Variable importance mode, either 'unity' (unity VIM) or 'none'. Default is 'none'.
##' @param prop.best.splits Related to the unity VIM. Default value should generally not be modified by the user. When calculating the unity VIM, only the top \code{prop.best.splits} \eqn{\times} 100\% of the splits -- those with the highest split criterion values weighted by node size -- are considered for each variable. The default value is 0.01, meaning that only the top 1\% of splits are used. While small values are recommended, they should not be set too low to ensure that each variable has a sufficient number of splits for a reliable unity VIM computation.
##' @param min.node.size.root Minimal node size in the tree roots. Default is 10 irrespective of the outcome type.
##' @param min.node.size Minimal node size. Default 5 for probability, 5 for classification, and 5 for continuous outcomes.
##' @param max.depth.root Maximal depth of the tree roots. Default value is 3 and should generally not be modified by the user. Larger values can be associated with worse predictive performance for some datasets.
##' @param max.depth Maximal tree depth. A value of NULL or 0 (the default) corresponds to unlimited depth, 1 to tree stumps (1 split per tree). Must be at least as large as \code{max.depth.root}.
##' @param prop.var.root Proportion of variables randomly sampled for constructing each tree root. Default is the square root of the number of variables divided by the number of variables. Consequently, per default, for each tree root, a random subset of variables is considered, with size equal to the (rounded up) square root of the total number of variables. An exception is made for datasets with more than 100 variables, where the default for \code{prop.var.root} is set to 0.1. See the 'Details' section below for explanation.
##' @param mtry.sprout Number of randomly sampled variables to possibly split at in each node of the tree sprouts (i.e., the branches of the trees beyond the tree roots). Default is the (rounded down) square root of the number of variables.
##' @param replace Sample with replacement. Default is \code{FALSE}.
##' @param sample.fraction Fraction of observations to sample for each tree. Default is 1 for sampling with replacement and 0.7 for sampling without replacement. 
##' @param case.weights Weights for sampling of training observations. Observations with larger weights will be selected with higher probability in the bootstrap (or subsampled) samples for the trees.
##' @param class.weights Weights for the outcome classes (in order of the factor levels) in the splitting rule (cost sensitive learning). Classification and probability prediction only. For classification the weights are also applied in the majority vote in terminal nodes.
##' @param inbag Manually set observations per tree. List of size \code{num.trees}, containing inbag counts for each observation. Can be used for stratified sampling.
##' @param oob.error Compute OOB prediction error. Set to \code{FALSE} to save computation time.
##' @param num.threads Number of threads to use.
##' The default is to use at most 2 threads (and at most the number of available CPU cores).
##' This conservative default avoids unintentionally using many cores on shared computing resources
##' (e.g., CI systems, servers, or HPC login/compute nodes).
##'
##' For typical use on a personal computer, setting \code{num.threads = 0} is strongly recommended,
##' as it uses all available CPU cores, which typically substantially reduces runtime.
##' @param write.forest Save \code{unityfor.forest} object, required for prediction. Set to \code{FALSE} to reduce memory usage if no prediction intended.
##' @param verbose Show computation status and estimated runtime.
##' @return Object of class \code{unityfor} with elements
##'   \item{\code{predictions}}{Predicted classes/class probabilities/values, based on out-of-bag samples.}
##'   \item{\code{forest}}{Saved forest (If write.forest set to TRUE). Note that the variable IDs in the \code{split.varIDs} object do not necessarily represent the column number in R.}
##'   \item{\code{data}}{Training data.}
##'   \item{\code{variable.importance}}{Variable importance for each independent variable. Only available if \code{importance} is not \code{"none"}.}
##'   \item{\code{importance.mode}}{Importance mode used.}
##'   \item{\code{prediction.error}}{Overall out-of-bag prediction error. For classification this is the fraction of misclassified samples, for probability estimation the Brier score and for continuous outcomes the mean squared error.}
##'   \item{\code{confusion.matrix}}{Contingency table for classes and predictions based on out-of-bag samples (classification only).}
##'   \item{\code{r.squared}}{R squared. Also called explained variance or coefficient of determination (continuous outcomes only). Computed on out-of-bag data.}
##'   \item{\code{call}}{Function call.}
##'   \item{\code{num.trees}}{Number of trees.}
##'   \item{\code{num.cand.trees}}{Number of candidate trees generated for each tree root.}
##'   \item{\code{num.independent.variables}}{Number of independent variables.}
##'   \item{\code{num.samples}}{Number of samples.}
##'   \item{\code{prop.var.root}}{Proportion of variables randomly sampled for each tree root.}
##'   \item{\code{mtry}}{Value of mtry used (in the tree sprouts).}
##'   \item{\code{max.depth.root}}{Maximal depth of the tree roots.}
##'   \item{\code{min.node.size.root}}{Minimal node size in the tree roots.}
##'   \item{\code{min.node.size}}{Value of minimal node size used.}
##'   \item{\code{splitrule}}{Splitting rule (used only in the tree sprouts).}
##'   \item{\code{replace}}{Sample with replacement.}
##'   \item{\code{treetype}}{Type of forest/tree. Categorical or continuous outcome.}
##' @examples
##' 
##' ## IMPORTANT NOTE on parallelization:
##' ## The default uses at most 2 threads (num.threads) to avoid unintentionally
##' ## using many cores on shared systems.
##' ## However, for typical runs on a personal computer, set num.threads = 0 to 
##' ## use all available CPU cores; this is strongly recommended and can 
##' ## substantially reduce runtime.
##' ## Note: num.threads = 1 is used in the examples to avoid parallel
##' ## execution during package checks.
##' 
##' 
##' ## Load package:
##' 
##' library("unityForest")
##' 
##' 
##' ## Set seed to make results reproducible:
##' 
##' set.seed(1234)
##' 
##' 
##' ## Load wine dataset:
##' 
##' data(wine)
##' 
##' 
##' ## Construct unity forest and calculate unity VIM values:
##' 
##' model <- unityfor(dependent.variable.name = "C", data = wine,
##'                   importance = "unity", num.trees = 20, num.threads = 1)
##' 
##' # NOTE: num.trees = 20 (in the above) would be much too small for practical 
##' # purposes. This small number of trees was simply used to keep the
##' # runtime of the example short.
##' # The default number of trees is num.trees = 20000.
##' 
##' 
##' ## Inspect the rankings of the variables and variable pairs with respect to 
##' ## the unity VIM:
##' 
##' sort(model$variable.importance, decreasing = TRUE)
##' 
##' 
##' ## Prediction:
##' 
##' # Separate 'wine' dataset randomly in training
##' # and test data:
##' train.idx <- sample(nrow(wine), 2/3 * nrow(wine))
##' wine_train <- wine[train.idx, ]
##' wine_test <- wine[-train.idx, ]
##' 
##' # Construct unity forest on training data:
##' # NOTE again: num.trees = 20 is specified too small for practical purposes.
##' model_train <- unityfor(dependent.variable.name = "C", data = wine_train, 
##'                         importance = "none", num.trees = 20, 
##'                         probability = FALSE, num.threads = 1)
##' # NOTE: Because we are only interested in prediction here, we do not
##' # calculate unity VIM values (by setting importance = "none"), because 
##' # this speeds up calculations.
##' # Moreover, 'probability' is set to 'FALSE' because we are interested in pure
##' # class prediction in this example (without class probability prediction).
##' 
##' # Predict class values of the test data:
##' pred_wine <- predict(model_train, data = wine_test, num.threads = 1)
##' 
##' # Compare predicted and true class values of the test data:
##' table(wine_test$C, pred_wine$predictions)
##' 
##' 
##' 
##' ## Prediction for dataset with continuous outcome:
##' 
##' # Load stock dataset:
##' 
##' data(stock)
##' 
##' # Separate 'stock' dataset randomly in training
##' # and test data:
##' train.idx <- sample(nrow(stock), 2/3 * nrow(stock))
##' stock_train <- stock[train.idx, ]
##' stock_test <- stock[-train.idx, ]
##' 
##' # Construct unity forest on training data:
##' # NOTE again: num.trees = 20 is specified too small for practical purposes.
##' model_train <- unityfor(dependent.variable.name = "company10", 
##'                         data = stock_train, importance = "none", 
##'                         num.trees = 20, num.threads = 1)
##' # NOTE: Because we are only interested in prediction here, we do not
##' # calculate unity VIM values (by setting importance = "none"), because 
##' # this speeds up calculations.
##' 
##' # Predict outcome values of the test data:
##' pred_stock <- predict(model_train, data = stock_test, num.threads = 1)
##' 
##' # Compare predicted and true outcome values of the test data:
##' plot(pred_stock$predictions, stock_test$company10)
##' 
##' @author Roman Hornung, Marvin N. Wright
##' @references
##' \itemize{
##'   \item Hornung, R., Hapfelmeier, A. (2026). Unity Forests: Improving Interaction Modelling and Interpretability in Random Forests. arXiv:2601.07003, <\doi{10.48550/arXiv.2601.07003}>.
##'   \item Wright, M. N., Ziegler, A. (2017). ranger: A fast implementation of random forests for high dimensional data in C++ and R. Journal of Statistical Software 77:1-17, <\doi{10.18637/jss.v077.i01}>.
##'   \item Breiman, L. (2001). Random forests. Machine Learning 45:5-32, <\doi{10.1023/A:1010933404324}>.
##'   \item Malley, J. D., Kruppa, J., Dasgupta, A., Malley, K. G., & Ziegler, A. (2012). Probability machines: consistent probability estimation using nonparametric learning machines. Methods of Information in Medicine 51:74-81, <\doi{10.3414/ME00-01-0052}>.
##'   }
##' @seealso \code{\link{predict.unityfor}}
##' @encoding UTF-8
##' @useDynLib unityForest, .registration = TRUE
##' @importFrom Rcpp evalCpp
##' @import stats 
##' @import utils
##' @importFrom Matrix Matrix
##' @export
unityfor <- function(formula = NULL, dependent.variable.name = NULL, data = NULL, num.trees = 20000, 
                     num.cand.trees = 500, probability = TRUE, importance = "none", prop.best.splits = NULL, 
                     min.node.size.root = NULL, min.node.size = NULL, max.depth.root = NULL, 
                     max.depth = NULL, prop.var.root = NULL, mtry.sprout = NULL, replace = FALSE, 
                     sample.fraction = ifelse(replace, 1, 0.7), case.weights = NULL, class.weights = NULL, 
                     inbag = NULL, oob.error = TRUE, num.threads = NULL,
                     write.forest = TRUE, verbose = TRUE) {
  
  ## Overview of the steps performed below:
  ##   1. Extract the outcome and the covariates (formula or name interface).
  ##   2. Derive the tree type from the outcome and recode unordered
  ##      factors / characters to ordered factors.
  ##   3. Validate all tuning parameters (a value of 0 is passed on for the
  ##      parameters that are left at NULL, except where a default is set here).
  ##   4. Call the C++ backend 'divforCpp' and post-process its result list.
  
  ## For unity forests we always order the categories of categorical variables:
  respect.unordered.factors <- "order"
  save.memory <- FALSE
  
  ## For the representative tree algorithm, we need the inbag counts:
  keep.inbag <- TRUE
  
  ## ## GenABEL GWA data
  ## if ("gwaa.data" %in% class(data)) {
  ##   stop("Error: Ordering of SNPs currently not implemented for unity forests.")
  ## }
  
  ## GenABEL GWA data: split into SNP matrix and phenotype data frame
  if ("gwaa.data" %in% class(data)) {
    snp.names <- data@gtdata@snpnames
    snp.data <- data@gtdata@gtps@.Data
    data <- data@phdata
    if ("id" %in% names(data)) {
      data$"id" <- NULL
    }
    gwa.mode <- TRUE
    save.memory <- FALSE
  } else {
    ## Dummy SNP matrix handed to the C++ code when no GWA data is used
    snp.data <- as.matrix(0)
    gwa.mode <- FALSE
  }
  
  ## Sparse matrix data
  if (inherits(data, "Matrix")) {
    if (!("dgCMatrix" %in% class(data))) {
      stop("Error: Currently only sparse data of class 'dgCMatrix' supported.")
    }
    
    if (!is.null(formula)) {
      stop("Error: Sparse matrices only supported with alternative interface. Use dependent.variable.name instead of formula.")
    }
  }
  
  ## Formula interface. Use whole data frame if no formula provided and depvarname given
  if (is.null(formula)) {
    if (is.null(dependent.variable.name)) {
      stop("Error: Please give formula or outcome variable name.")
    }
    response <- data[, dependent.variable.name, drop = TRUE]
    data.selected <- data
  } else {
    formula <- formula(formula)
    if (!inherits(formula, "formula")) {
      stop("Error: Invalid formula.")
    }
    data.selected <- parse.formula(formula, data, env = parent.frame())
    response <- data.selected[, 1]
  }
  
  ## Check missing values
  if (any(is.na(data.selected))) {
    offending_columns <- colnames(data.selected)[colSums(is.na(data.selected)) > 0]
    stop("Missing data in columns: ",
         paste0(offending_columns, collapse = ", "), ".", call. = FALSE)
  }
  
  ## Check response levels
  if (is.factor(response)) {
    if (nlevels(response) != nlevels(droplevels(response))) {
      dropped_levels <- setdiff(levels(response), levels(droplevels(response)))
      warning("Dropped unused factor level(s) in outcome variable: ",
              paste0(dropped_levels, collapse = ", "), ".", call. = FALSE)
    }
  }
  
  ## Treetype: 1 = classification, 3 = regression, 5 = survival-type outcome,
  ## 9 = probability estimation (labels for 1, 3 and 9 are set further below).
  if (is.factor(response)) {
    if (probability) {
      treetype <- 9
    } else {
      treetype <- 1
    }
  } else if (is.numeric(response) && (is.null(ncol(response)) || ncol(response) == 1)) {
    ## Numeric outcomes are always treated as continuous
    treetype <- 3
  } else if (inherits(response, "Surv") || is.data.frame(response) || is.matrix(response)) {
    ## NOTE(review): no splitrule and no treetype label are assigned for
    ## treetype 5 further below -- confirm whether this outcome type is
    ## meant to be supported at all.
    treetype <- 5
  } else {
    stop("Error: Unsupported type of outcome variable.")
  }
  
  ## Dependent and status variable name. For non-survival dummy status variable name.
  if (!is.null(formula)) {
    dependent.variable.name <- names(data.selected)[1]
    independent.variable.names <- names(data.selected)[-1]
  } else {
    independent.variable.names <- colnames(data.selected)[colnames(data.selected) != dependent.variable.name]
  }
  
  ## Covariate levels are only available for data-frame-like input; stays NULL
  ## for (sparse) matrices so that the forest object can still be written.
  covariate.levels <- NULL
  
  ## Recode characters as factors and recode factors if 'order' mode
  if (!is.matrix(data.selected) && !inherits(data.selected, "Matrix")) {
    character.idx <- vapply(data.selected, is.character, logical(1))
    
    ## Recode characters and unordered factors
    names.selected <- names(data.selected)
    ordered.idx <- vapply(data.selected, is.ordered, logical(1))
    factor.idx <- vapply(data.selected, is.factor, logical(1))
    independent.idx <- names.selected != dependent.variable.name
    recode.idx <- independent.idx & (character.idx | (factor.idx & !ordered.idx))
    
    ## Numeric response
    if (is.factor(response)) {
      num.response <- as.numeric(response)
    } else {
      num.response <- response
    }
    
    ## Recode each column
    data.selected[recode.idx] <- lapply(data.selected[recode.idx], function(x) {
      if (!is.factor(x)) {
        x <- as.factor(x)
      } 
      
      if (is.factor(response) && nlevels(response) > 2) {
        ## More than two outcome classes: order levels via pca.order()
        levels.ordered <- pca.order(y = response, x = x)
      } else {
        ## Order factor levels by mean response
        means <- sapply(levels(x), function(y) {
          mean(num.response[x == y])
        })
        levels.ordered <- as.character(levels(x)[order(means)])
      }
      
      ## Return reordered factor
      factor(x, levels = levels.ordered, ordered = TRUE, exclude = NULL)
    })
    
    ## Save levels
    covariate.levels <- lapply(data.selected[independent.idx], levels)
  }
  
  ## Input data and variable names, create final data matrix
  if (!is.null(formula) && treetype == 5) {
    data.final <- data.matrix(cbind(response[, 1], response[, 2],
                                    data.selected[-1]))
    colnames(data.final) <- c(dependent.variable.name, independent.variable.names)
  } else if (is.matrix(data.selected) || inherits(data.selected, "Matrix")) {
    data.final <- data.selected
  } else {
    data.final <- data.matrix(data.selected)
  }
  variable.names <- colnames(data.final)
  
  all.independent.variable.names <- independent.variable.names
  
  ## Error if no covariates
  if (length(all.independent.variable.names) < 1) {
    stop("Error: No covariates found.")
  }
  
  ## Number of trees
  if (!is.numeric(num.trees) || num.trees < 1) {
    stop("Error: Invalid value for num.trees.")
  }
  
  ## mtry.sprout (0 is passed to the C++ code if not specified)
  if (is.null(mtry.sprout)) {
    mtry.sprout <- 0
  } else if (!is.numeric(mtry.sprout) || mtry.sprout < 0) {
    stop("Error: Invalid value for mtry.sprout")
  }
  
  ## prop.best.splits
  if (is.null(prop.best.splits)) {
    prop.best.splits <- 0.01
  } else if (!is.numeric(prop.best.splits) || prop.best.splits < 0) {
    stop("Error: Invalid value for prop.best.splits")
  } else if (prop.best.splits > 1) {
    warning("prop.best.splits value must be no larger than 1. --> Set to 1.")
    ## Actually apply the clamping announced in the warning
    prop.best.splits <- 1
  }
  
  ## Number of candidate trees for each tree root
  if (!is.numeric(num.cand.trees) || num.cand.trees < 0) {
    stop("Error: Invalid value for num.cand.trees")
  }
    
  ## Num threads
  ## Default: use at most 2 threads. Set num.threads = 0 to use all available cores.
  if (is.null(num.threads)) {
    num.threads <- 2L
  } else if (!is.numeric(num.threads) || num.threads < 0) {
    stop("Error: Invalid value for num.threads")
  }
  
  ## Respect core limits during R CMD check / CRAN-like checks
  ## (only if running under package checks)
  if (nzchar(Sys.getenv("_R_CHECK_PACKAGE_NAME_", unset = ""))) {
    limit_cores <- Sys.getenv("_R_CHECK_LIMIT_CORES_", unset = "")
    if (nzchar(limit_cores)) {
      ## Non-numeric values (e.g. "TRUE") become NA and are ignored
      lim <- suppressWarnings(as.integer(limit_cores))
      if (!is.na(lim) && lim > 0) {
        if (num.threads == 0) {
          num.threads <- lim
        } else {
          num.threads <- min(num.threads, lim)
        }
      }
    }
  }
  
  ## Minimum node size (0 is passed to the C++ code if not specified)
  if (is.null(min.node.size)) {
    min.node.size <- 0
  } else if (!is.numeric(min.node.size) || min.node.size < 0) {
    stop("Error: Invalid value for min.node.size")
  }
  
  ## Minimum node size in the tree roots (0 is passed to the C++ code if not specified)
  if (is.null(min.node.size.root)) {
    min.node.size.root <- 0
  } else if (!is.numeric(min.node.size.root) || min.node.size.root < 0) {
    stop("Error: Invalid value for min.node.size.root")
  }
  
  ## Tree depth
  if (is.null(max.depth)) {
    max.depth <- 0
  } else if (!is.numeric(max.depth) || max.depth < 0) {
    stop("Error: Invalid value for max.depth. Please give a positive integer.")
  }
  
  ## Depth of the roots of the trees
  if (is.null(max.depth.root)) {
    max.depth.root <- 3
  } else if (!is.numeric(max.depth.root) || max.depth.root < 0) {
    stop("Error: Invalid value for max.depth.root. Please give a positive integer.")
  }
  
  ## A limited tree depth must not be smaller than the root depth. This is
  ## checked for the default root depth as well, not only for user-supplied values.
  if ((max.depth > 0) && (max.depth.root > max.depth)) {
    warning(paste0("Error: max.depth must be larger than max.depth.root. --> max.depth set to max.depth.root = ", max.depth.root, "."))
    max.depth <- max.depth.root
  }
  
  ## Proportion of variables randomly sampled for each tree root
  ## (0 is passed to the C++ code if not specified):
  if (is.null(prop.var.root)) {
    prop.var.root <- 0
  } else if (!is.numeric(prop.var.root) || prop.var.root <= 0 || prop.var.root > 1) {
    stop("Error: Invalid value for prop.var.root. Must be a value greater than 0 with a maximum of 1.")
  }
  
  ## Sample fraction
  if (!is.numeric(sample.fraction)) {
    stop("Error: Invalid value for sample.fraction. Please give a value in (0,1] or a vector of values in [0,1].")
  }
  if (length(sample.fraction) > 1) {
    ## Class-wise sampling: one fraction per outcome class
    if (!(treetype %in% c(1, 9))) {
      stop("Error: Invalid value for sample.fraction. Vector values only valid for classification forests.")
    }
    if (any(sample.fraction < 0) || any(sample.fraction > 1)) {
      stop("Error: Invalid value for sample.fraction. Please give a value in (0,1] or a vector of values in [0,1].")
    }
    if (sum(sample.fraction) <= 0) {
      stop("Error: Invalid value for sample.fraction. Sum of values must be >0.")
    }
    if (length(sample.fraction) != nlevels(response)) {
      stop("Error: Invalid value for sample.fraction. Expecting ", nlevels(response), " values, provided ", length(sample.fraction), ".")
    }
    if (!replace && any(sample.fraction * length(response) > table(response))) {
      idx <- which(sample.fraction * length(response) > table(response))[1]
      stop("Error: Not enough samples in class ", names(idx), 
           "; available: ", table(response)[idx], 
           ", requested: ", (sample.fraction * length(response))[idx], ".")
    }
    if (!is.null(case.weights)) {
      stop("Error: Combination of case.weights and class-wise sampling not supported.")
    }
  } else {
    if (sample.fraction <= 0 || sample.fraction > 1) {
      stop("Error: Invalid value for sample.fraction. Please give a value in (0,1] or a vector of values in [0,1].")
    }
  }
  
  ## Importance mode TO DO
  if (is.null(importance) || importance == "none") {
    importance.mode <- 0
  } else if (importance == "impurity") {
    importance.mode <- 1
  } else if (importance == "impurity_corrected" || importance == "impurity_unbiased") {
    importance.mode <- 5
  } else if (importance == "unity") {
    importance.mode <- 3
  } else {
    stop("Error: Unknown importance mode.")
  }
  
  ## Case weights: NULL for no weights
  if (is.null(case.weights)) {
    ## Dummy vector handed to the C++ code
    case.weights <- c(0,0)
    use.case.weights <- FALSE
  } else {
    use.case.weights <- TRUE
    
    if (!replace && sum(case.weights > 0) < sample.fraction * nrow(data.final)) {
      stop("Error: Fewer non-zero case weights than observations to sample.")
    }
  }
  
  ## Manual inbag selection
  if (is.null(inbag)) {
    ## Dummy list handed to the C++ code
    inbag <- list(c(0,0))
    use.inbag <- FALSE
  } else if (is.list(inbag)) {
    use.inbag <- TRUE
    if (use.case.weights) {
      stop("Error: Combination of case.weights and inbag not supported.")
    }
    if (length(sample.fraction) > 1) {
      stop("Error: Combination of class-wise sampling and inbag not supported.")
    }
    if (length(inbag) != num.trees) {
      stop("Error: Size of inbag list not equal to number of trees.")
    }
  } else {
    stop("Error: Invalid inbag, expects list of vectors of size num.trees.")
  }
  
  ## Class weights: NULL for no weights (all 1)
  if (is.null(class.weights)) {
    class.weights <- rep(1, nlevels(response))
  } else {
    if (!(treetype %in% c(1, 9))) {
      stop("Error: Argument class.weights only valid for classification forests.")
    }
    if (!is.numeric(class.weights) || any(class.weights < 0)) {
      stop("Error: Invalid value for class.weights. Please give a vector of non-negative values.")
    }
    if (length(class.weights) != nlevels(response)) {
      stop("Error: Number of class weights not equal to number of classes.")
    }
    
    ## Reorder (C++ expects order as appearing in the data)
    class.weights <- class.weights[unique(as.numeric(response))]
  }
  
  ## Splitting rule
  if (treetype == 3) {
    splitrule <- "variance"
  } else if (treetype %in% c(1, 9)) {
    splitrule <- "gini"
  }
  splitrule.num <- 1
  
  ## Prediction mode always false. Use predict.unityfor() method.
  prediction.mode <- FALSE
  predict.all <- FALSE
  prediction.type <- 1
  
  ## No loaded forest object
  loaded.forest <- list()
  
  ## Use sparse matrix; the unused one of the two data arguments is a dummy
  if ("dgCMatrix" %in% class(data.final)) {
    sparse.data <- data.final
    data.final <- matrix(c(0, 0))
    use.sparse.data <- TRUE
  } else {
    sparse.data <- Matrix(matrix(c(0, 0)))
    use.sparse.data <- FALSE
  }
  
  order.snps <- FALSE
  
  repr.tree.mode <- FALSE
  repr.var.names <- ""
  
  ## Clean up
  ####rm("data.selected")
  
  ## Call divfor
  
  result <- divforCpp(treetype, dependent.variable.name, data.final, variable.names, mtry.sprout,
                      num.trees, verbose, seed=runif(1 , 0, .Machine$integer.max), num.threads, write.forest, importance.mode,
                      min.node.size, min.node.size.root, split_select_weights=list(c(0,0)), use_split_select_weights=FALSE,
                      always_split_variable_names=c("0", "0"), use_always_split_variable_names=FALSE,
                      status_variable_name="", prediction.mode, loaded.forest, snp.data,
                      replace, probability, unordered_variable_names=c("0", "0"), use_unordered_variable_names=FALSE, 
                      save.memory, splitrule.num, case.weights, use.case.weights, class.weights, 
                      predict.all, keep.inbag, sample.fraction, alpha=0.5, minprop=0.1, holdout = FALSE, prediction.type, 
                      num_random_splits=1, sparse.data, use.sparse.data, order.snps, oob.error, max.depth, max.depth.root, num.cand.trees,
                      inbag, use.inbag, prop.var.root,
                      prop.best.splits, repr.tree.mode, repr.var.names)
  
  if (length(result) == 0) {
    stop("User interrupt or internal error.")
  }
  
  ## Prepare results
  if (importance.mode != 0) {
    names(result$variable.importance) <- all.independent.variable.names
  }
  
  ## Set predictions
  if (treetype == 1 && is.factor(response) && oob.error) {
    result$predictions <- integer.to.factor(result$predictions,
                                            levels(response))
    true.values <- integer.to.factor(unlist(data.final[, dependent.variable.name]),
                                     levels(response))
    result$confusion.matrix <- table(true.values, result$predictions, 
                                     dnn = c("true", "predicted"), useNA = "ifany")
  } else if (treetype == 9 && !is.matrix(data) && oob.error) {
    if (is.list(result$predictions)) {
      result$predictions <- do.call(rbind, result$predictions)
    } 
    if (is.vector(result$predictions)) {
      result$predictions <- matrix(result$predictions, nrow = 1)
    }
    
    ## Set colnames and sort by levels
    colnames(result$predictions) <- unique(response)
    if (is.factor(response)) {
      result$predictions <- result$predictions[, levels(droplevels(response)), drop = FALSE]
    }
  }
  
  ## Splitrule
  result$splitrule <- splitrule
  
  ## Set treetype
  if (treetype == 1) {
    result$treetype <- "Classification"
  } else if (treetype == 3) {
    result$treetype <- "Regression"
  } else if (treetype == 9) {
    result$treetype <- "Probability estimation"
  }
  if (treetype == 3) {
    result$r.squared <- 1 - result$prediction.error / var(response)
  }
  result$call <- sys.call()
  result$importance.mode <- importance
  result$num.samples <- nrow(data.final)
  result$replace <- replace
  
  ## Write forest object
  if (write.forest) {
    if (is.factor(response)) {
      result$forest$levels <- levels(response)
    }
    result$forest$independent.variable.names <- independent.variable.names
    result$forest$treetype <- result$treetype
    class(result$forest) <- "unityfor.forest"
    
    ## In 'ordered' mode, save covariate levels (not available for sparse
    ## matrix input, for which no recoding took place)
    if (respect.unordered.factors == "order" && !is.matrix(data) && !is.null(covariate.levels)) {
      result$forest$covariate.levels <- covariate.levels
    }
  }
  
  result$num.cand.trees <- num.cand.trees
  result$max.depth.root <- max.depth.root
  result$min.node.size.root <- min.node.size.root

  result$data <- data.selected
  result$prop.best.splits <- prop.best.splits
  result$prop.var.root <- prop.var.root
  #result$prediction <- NULL
  
  # Reorder the elements of the "result" list so that they have a more meaningful order:
  # elements named in 'res_names_all' are permuted among the positions they
  # currently occupy; all other elements keep their position.
  
  res_names_all <- c("forest", "predictions", "data", "variable.importance", "importance.mode", "prop.best.splits", "prediction.error",
                     "confusion.matrix", "r.squared", "call", "num.trees", "num.cand.trees",
                     "num.independent.variables", "num.samples", "prop.var.root", "mtry",
                     "max.depth.root", "min.node.size.root", "min.node.size", "splitrule", "replace",
                     "treetype")
  res_names <- names(result)
  
  ind_cl <- which(res_names %in% res_names_all)
  
  res_names_sub <- res_names[ind_cl]
  res_names_all_sub <- res_names_all[res_names_all %in% res_names]
  
  ## Position of each desired name within the currently present names
  reorderind <- as.numeric(factor(res_names_all_sub, levels=res_names_sub))
  
  new_order <- seq_along(result)
  new_order[ind_cl] <- ind_cl[reorderind]
  
  result <- result[new_order]
  
  
  class(result) <- "unityfor"
  
  
  return(result)
}
