% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/LearningCurve.R
\name{createLearningCurve}
\alias{createLearningCurve}
\title{createLearningCurve}
\usage{
createLearningCurve(
  plpData,
  outcomeId,
  parallel = TRUE,
  cores = 4,
  modelSettings,
  saveDirectory = NULL,
  analysisId = "learningCurve",
  populationSettings = createStudyPopulationSettings(),
  splitSettings = createDefaultSplitSetting(),
  trainFractions = c(0.25, 0.5, 0.75),
  trainEvents = NULL,
  sampleSettings = createSampleSettings(),
  featureEngineeringSettings = createFeatureEngineeringSettings(),
  preprocessSettings = createPreprocessSettings(minFraction = 0.001, normalize = TRUE),
  logSettings = createLogSettings(),
  executeSettings = createExecuteSettings(runSplitData = TRUE, runSampleData = FALSE,
    runFeatureEngineering = FALSE, runPreprocessData = TRUE, runModelDevelopment = TRUE,
    runCovariateSummary = FALSE)
)
}
\arguments{
\item{plpData}{An object of type \code{plpData} - the patient level prediction
data extracted from the CDM.}

\item{outcomeId}{(integer) The ID of the outcome.}

\item{parallel}{Whether to run the code in parallel}

\item{cores}{The number of computer cores to use if running in parallel}

\item{modelSettings}{An object of class \code{modelSettings} created using one of the functions:
\itemize{
\item \code{setLassoLogisticRegression()} A lasso logistic regression model
\item \code{setGradientBoostingMachine()} A gradient boosting machine
\item \code{setAdaBoost()} An ada boost model
\item \code{setRandomForest()} A random forest model
\item \code{setDecisionTree()} A decision tree model
\item \code{setKNN()} A KNN model
}}

\item{saveDirectory}{The path to the directory where the results will be saved (if NULL uses working directory)}

\item{analysisId}{(character) Identifier for the analysis. It is used to create, e.g., the result folder. Default is \code{"learningCurve"}.}

\item{populationSettings}{An object of type \code{populationSettings} created using \code{createStudyPopulationSettings} that
specifies how the data class labels are defined and, additionally, any exclusions to apply to the
plpData cohort}

\item{splitSettings}{An object of type \code{splitSettings} that specifies how to split the data into train/validation/test.
The default settings can be created using \code{createDefaultSplitSetting}.}

\item{trainFractions}{A vector of training fractions to create models for.
Note, providing \code{trainEvents} will override your input to
\code{trainFractions}.}

\item{trainEvents}{The number of events has been shown to be a determinant of model performance.
Therefore, it is recommended to provide \code{trainEvents} rather than
\code{trainFractions}. Note, providing \code{trainEvents} will override
your input to \code{trainFractions}. The format should be as follows:
\itemize{
\item \code{c(500, 1000, 1500)} - a vector of training event counts
}}

\item{sampleSettings}{An object of type \code{sampleSettings} that specifies any under/over sampling to be done.
The default is none.}

\item{featureEngineeringSettings}{An object of type \code{featureEngineeringSettings} specifying any feature engineering to be learned (using the train data)}

\item{preprocessSettings}{An object of \code{preprocessSettings}. This setting specifies the minimum fraction of
target population who must have a covariate for it to be included in the model training
and whether to normalise the covariates before training}

\item{logSettings}{An object of \code{logSettings} created using \code{createLogSettings}
specifying how the logging is done}

\item{executeSettings}{An object of \code{executeSettings} specifying which parts of the analysis to run}
}
\value{
A learning curve object containing the various performance measures
obtained by the model for each training set fraction. It can be plotted
using \code{plotLearningCurve}.
}
\description{
Creates a learning curve object, which can be plotted using the
\code{plotLearningCurve()} function.
}
\examples{
\dontshow{if (rlang::is_installed("parallel")) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
\donttest{ \dontshow{ # takes too long }
data("simulationProfile")
plpData <- simulatePlpData(simulationProfile, n = 1800, seed = 42)
outcomeId <- 3
modelSettings <- setLassoLogisticRegression(seed=42)
learningCurve <- createLearningCurve(plpData, outcomeId, modelSettings = modelSettings,
saveDirectory = file.path(tempdir(), "learningCurve"), parallel = FALSE)
# clean up
unlink(file.path(tempdir(), "learningCurve"), recursive = TRUE)
}
\dontshow{\}) # examplesIf}
}
