% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/fit_IVDML.R
\name{fit_IVDML}
\alias{fit_IVDML}
\title{Fitting Double Machine Learning Models with Instrumental Variables and Potentially Heterogeneous Treatment Effect}
\usage{
fit_IVDML(
  Y,
  D,
  Z,
  X = NULL,
  A = NULL,
  ml_method,
  ml_par = list(),
  A_deterministic_X = TRUE,
  K_dml = 5,
  iv_method = c("linearIV", "mlIV"),
  S_split = 1
)
}
\arguments{
\item{Y}{Numeric vector. Response variable.}

\item{D}{Numeric vector. Treatment variable.}

\item{Z}{Matrix, vector, or data frame. Instrumental variables.}

\item{X}{Matrix, vector, or data frame. Additional covariates (default: NULL).}

\item{A}{Numeric vector. Variable with respect to which treatment effect heterogeneity is considered. Usually equal to a column of \code{X}, in which case it can also be specified later (default: NULL).}

\item{ml_method}{Character. Machine learning method to use. Options are "gam", "xgboost", and "randomForest".}

\item{ml_par}{List. Parameters for the machine learning method:
\itemize{
\item If \code{ml_method == "gam"}, can specify \code{ind_lin_Z} and \code{ind_lin_X} for components of \code{Z} and \code{X} to be modeled linearly.
\item If \code{ml_method == "xgboost"}, can specify \code{max_nrounds}, \code{k_cv}, \code{early_stopping_rounds}, and vectors \code{eta} and \code{max_depth}.
\item If \code{ml_method == "randomForest"}, can specify \code{num.trees}, \code{num_mtry} (number of different mtry values to try out) or a vector \code{mtry}, a vector \code{max.depth}, \code{num_min.node.size} (number of different min.node.size values to try out) or a vector \code{min.node.size}.
\item To specify different parameters for the different nuisance function regressions, \code{ml_par} should be a list of lists: \code{ml_par_D_XZ} (parameters for nuisance function \eqn{\mathbb E[D|Z, X]}, needed for \code{iv_method} "mlIV" and "mlIV_direct"), \code{ml_par_D_X} (parameters for nuisance function \eqn{\mathbb E[D|X]}, needed for \code{iv_method} "linearIV", "mlIV" and "mlIV_direct"), \code{ml_par_f_X} (parameters for nuisance function \eqn{\mathbb E[\widehat{\mathbb E}[D|Z, X]|X]}, needed for \code{iv_method} "mlIV"), \code{ml_par_Y_X} (parameters for nuisance function \eqn{\mathbb E[Y|X]}, needed for \code{iv_method} "linearIV", "mlIV" and "mlIV_direct"), \code{ml_par_Z_X} (parameters for nuisance function \eqn{\mathbb E[Z|X]}, needed for \code{iv_method} "linearIV").
}}

\item{A_deterministic_X}{Logical. Whether \code{A} is a deterministic function of \code{X} (default: TRUE).}

\item{K_dml}{Integer. Number of cross-fitting folds (default: 5).}

\item{iv_method}{Character vector. Instrumental variables estimation method. Options:
"linearIV", "mlIV", "mlIV_direct" (default: c("linearIV", "mlIV")). "linearIV" corresponds to using instruments linearly and "mlIV" corresponds to using machine learning instruments. "mlIV_direct" is a variant of "mlIV" that uses the same estimate of \eqn{\mathbb E[D|X]} for both the residuals \eqn{D - \mathbb E[D|X]} and \eqn{\mathbb E[D|Z, X] - \mathbb E[D|X]}, whereas "mlIV" uses a two-stage estimate of \eqn{\mathbb E[\widehat{\mathbb E}[D|Z, X]|X]} for the residuals \eqn{\mathbb E[D|Z, X] - \mathbb E[D|X]}.}

\item{S_split}{Integer. Number of sample splits for cross-fitting (default: 1).}
}
\value{
An object of class \code{IVDML}, containing:
\itemize{
\item \code{results_splits}: A list of \code{S_split} lists of cross-fitted residuals, one for each sample split.
\item \code{A}: The argument \code{A} of the function.
\item \code{ml_method}: The argument \code{ml_method} of the function.
\item \code{A_deterministic_X}: The argument \code{A_deterministic_X} of the function.
\item \code{iv_method}: The argument \code{iv_method} of the function.
}

The treatment effect estimates, standard errors and confidence intervals can be calculated from the \code{IVDML} object using the functions \code{\link[=coef.IVDML]{coef.IVDML()}}, \code{\link[=se]{se()}}, \code{\link[=standard_confint]{standard_confint()}}, \code{\link[=robust_confint]{robust_confint()}}.
}
\description{
This function is used to fit a Double Machine Learning (DML) model with Instrumental Variables (IV) with the goal of performing inference on potentially heterogeneous treatment effects. The model under study is \eqn{Y = \beta(A)D + g(X) + \epsilon}, where the error \eqn{\epsilon} is potentially correlated with the treatment \eqn{D}, but there is an IV \eqn{Z} satisfying \eqn{\mathbb E[\epsilon|Z,X] = 0}. The object of interest is the treatment effect \eqn{\beta} of the treatment \eqn{D} on the response \eqn{Y}. The treatment effect \eqn{\beta} is either constant or can depend on the univariate quantity \eqn{A}, which is typically a component of the covariates \eqn{X}.
}
\examples{
set.seed(1)
Z <- rnorm(100)
X <- Z + rnorm(100)
H <- rnorm(100)
D <- Z^2 + sin(X) + H + rnorm(100)
A <- X
Y <- tanh(A) * D + cos(X) - H + rnorm(100)
fit <- fit_IVDML(Y = Y, D = D, Z = Z, X = X, A = A, ml_method = "gam")
coef(fit, iv_method = "mlIV", a = 0, A = A, kernel_name = "boxcar", bandwidth = 0.2)

}
\references{
Cyrill Scheidegger, Zijian Guo and Peter Bühlmann. Inference for heterogeneous treatment effects with efficient instruments and machine learning. Preprint, arXiv:2503.03530, 2025.
}
\seealso{
Inference for a fitted \code{IVDML} object is done with the functions \code{\link[=coef.IVDML]{coef.IVDML()}}, \code{\link[=se]{se()}}, \code{\link[=standard_confint]{standard_confint()}} and \code{\link[=robust_confint]{robust_confint()}}.
}
