The goal of this vignette is to illustrate the R package cases with some elementary code examples.
Load the package:
library(cases)
Often, binary predictions are not readily available but rather need to be derived from continuous (risk) scores. This can be done via the categorize function.
# real data example from publication here
set.seed(123)
M <- as.data.frame(mvtnorm::rmvnorm(10, mean=rep(0, 3), sigma=2*diag(3)))
M
#> V1 V2 V3
#> 1 -0.79263226 -0.3255201 2.2043464
#> 2 0.09971392 0.1828405 2.4254682
#> 3 0.65183395 -1.7890668 -0.9713566
#> 4 -0.63026120 1.7311131 0.5088536
#> 5 0.56677642 0.1565290 -0.7860781
#> 6 2.52707679 0.7040669 -2.7812167
#> 7 0.99186703 -0.6686280 -1.5101308
#> 8 -0.30826308 -1.4509894 -1.0308079
#> 9 -0.88393901 -2.3853446 1.1848098
#> 10 0.21690234 -1.6095687 1.7731621
## categorize at 0 by default
yhat <- categorize(M)
yhat
#> V1_0 V2_0 V3_0
#> 1 0 0 1
#> 2 1 1 1
#> 3 1 0 0
#> 4 0 1 1
#> 5 1 1 0
#> 6 1 1 0
#> 7 1 0 0
#> 8 0 0 0
#> 9 0 0 1
#> 10 1 0 1
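Custom cutpoints can be supplied as well. The following minimal sketch assumes, consistent with the calls below, that the second argument of categorize() holds the cutpoint values and the third argument maps each cutpoint to a column of M; the cutpoint values themselves are purely illustrative.
## one illustrative cutpoint per marker (values chosen arbitrarily)
categorize(M, c(0.5, -0.5, 1), 1:3)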
## specify multiple cutpoints to define multiple decision rules per marker
C <- c(0, 1, 0, 1, 0, 1)
a <- c(1, 1, 2, 2, 3, 3)
categorize(M, C, a)
#> V1_0 V1_1 V2_0 V2_1 V3_0 V3_1
#> 1 0 0 0 0 1 1
#> 2 1 0 1 0 1 1
#> 3 1 0 0 0 0 0
#> 4 0 0 1 1 1 0
#> 5 1 0 1 0 0 0
#> 6 1 1 1 0 0 0
#> 7 1 0 0 0 0 0
#> 8 0 0 0 0 0 0
#> 9 0 0 0 0 1 1
#> 10 1 0 0 0 1 1
## this can even be used to do multi-class classification, like this:
C <- matrix(rep(c(-1, 0, 1, -2, 0, 2), 3), ncol=3, byrow = TRUE)
C
#> [,1] [,2] [,3]
#> [1,] -1 0 1
#> [2,] -2 0 2
#> [3,] -1 0 1
#> [4,] -2 0 2
#> [5,] -1 0 1
#> [6,] -2 0 2
categorize(M, C, a)
#> V1_a V1_b V2_a V2_b V3_a V3_b
#> 1 1 1 1 1 3 3
#> 2 2 2 2 2 3 3
#> 3 2 2 0 1 1 1
#> 4 1 1 3 2 2 2
#> 5 2 2 2 2 1 1
#> 6 3 3 2 2 0 0
#> 7 2 2 1 1 0 1
#> 8 1 1 0 1 0 1
#> 9 1 1 0 0 3 2
#> 10 2 2 0 1 3 2
In supervised classification, it is assumed that we have a set of true labels. In medical testing, these labels are usually given by the reference standard, i.e. an established diagnostic or prognostic tool. We need to compare the model predictions against these labels in order to compute model accuracy.
## consider binary predictions from 3 models from the previous chunk
names(yhat) <- paste0("rule", 1:ncol(yhat))
yhat
#> rule1 rule2 rule3
#> 1 0 0 1
#> 2 1 1 1
#> 3 1 0 0
#> 4 0 1 1
#> 5 1 1 0
#> 6 1 1 0
#> 7 1 0 0
#> 8 0 0 0
#> 9 0 0 1
#> 10 1 0 1
## assume true labels
y <- c(rep(1, 5), rep(0, 5))

## compare() then results in
compare(yhat, y)
#> $specificity
#> rule1 rule2 rule3
#> 6 0 0 1
#> 7 0 1 1
#> 8 1 1 1
#> 9 1 1 0
#> 10 0 1 0
#>
#> $sensitivity
#> rule1 rule2 rule3
#> 1 0 0 1
#> 2 1 1 1
#> 3 1 0 0
#> 4 0 1 1
#> 5 1 1 0
The main function of the package is evaluate:
evaluate(compare(yhat, y))
#> [cases] evaluation results:
#> $specificity
#> parameter hypothesis estimate lower upper pval pval_all reject reject_all
#> 1 rule1 == 0.75 0.4 0.0080 0.7920 0.0401 0.2266 FALSE FALSE
#> 2 rule2 == 0.75 0.8 0.4799 1.1201 0.3797 0.3797 FALSE FALSE
#> 3 rule3 == 0.75 0.6 0.2080 0.9920 0.2266 0.2266 FALSE FALSE
#>
#> $sensitivity
#> parameter hypothesis estimate lower upper pval pval_all reject reject_all
#> 1 rule1 == 0.75 0.6 0.208 0.992 0.2266 0.2266 FALSE FALSE
#> 2 rule2 == 0.75 0.6 0.208 0.992 0.2266 0.3797 FALSE FALSE
#> 3 rule3 == 0.75 0.6 0.208 0.992 0.2266 0.2266 FALSE FALSE
More details on the evaluate function are provided in the last section of this vignette.
cases includes a few functions for synthetic data generation:
draw_data_lfc(n=20)
#> $specificity
#> model1 model2 model3 model4 model5 model6 model7 model8 model9 model10
#> [1,] 1 1 1 1 1 1 1 1 1 1
#> [2,] 1 1 1 1 1 0 1 1 1 1
#> [3,] 0 1 1 1 1 1 1 1 1 1
#> [4,] 1 1 1 1 1 1 1 1 1 1
#> [5,] 1 1 0 1 1 1 1 1 1 1
#> [6,] 1 1 1 1 1 1 0 1 0 1
#> [7,] 1 1 1 1 1 1 1 1 1 1
#> [8,] 1 1 0 1 1 1 1 1 1 1
#> [9,] 1 1 1 1 1 0 1 1 1 1
#> [10,] 1 1 1 1 1 1 1 1 1 1
#>
#> $sensitivity
#> model1 model2 model3 model4 model5 model6 model7 model8 model9 model10
#> [1,] 1 1 1 1 1 1 1 1 1 1
#> [2,] 1 1 1 1 1 1 1 1 1 1
#> [3,] 1 1 1 1 1 1 1 1 1 0
#> [4,] 1 1 1 1 1 1 1 1 1 0
#> [5,] 1 1 1 1 0 1 1 1 1 1
#> [6,] 1 1 1 1 0 1 1 1 1 1
#> [7,] 1 1 1 1 1 1 1 0 1 0
#> [8,] 1 1 1 0 1 1 1 1 1 1
#> [9,] 1 1 1 1 1 1 1 0 1 1
#> [10,] 1 1 1 1 1 1 1 1 1 0
#>
#> attr(,"info")
#> model b se sp
#> 1 model1 TRUE 1.0 0.8
#> 2 model2 FALSE 0.8 1.0
#> 3 model3 TRUE 1.0 0.8
#> 4 model4 FALSE 0.8 1.0
#> 5 model5 FALSE 0.8 1.0
#> 6 model6 TRUE 1.0 0.8
#> 7 model7 TRUE 1.0 0.8
#> 8 model8 FALSE 0.8 1.0
#> 9 model9 TRUE 1.0 0.8
#> 10 model10 FALSE 0.8 1.0
draw_data_roc(n=20)
#> $specificity
#> model1 model2 model3 model4 model5 model6 model7 model8 model9 model10
#> [1,] 1 0 1 0 1 1 1 1 1 1
#> [2,] 1 1 1 1 1 1 1 1 1 1
#> [3,] 0 1 1 1 1 1 1 1 1 1
#> [4,] 0 0 1 0 1 1 1 1 1 1
#> [5,] 1 1 1 0 0 1 1 1 1 1
#> [6,] 1 1 0 1 1 1 1 1 1 1
#> [7,] 0 1 1 1 1 1 1 1 1 1
#> [8,] 1 1 0 0 1 1 1 1 1 1
#> [9,] 1 1 1 1 1 1 1 1 1 1
#> [10,] 0 1 1 1 1 1 1 1 1 1
#>
#> $sensitivity
#> model1 model2 model3 model4 model5 model6 model7 model8 model9 model10
#> [1,] 1 0 1 1 1 1 1 1 1 0
#> [2,] 1 0 0 0 0 0 0 0 0 0
#> [3,] 1 0 1 1 1 1 1 1 0 0
#> [4,] 1 1 1 1 1 1 1 1 1 1
#> [5,] 1 1 1 1 1 1 1 1 1 1
#> [6,] 1 0 1 1 1 1 1 1 1 1
#> [7,] 1 1 1 1 1 1 1 1 1 1
#> [8,] 1 1 1 1 1 1 1 1 1 1
#> [9,] 0 1 1 1 1 1 1 1 1 1
#> [10,] 0 1 1 1 1 1 1 1 1 1
#>
#> attr(,"info")
#> model auc cutoff se sp
#> 1 model1 0.850 0.7026076 0.7773072 0.7588499
#> 2 model2 0.875 0.9744355 0.7429298 0.8350798
#> 3 model3 0.900 0.7414402 0.8579035 0.7707867
#> 4 model4 0.925 0.6637751 0.9149729 0.7465829
#> 5 model5 0.925 0.8579379 0.8805752 0.8045366
#> 6 model6 0.950 0.5861100 0.9590761 0.7210992
#> 7 model7 0.950 0.9744355 0.9117706 0.8350798
#> 8 model8 0.950 1.0909332 0.8916296 0.8623489
#> 9 model9 0.950 1.1685983 0.8764814 0.8787172
#> 10 model10 0.950 1.4792588 0.8014789 0.9304644
Remark: Synthetic data comes at the ‘compared’ level, meaning the labels 1 and 0 indicate correct and false predictions, respectively. There is no need to call compare() in addition.
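For instance, as a minimal sketch, the synthetic data generated above can be passed to evaluate() directly:
## synthetic data is already at the 'compared' level, so it can be evaluated right away
evaluate(draw_data_lfc(n = 20))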
The pipe operator ‘%>%’ allows us to chain together subsequent operations in R. This is useful, as the evaluate function expects preprocessed (‘compared’) data indicating correct (1) and false (0) predictions.
M %>%
  categorize() %>%
  compare(y) %>%
  evaluate()
#> [cases] evaluation results:
#> $specificity
#> parameter hypothesis estimate lower upper pval pval_all reject reject_all
#> 1 V1_0 == 0.75 0.4 0.0080 0.7920 0.0401 0.2266 FALSE FALSE
#> 2 V2_0 == 0.75 0.8 0.4799 1.1201 0.3797 0.3797 FALSE FALSE
#> 3 V3_0 == 0.75 0.6 0.2080 0.9920 0.2266 0.2266 FALSE FALSE
#>
#> $sensitivity
#> parameter hypothesis estimate lower upper pval pval_all reject reject_all
#> 1 V1_0 == 0.75 0.6 0.208 0.992 0.2266 0.2266 FALSE FALSE
#> 2 V2_0 == 0.75 0.6 0.208 0.992 0.2266 0.3797 FALSE FALSE
#> 3 V3_0 == 0.75 0.6 0.208 0.992 0.2266 0.2266 FALSE FALSE
The R command
?evaluate
gives an overview of the arguments of the evaluate function.
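As an illustration, the following sketch spells out a call with the arguments that are also used later in this vignette; the inline comments give their presumed meaning (see ?evaluate for the authoritative documentation).
evaluate(compare(yhat, y),
         alternative = "greater",    # one-sided testing against the benchmark
         alpha = 0.025,              # significance level
         benchmark = c(0.7, 0.8),    # benchmark values for the performance metrics
         analysis = "co-primary",    # joint (co-primary) analysis of both metrics
         regu = TRUE,                # regularized estimation
         adj = "maxt")               # maxT adjustment for multiple comparisons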
Together, these arguments determine the hypothesis system under consideration, namely
\(H_0: \forall j \ \exists g: \theta_j^g \leq \theta_0^g,\)
i.e. under the null hypothesis every candidate \(j\) fails to exceed the benchmark \(\theta_0^g\) for at least one metric \(g\).
In the application of primary interest, diagnostic accuracy studies, this simplifies to \(G=2\) with \(\theta^1 = Se\) and \(\theta^2 = Sp\) denoting the sensitivity and specificity of a medical test or classification rule. In this case we aim to reject the global null hypothesis
\(H_0: \forall j: Se_j \leq Se_0 \vee Sp_j \leq Sp_0,\)
i.e. to show that at least one candidate exceeds both benchmarks \(Se_0\) and \(Sp_0\) simultaneously.
In the following, we highlight the difference between the “co-primary” analysis (comparison regions) and a “full” analysis (confidence regions).
set.seed(1337)
data <- draw_data_roc(n=120, prev=c(0.25, 0.75), m=4,
                      delta=0.05, e=10, auc=seq(0.90, 0.95, 0.025), rho=c(0.25, 0.25))
head(data)
#> $specificity
#> model1 model2 model3 model4
#> [1,] 1 1 1 1
#> [2,] 0 0 1 1
#> [3,] 1 1 1 1
#> [4,] 1 1 1 1
#> [5,] 1 1 1 1
#> [6,] 1 1 1 1
#> [7,] 1 1 1 1
#> [8,] 1 1 1 1
#> [9,] 1 1 1 1
#> [10,] 1 1 1 1
#> [11,] 1 1 1 1
#> [12,] 1 1 1 1
#> [13,] 1 1 1 1
#> [14,] 1 1 1 1
#> [15,] 1 1 0 0
#> [16,] 0 0 0 0
#> [17,] 0 1 1 1
#> [18,] 1 1 1 1
#> [19,] 1 1 1 1
#> [20,] 1 1 0 0
#> [21,] 1 1 1 1
#> [22,] 0 0 1 1
#> [23,] 1 1 1 1
#> [24,] 1 1 1 1
#> [25,] 1 1 1 1
#> [26,] 1 1 1 1
#> [27,] 0 0 1 1
#> [28,] 1 1 1 1
#> [29,] 1 1 1 1
#> [30,] 1 1 0 0
#> [31,] 1 1 1 1
#> [32,] 1 1 1 1
#> [33,] 1 1 1 1
#> [34,] 1 1 1 1
#> [35,] 0 0 1 1
#> [36,] 1 1 1 1
#> [37,] 0 0 1 1
#> [38,] 1 1 1 1
#> [39,] 1 1 1 1
#> [40,] 1 1 1 1
#> [41,] 1 1 1 1
#> [42,] 1 1 1 1
#> [43,] 1 1 1 1
#> [44,] 0 0 1 1
#> [45,] 1 1 1 1
#> [46,] 1 1 1 1
#> [47,] 1 1 1 1
#> [48,] 1 1 1 1
#> [49,] 1 1 1 1
#> [50,] 0 0 1 1
#> [51,] 1 1 1 1
#> [52,] 1 1 1 1
#> [53,] 1 1 1 1
#> [54,] 1 1 1 1
#> [55,] 1 1 1 1
#> [56,] 1 1 1 1
#> [57,] 1 1 1 1
#> [58,] 1 1 1 1
#> [59,] 1 1 1 1
#> [60,] 1 1 1 1
#> [61,] 1 1 1 1
#> [62,] 0 1 1 1
#> [63,] 1 1 1 1
#> [64,] 1 1 1 1
#> [65,] 0 1 1 1
#> [66,] 1 1 1 1
#> [67,] 0 0 1 1
#> [68,] 1 1 1 1
#> [69,] 1 1 1 1
#> [70,] 1 1 1 1
#> [71,] 1 1 1 1
#> [72,] 1 1 1 1
#> [73,] 0 1 0 0
#> [74,] 1 1 1 1
#> [75,] 1 1 1 1
#> [76,] 0 0 1 1
#> [77,] 1 1 1 1
#> [78,] 0 1 1 1
#> [79,] 1 1 1 1
#> [80,] 1 1 1 1
#> [81,] 1 1 1 1
#> [82,] 0 0 1 1
#> [83,] 0 1 1 1
#> [84,] 1 1 1 1
#> [85,] 1 1 1 1
#> [86,] 1 1 1 1
#> [87,] 1 1 1 1
#> [88,] 1 1 1 1
#> [89,] 1 1 1 1
#> [90,] 1 1 1 1
#>
#> $sensitivity
#> model1 model2 model3 model4
#> [1,] 0 0 1 1
#> [2,] 1 1 1 1
#> [3,] 1 1 1 1
#> [4,] 1 1 0 0
#> [5,] 1 1 1 1
#> [6,] 1 1 1 1
#> [7,] 1 1 1 1
#> [8,] 1 1 1 1
#> [9,] 1 1 1 1
#> [10,] 1 0 1 1
#> [11,] 1 1 1 0
#> [12,] 1 1 1 1
#> [13,] 1 1 0 0
#> [14,] 1 1 0 0
#> [15,] 1 1 1 1
#> [16,] 1 1 1 1
#> [17,] 1 1 1 1
#> [18,] 1 1 0 0
#> [19,] 1 1 0 0
#> [20,] 1 1 1 1
#> [21,] 1 1 1 1
#> [22,] 1 1 1 1
#> [23,] 1 1 1 1
#> [24,] 1 1 1 1
#> [25,] 1 1 1 1
#> [26,] 1 1 1 1
#> [27,] 1 1 1 1
#> [28,] 1 1 1 1
#> [29,] 1 0 0 0
#> [30,] 1 1 1 1
## comparison regions
results_comp <- data %>% evaluate(alternative = "greater",
                                  alpha = 0.025,
                                  benchmark = c(0.7, 0.8),
                                  analysis = "co-primary",
                                  regu = TRUE,
                                  adj = "maxt")
visualize(results_comp)
## confidence regions
results_conf <- data %>% evaluate(alternative = "greater",
                                  alpha = 0.025,
                                  benchmark = c(0.7, 0.8),
                                  analysis = "full",
                                  regu = TRUE,
                                  adj = "maxt")
visualize(results_conf)
As we can see, the comparison regions are more liberal than the confidence regions.
A second vignette shows an application of the cases package to the Breast Cancer Wisconsin Diagnostic (wdbc) data set.
vignette("example_wdbc", "cases")