To speed up compute time for normalization roughly 10-fold, set `fast.km = TRUE`.
This argument can be specified with either the original method that uses
empty droplets, `DSBNormalizeProtein()`, or the dsb method that requires
only the raw counts for cells and no empty drops, `ModelNegativeADTnorm()`.
Below is a benchmark comparing 3 normalizations on 1e6 cells.
How to use the fast method for datasets without empty drops:
# Names of the isotype control proteins, passed below as
# isotype.control.name.vec.
isotypes <- c(
  "MouseIgG1kappaisotype_PROT",
  "MouseIgG2akappaisotype_PROT",
  "Mouse IgG2bkIsotype_PROT",
  "RatIgG2bkIsotype_PROT"
)

# Normalize from the cell counts alone (no empty droplets supplied);
# fast.km = TRUE selects the fast method.
norm.adt <- ModelNegativeADTnorm(
  cell_protein_matrix = cells_citeseq_mtx,
  denoise.counts = TRUE,
  use.isotype.control = TRUE,
  isotype.control.name.vec = isotypes,
  fast.km = TRUE
)
How to use the fast method for datasets with empty droplets specified (see main vignette):
# Normalize with the empty-droplet method; fast.km = TRUE selects the fast
# method. The cell and empty-droplet matrices are the example datasets
# accessed from the dsb namespace (the same pair used in the comparison
# example further down). `isotypes` is the isotype control name vector
# defined in the previous example.
norm.adt <- DSBNormalizeProtein(
  cell_protein_matrix = dsb::cells_citeseq_mtx,
  empty_drop_matrix = dsb::empty_drop_citeseq_mtx,
  denoise.counts = TRUE,
  use.isotype.control = TRUE,
  isotype.control.name.vec = isotypes,
  fast.km = TRUE
)
Differences in the resulting normalized values between the two methods:
library(dsb)

# Point color shared by both scatter plots.
r <- "deepskyblue3"

# specify isotypes: rows 67-70 of the cell matrix row names
isotypes.names <- rownames(cells_citeseq_mtx)[67:70]

# Fast method: same inputs as the original call below, plus fast.km = TRUE.
# return.stats = TRUE makes the function return a list, so that the
# normalized values can be read from $dsb_normalized_matrix afterwards.
norm <- DSBNormalizeProtein(
  cell_protein_matrix = dsb::cells_citeseq_mtx,
  empty_drop_matrix = dsb::empty_drop_citeseq_mtx,
  denoise.counts = TRUE,
  use.isotype.control = TRUE,
  isotype.control.name.vec = isotypes.names,
  return.stats = TRUE,
  fast.km = TRUE
)

# original method
norm.original <- dsb::DSBNormalizeProtein(
  cell_protein_matrix = dsb::cells_citeseq_mtx,
  empty_drop_matrix = dsb::empty_drop_citeseq_mtx,
  denoise.counts = TRUE,
  use.isotype.control = TRUE,
  isotype.control.name.vec = isotypes.names,
  return.stats = TRUE
)

n.original <- norm.original$dsb_normalized_matrix
n.fast <- norm$dsb_normalized_matrix

# individual correlations: one panel per protein, original vs fast values.
# The previous graphics settings are saved and restored so that later plots
# are not drawn into this two-panel layout.
old.par <- par(mfrow = c(1, 2))
for (protein in c("CD8_PROT", "CD4_PROT")) {
  plot(
    n.original[protein, ], n.fast[protein, ],
    pch = 16,
    font.main = 1,
    col = adjustcolor(r, alpha.f = 0.2),
    cex = 0.6,
    xlab = "dsb original",
    ylab = "dsb km.fast",
    # "CD8_PROT" -> "CD8 Normalized ADT"
    main = paste(sub("_PROT$", "", protein), "Normalized ADT")
  )
}
par(old.par)
Correlation of normalized values:
# Per-protein Pearson correlation between the original and fast normalized
# matrices. Rows are paired by position, so first check that both matrices
# list the proteins in the same order.
stopifnot(identical(rownames(n.original), rownames(n.fast)))

# vapply guarantees a numeric vector with one value per protein, including
# when the matrices have zero rows.
correlations <- vapply(
  seq_len(nrow(n.original)),
  function(x) cor(n.original[x, ], n.fast[x, ], method = "pearson"),
  numeric(1)
)

# plot the distribution of correlations, with one rug tick per protein
hist(
  correlations,
  breaks = 20,
  xlim = c(0.97, 1),
  main = "correlation per protein\n km.fast vs original method",
  font.main = 1,
  xlab = "Pearson correlation",
  freq = FALSE,
  col = "lightgray",
  border = "white"
)
rug(correlations)