Bonus materials: mofa
To get Ricard Argelaguet's details about integration methods please check his thesis here Download check his thesis here.
Function to regress out technical covariates (click to expand)
RegressOutMatrix <- function(mtx, covariates = NULL, features_idx = NULL, verbose = TRUE) {
# Check features_idx
if (is.null(features_idx)) {
features_idx <- 1:nrow(mtx)
}
if (is.character(features_idx)) {
features_idx <- intersect(features_idx, rownames(mtx))
if (length(features_idx) == 0) {
stop("Cannot use features that are beyond the scope of mtx")
}
} else if (max(features_idx) > nrow(mtx)) {
stop("Cannot use features that are beyond the scope of mtx")
}
# Check dataset dimensions
if (nrow(covariates) != ncol(mtx)) {
stop("Uneven number of cells between latent data and expression data")
}
# Subset
mtx <- mtx[features_idx,]
mtx.dimnames <- dimnames(mtx)
# Create formula for regression
vars.to.regress <- colnames(covariates)
fmla <- paste('GENE ~', paste(vars.to.regress, collapse = '+')) %>% as.formula
# In this code, we'll repeatedly regress different Y against the same X
# (covariates) in order to calculate residuals. Rather that repeatedly
# call lm to do this, we'll avoid recalculating the QR decomposition for the
# covariates matrix each time by reusing it after calculating it once
regression.mat <- cbind(covariates, mtx[1,])
colnames(regression.mat) <- c(colnames(covariates), "GENE")
qr <- lm(fmla, data = regression.mat, qr = TRUE)$qr
rm(regression.mat)
# Make results matrix
data.resid <- matrix(
nrow = nrow(mtx),
ncol = ncol(mtx)
)
if (verbose) pb <- txtProgressBar(char = '=', style = 3, file = stderr())
# Extract residuals from each feature by using the pre-computed QR decomposition
for (i in 1:length(features_idx)) {
regression.mat <- cbind(covariates, mtx[features_idx[i], ])
colnames(regression.mat) <- c(vars.to.regress, 'GENE')
regression.mat <- qr.resid(qr = qr, y = mtx[features_idx[i],]) # The function qr.resid returns the residuals when fitting y to the matrix with QR decomposition.
data.resid[i, ] <- regression.mat
if (verbose) {
setTxtProgressBar(pb = pb, value = i / length(features_idx))
}
}
if (verbose) close(con = pb)
dimnames(data.resid) <- mtx.dimnames
return(data.resid)
}