% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CalSVG_MERINGUE.R
\name{CalSVG_MERINGUE}
\alias{CalSVG_MERINGUE}
\title{MERINGUE: Moran's I based Spatially Variable Gene Detection}
\usage{
CalSVG_MERINGUE(
  expr_matrix,
  spatial_coords,
  network_method = c("delaunay", "knn"),
  k = 10L,
  filter_dist = NA,
  alternative = c("greater", "less", "two.sided"),
  adjust_method = "BH",
  min_pct_cells = 0.05,
  n_threads = 1L,
  use_cpp = TRUE,
  verbose = TRUE
)
}
\arguments{
\item{expr_matrix}{Numeric matrix of gene expression values.
\itemize{
  \item Rows: genes
  \item Columns: spatial locations (spots/cells)
  \item Values: normalized expression (e.g., log-transformed counts)
}
Row names should be gene identifiers; column names should match
row names of \code{spatial_coords}.}

\item{spatial_coords}{Numeric matrix of spatial coordinates.
\itemize{
  \item Rows: spatial locations (must match columns of \code{expr_matrix})
  \item Columns: coordinate dimensions (x, y, and optionally z)
}}

\item{network_method}{Character string specifying how to construct the
spatial neighborhood network.
\itemize{
  \item \code{"delaunay"} (default): Delaunay triangulation. Creates natural
    neighbors based on geometric triangulation. Good for relatively uniform
    spatial distributions.
  \item \code{"knn"}: K-nearest neighbors. Each spot connected to its k
    nearest neighbors. More robust for irregular distributions.
}}

\item{k}{Integer. Number of neighbors for KNN method. Default is 10.
Ignored when \code{network_method = "delaunay"}.
\itemize{
  \item Smaller k (e.g., 5-6): More local patterns, faster computation
  \item Larger k (e.g., 15-20): Broader patterns, smoother results
}}

\item{filter_dist}{Numeric or NA. Maximum Euclidean distance for neighbors.
Pairs with distance > \code{filter_dist} are not considered neighbors.
Default is NA (no filtering). Useful for:
\itemize{
  \item Removing long-range spurious connections
  \item Focusing on local spatial patterns
}}

\item{alternative}{Character string specifying the alternative hypothesis
for the Moran's I test.
\itemize{
  \item \code{"greater"} (default): Test for positive autocorrelation
    (clustering of similar values). Most appropriate for SVG detection.
  \item \code{"less"}: Test for negative autocorrelation (dissimilar
    values as neighbors).
  \item \code{"two.sided"}: Test for any autocorrelation.
}}

\item{adjust_method}{Character string specifying p-value adjustment method
for multiple testing correction. Passed to \code{p.adjust()}.
Options include: "BH" (default, Benjamini-Hochberg), "bonferroni",
"holm", "hochberg", "hommel", "BY", "fdr", "none".}

\item{min_pct_cells}{Numeric (0-1). Minimum fraction of cells that must
contribute to the spatial pattern for a gene to be retained as SVG.
Default is 0.05 (5\%). Uses LISA (Local Indicators of Spatial Association)
to filter genes driven by only a few outlier cells.
Set to 0 to disable this filter.}

\item{n_threads}{Integer. Number of threads for parallel computation.
Default is 1.
\itemize{
  \item For large datasets: Set to number of available cores
  \item Uses \code{parallel::mclapply()}, which relies on forking; forking
    is not supported on Windows, so parallel execution is unavailable there
}}

\item{use_cpp}{Logical. Whether to use C++ implementation for faster
computation. Default is TRUE. Falls back to R if C++ fails.}

\item{verbose}{Logical. Whether to print progress messages. Default is TRUE.}
}
\value{
A data.frame with SVG detection results, sorted by significance.
  Columns:
  \itemize{
    \item \code{gene}: Gene identifier
    \item \code{observed}: Observed Moran's I statistic, typically within
      [-1, 1]. Positive values indicate clustering, negative values
      indicate dispersion.
    \item \code{expected}: Expected Moran's I under null (approximately -1/(n-1))
    \item \code{sd}: Standard deviation under null hypothesis
    \item \code{z_score}: Standardized test statistic (observed - expected) / sd
    \item \code{p.value}: Raw p-value from normal approximation
    \item \code{p.adj}: Adjusted p-value (multiple testing corrected)
  }
}
\description{
Detect spatially variable genes using the MERINGUE approach based on
Moran's I spatial autocorrelation statistic.

Identifies spatially variable genes by computing Moran's I spatial
autocorrelation statistic for each gene. Genes with significant positive
spatial autocorrelation (similar expression values clustering together)
are identified as SVGs.
}
\details{
\strong{Method Overview:}

MERINGUE uses Moran's I, a classic measure of spatial autocorrelation:
\deqn{I = \frac{n}{W} \frac{\sum_i \sum_j w_{ij}(x_i - \bar{x})(x_j - \bar{x})}{\sum_i (x_i - \bar{x})^2}}

where:
\itemize{
  \item n = number of spatial locations
  \item W = sum of all spatial weights
  \item w_ij = spatial weight between locations i and j
  \item x_i = expression value at location i
  \item x-bar = mean expression value across all locations
}

\strong{Interpretation:}
\itemize{
  \item I > 0: Positive autocorrelation (similar values cluster)
  \item I near 0: Random spatial distribution (the null expectation is
    -1/(n-1), which approaches 0 as n grows)
  \item I < 0: Negative autocorrelation (checkerboard pattern)
}

\strong{Statistical Testing:}
P-values are computed using normal approximation based on analytical
formulas for the expected value and variance of Moran's I under the
null hypothesis of complete spatial randomness.

\strong{Computational Considerations:}
\itemize{
  \item Time complexity: O(n^2) for network construction, O(n*m) for testing
    (n = spots, m = genes)
  \item Memory: O(n^2) for storing spatial weights matrix
  \item For n > 10,000 spots, consider using KNN with small k
}
}
\examples{
# Load example data
data(example_svg_data)
expr <- example_svg_data$logcounts[1:20, ]  # Use subset for speed
coords <- example_svg_data$spatial_coords

\donttest{
# Basic usage (requires RANN package for KNN)
if (requireNamespace("RANN", quietly = TRUE)) {
    results <- CalSVG_MERINGUE(expr, coords, 
                               network_method = "knn", k = 10,
                               verbose = FALSE)
    head(results)

    # Get significant SVGs
    sig_genes <- results$gene[results$p.adj < 0.05]
}
}

}
\references{
\itemize{
  \item Miller, B.F. et al. (2021) Characterizing spatial gene expression
    heterogeneity in spatially resolved single-cell transcriptomic data
    with nonuniform cellular densities. Genome Research.
  \item Moran, P.A.P. (1950) Notes on Continuous Stochastic Phenomena.
    Biometrika.
  \item Cliff, A.D. and Ord, J.K. (1981) Spatial Processes: Models &
    Applications. Pion.
}
}
\seealso{
\code{\link{CalSVG}} for unified interface,
\code{\link{buildSpatialNetwork}} for network construction,
\code{\link{moranI_test}} for individual gene testing
}
