% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/RcppExports.R
\name{bdSplit_matrix_hdf5}
\alias{bdSplit_matrix_hdf5}
\title{Split HDF5 Dataset into Submatrices}
\usage{
bdSplit_matrix_hdf5(
  filename,
  group,
  dataset,
  outgroup = NULL,
  outdataset = NULL,
  nblocks = NULL,
  blocksize = NULL,
  bycols = TRUE,
  overwrite = FALSE
)
}
\arguments{
\item{filename}{Character string. Path to the HDF5 file.}

\item{group}{Character string. Path to the group containing the input dataset.}

\item{dataset}{Character string. Name of the dataset to split.}

\item{outgroup}{Character string (optional). Output group path. If \code{NULL},
uses the input group.}

\item{outdataset}{Character string (optional). Base name for output datasets.
If \code{NULL}, uses the input dataset name with a block number suffix.}

\item{nblocks}{Integer (optional). Number of blocks to split into.
Mutually exclusive with \code{blocksize}.}

\item{blocksize}{Integer (optional). Size of each block.
Mutually exclusive with \code{nblocks}.}

\item{bycols}{Logical (optional). Whether to split by columns (TRUE) or
rows (FALSE). Default is TRUE.}

\item{overwrite}{Logical (optional). Whether to overwrite existing datasets.
Default is FALSE.}
}
\value{
A list with the components below; if an error occurs, all string values are returned as empty strings (""):
\describe{
\item{fn}{Character string with the HDF5 filename}
\item{ds}{Character string with the output group path where the split
datasets are stored. Multiple datasets are created in this location, named
\code{<outdataset>.1}, \code{<outdataset>.2}, etc.}
}
}
\description{
Splits a large dataset in an HDF5 file into smaller submatrices, with
support for both row-wise and column-wise splitting.
}
\details{
This function provides efficient dataset splitting capabilities with:
\itemize{
\item Splitting options:
\itemize{
\item Row-wise or column-wise splitting
\item Fixed block size splitting
\item Fixed block count splitting
}
\item Implementation features:
\itemize{
\item Memory-efficient processing
\item Block-based operations
\item Safe file operations
\item Progress reporting
}
}

The function supports two splitting strategies:
\enumerate{
\item By number of blocks: Splits the dataset into a specified number of
roughly equal-sized blocks
\item By block size: Splits the dataset into blocks of a specified size
}
}
\examples{
\dontrun{
library(BigDataStatMeth)

# Create test data
data <- matrix(rnorm(1000), 100, 10)

# Save to HDF5
fn <- "test.hdf5"
bdCreate_hdf5_matrix(fn, data, "data", "matrix1",
                     overwriteFile = TRUE)

# Split by number of blocks
bdSplit_matrix_hdf5(
  filename = fn,
  group = "data",
  dataset = "matrix1",
  outgroup = "data_split",
  outdataset = "block",
  nblocks = 4,
  bycols = TRUE
)

# Split by block size
bdSplit_matrix_hdf5(
  filename = fn,
  group = "data",
  dataset = "matrix1",
  outgroup = "data_split2",
  outdataset = "block",
  blocksize = 25,
  bycols = TRUE
)

# Cleanup
if (file.exists(fn)) {
  file.remove(fn)
}
}

}
\references{
\itemize{
\item The HDF Group. (2000-2010). HDF5 User's Guide.
}
}
\seealso{
\itemize{
\item \code{\link{bdCreate_hdf5_matrix}} for creating HDF5 matrices
}
}
