% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/reinert.R
\name{term_per_cluster}
\alias{term_per_cluster}
\title{Extract Terms and Segments for Document Clusters}
\usage{
term_per_cluster(res, cutree = NULL, k = 1, negative = TRUE)
}
\arguments{
\item{res}{A list containing the results of the Reinert clustering algorithm. Must include at least \code{dtm} (a document-term matrix) and \code{corresp_uce_uc_full} (a correspondence between segments and clusters).}

\item{cutree}{A custom cutree structure. If \code{NULL}, the default \code{cutree_reinart} is used to determine cluster membership.}

\item{k}{A vector of integers specifying the clusters to analyze. Default is \code{1}.}

\item{negative}{Logical. If \code{TRUE}, include negative terms in the results. If \code{FALSE}, exclude them. Default is \code{TRUE}.}
}
\value{
A list with the following components:
\item{terms}{A data frame of significant terms for each cluster. Columns include:
\itemize{
\item \code{chi_square}: Chi-squared statistic for the term.
\item \code{p_value}: P-value of the chi-squared test.
\item \code{sign}: Sign of the association between the term and the cluster (\code{positive}, \code{negative}, or \code{none}).
\item \code{term}: The term itself.
\item \code{freq}: Observed frequency of the term in the cluster.
\item \code{indep}: Expected frequency of the term under independence.
\item \code{cluster}: The cluster ID.
}
}
\item{segments}{A data frame of document segments associated with each cluster. Columns include:
\itemize{
\item \code{uc}: Unique segment identifier.
\item \code{doc_id}: Document ID for the segment.
\item \code{cluster}: Cluster ID.
\item \code{segment}: The text content of each segment.
}
}
}
\description{
This function processes the results of a document clustering algorithm based on the Reinert method.
It computes the terms and their significance for each cluster, as well as the associated document segments.
}
\details{
The function integrates document-term matrix rows for missing segments, calculates term statistics for each cluster,
and filters terms based on their significance. Negative terms are excluded from the results when \code{negative = FALSE}.
}
\examples{

\donttest{
data(mobydick)
res <- reinert(
  x=mobydick,
  k = 10,
  term = "token",
  segment_size = 40,
  min_segment_size = 5,
  min_split_members = 10,
  cc_test = 0.3,
  tsj = 3
)

tc <- term_per_cluster(res, cutree = NULL, k=1:10, negative=FALSE)

head(tc$segments,10)

head(tc$terms,10)

}

}
