% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataprep.R
\name{dataprep}
\alias{dataprep}
\title{Occurrence matrix (e.g., species by site) data preparation for affinity() function}
\usage{
dataprep(
  data,
  row.or.col,
  which.row.or.col = NULL,
  datatype = NULL,
  threshold = NULL,
  class0.rule = NULL
)
}
\arguments{
\item{data}{occurrence matrix (binary or abundance) in matrix or dataframe format}

\item{row.or.col}{specify whether pairs of rows or pairs of columns are analyzed for affinity: 'row' or 'column' (the examples below pass 'col' for columns).}

\item{which.row.or.col}{a vector of names or numbers of the rows/columns to analyze when only a subset of the data is of interest; optional argument that defaults to all rows/columns.}

\item{datatype}{specify if the datatype is 'abundance' or 'binary'; optional argument with default 'binary'.}

\item{threshold}{cutoff for converting abundance data to binary; needed if datatype is 'abundance'}

\item{class0.rule}{'less.or.equal' or 'less'. 'less.or.equal' converts values less than or equal to the threshold to 0 and all other values to 1. 'less' converts values less than the threshold to 0 and values equal to or greater than the threshold to 1.}
}
\value{
A dataframe in binary 1/0 format ready to be analyzed by affinity(). Abundance data is converted to binary.
A subset of the input data is returned if certain rows or columns are selected.
If rows are being analyzed for affinity between pairs, they are brought to columns by transposing the data.
}
\description{
This function checks the format of the data for its appropriateness, converts abundance to binary and subsets the data for the selected columns or rows.
Note that the affinity can be computed between columns or between rows. In the latter case, the dataset is transposed to bring rows into the columns.
}
\details{
This function does the following:
\enumerate{
\item checks if the supplied data is in matrix or dataframe format, which are the acceptable formats
\item if rows are selected for affinity analysis, it transposes the dataframe
\item subsets the data if specific columns or rows are selected for analysis; the selection can be made with number or name of the rows/columns
\item checks if the selected cols/rows are in numeric or integer format or not
\item checks if the selected cols/rows have data in binary 1/0 format or not; if datatype is specified as abundance, it converts it to binary format following the supplied rule
}
}
\examples{
# build a small 10 x 4 abundance matrix with named rows and columns
matrix.data <- matrix(1:40, nrow = 10, ncol = 4)
rownames(matrix.data) <- paste0("id_", seq_len(nrow(matrix.data)))
colnames(matrix.data) <- paste0("variable_", seq_len(ncol(matrix.data)))

# introduce a few missing values and one zero abundance
matrix.data[1, c(1, 2, 4)] <- NA
matrix.data[2, 3] <- NA
matrix.data[10, 4] <- 0

# abundance data with some missing and some zero occurrences
matrix.data

# some good examples: all columns, selected rows, selected columns, all rows
dataprep(
  data = matrix.data, row.or.col = "col",
  datatype = "abundance", threshold = 9, class0.rule = "less"
)
dataprep(
  data = matrix.data, row.or.col = "row",
  which.row.or.col = c("id_2", "id_4"),
  datatype = "abundance", threshold = 10, class0.rule = "less"
)
dataprep(
  data = matrix.data, row.or.col = "col",
  which.row.or.col = c("variable_1", "variable_4"),
  datatype = "abundance", threshold = 8, class0.rule = "less"
)
dataprep(
  data = matrix.data, row.or.col = "col",
  which.row.or.col = c("variable_1", "variable_3", "variable_4"),
  datatype = "abundance", threshold = 8, class0.rule = "less.or.equal"
)
dataprep(
  data = matrix.data, row.or.col = "row",
  datatype = "abundance", threshold = 10, class0.rule = "less"
)
dataprep(
  data = matrix.data, row.or.col = "col",
  datatype = "abundance", threshold = 10, class0.rule = "less"
)

# bad examples: some of the requested rows or cols are not in the data
\dontrun{
  dataprep(
    data = matrix.data, row.or.col = "row",
    which.row.or.col = c("id_1", "id_4", "id_11", "id_39"),
    datatype = "abundance", threshold = 10, class0.rule = "less"
  )
  dataprep(
    data = matrix.data, row.or.col = "row",
    which.row.or.col = c(4,7,17),
    datatype = "abundance", threshold = 10, class0.rule = "less"
  )
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = 2:12,
    datatype = "abundance", threshold = 10, class0.rule = "less"
  )
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = c("variable_1", "variable_9", "variable_6"),
    datatype = "abundance", threshold = 10, class0.rule = "less"
  )
}


# what happens if only a single row or column is picked
\dontrun{
  dataprep(
    data = matrix.data, row.or.col = "row",
    which.row.or.col = c("id_4"),
    datatype = "abundance", threshold = 10, class0.rule = "less"
  )
}

# the function fails when a required argument is missing
\dontrun{
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = c("variable_1", "variable_4"),
    datatype = "abundance", threshold = 10
  )
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = c("variable_1", "variable_4"),
    datatype = "abundance", class0.rule = "less.or.equal"
  )
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = c("variable_1", "variable_4"),
    datatype = "abundance"
  )
}

# what happens with abundance data when the datatype is not specified
\dontrun{
  dataprep(
    data = matrix.data, row.or.col = "col",
    which.row.or.col = c("variable_1", "variable_4")
  )
}

# with binary data, however, the datatype may be left unspecified,
# although specifying it is good practice
matrix.bindata <- dataprep(
  data = matrix.data, row.or.col = "col",
  datatype = "abundance", threshold = 9, class0.rule = "less"
)
matrix.bindata
dataprep(data = matrix.bindata, row.or.col = "col")
dataprep(data = matrix.bindata, row.or.col = "row")
}
\author{
Kumar Mainali
}
