We can work an example similar to the rquery
example using a
data.table
back-end.
# data example
dL <- wrapr::build_frame(
"subjectID", "surveyCategory" , "assessmentTotal" |
1 , "withdrawal behavior", 5 |
1 , "positive re-framing", 2 |
2 , "withdrawal behavior", 3 |
2 , "positive re-framing", 4 )
scale <- 0.237
# example rquery pipeline
rquery_pipeline <- local_td(dL) %.>%
extend_nse(.,
one = 1) %.>%
extend_nse(.,
probability =
exp(assessmentTotal * scale)/
sum(exp(assessmentTotal * scale)),
count = sum(one),
partitionby = 'subjectID') %.>%
extend_nse(.,
rank = cumsum(one),
partitionby = 'subjectID',
orderby = c('probability', 'surveyCategory')) %.>%
extend_nse(.,
isdiagnosis = rank == count,
diagnosis = surveyCategory) %.>%
select_rows_nse(.,
isdiagnosis == TRUE) %.>%
select_columns(.,
c('subjectID', 'diagnosis', 'probability')) %.>%
orderby(., 'subjectID')
Show expanded form of query tree.
mk_td("dL", c(
"subjectID",
"surveyCategory",
"assessmentTotal")) %.>%
extend(.,
one := 1) %.>%
extend(.,
probability := exp(assessmentTotal * 0.237) / sum(exp(assessmentTotal * 0.237)),
count := sum(one),
partitionby = c('subjectID'),
orderby = c(),
reverse = c()) %.>%
extend(.,
rank := cumsum(one),
partitionby = c('subjectID'),
orderby = c('probability', 'surveyCategory'),
reverse = c()) %.>%
extend(.,
isdiagnosis := rank == count,
diagnosis := surveyCategory) %.>%
select_rows(.,
isdiagnosis == TRUE) %.>%
select_columns(.,
c('subjectID', 'diagnosis', 'probability')) %.>%
order_rows(.,
c('subjectID'),
reverse = c(),
limit = NULL)
Execute the calculation.
## subjectID diagnosis probability
## 1 1 withdrawal behavior 0.6706221
## 2 2 positive re-framing 0.5589742