To JSON

There are two types of R objects we need to handle when converting to JSON, simple and complex.

Simple - scalars, vectors, matrices
Complex - data.frames and lists

I’ve categorised them this way because ‘simple’ objects don’t include any form of recursion. That is, a vector can’t contain a data.frame or a list. But a list or data.frame can contain other data.frames, vectors, matrices, scalars, lists, and any combination thereof.

Simple

Simple objects ( scalars, vectors and matrices ) get converted to JSON ARRAYS

## scalar -> single array
to_json( 1 )
#  [1.0]
to_json( "a" )
#  ["a"]

## scalar (unboxed) -> single value
to_json( 1, unbox = TRUE )
#  1.0
to_json( "a", unbox = TRUE )
#  "a"

## vector -> array
to_json( 1:4 )
#  [1,2,3,4]
to_json( letters[1:4] )
#  ["a","b","c","d"]

## named vector -> array (of the elements; the names are dropped)
to_json( c("a" = 1, "b" = 2) )
#  [1.0,2.0]

## matrix -> array of arrays (by row)
to_json( matrix(1:4, ncol = 2) )
#  [[1,3],[2,4]]
to_json( matrix(letters[1:4], ncol = 2))
#  [["a","c"],["b","d"]]

## matrix -> array of arrays (by column)
to_json( matrix(1:4, ncol = 2), by = "column" )
#  [[1,2],[3,4]]
to_json( matrix(letters[1:4], ncol = 2 ), by = "column" )
#  [["a","b"],["c","d"]]

Complex - Lists

List of unnamed vectors gives an ARRAY of ARRAYS (since a vector gets converted to an array)

to_json( list( 1:2, c("a","b") )  )
#  [[1,2],["a","b"]]

A list with named elements gives an OBJECT with named ARRAYS

## List of vectors -> object with named arrays
to_json( list( x = 1:2 ) )
#  {"x":[1,2]}

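A combination of named and unnamed list elements gives both: each named element becomes a key in an OBJECT, and each unnamed element becomes an entry in an ARRAY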

to_json( list( x = 1:2, y = list( letters[1:2] ) ) )
#  {"x":[1,2],"y":[["a","b"]]}

Complex - Data Frames

By default, each row of a data.frame is converted to an object (to maintain the relationship inherent in a row of data)

## data.frame -> array of objects (by row) 
to_json( data.frame( x = 1:2, y = 3:4) )
#  [{"x":1,"y":3},{"x":2,"y":4}]
to_json( data.frame( x = c("a","b"), y = c("c","d")))
#  [{"x":"a","y":"c"},{"x":"b","y":"d"}]

You can set by = "column" to parse the data.frame by columns. And as each column (in this example) is a vector, each vector gets converted to an array. And since the vectors have names (the column names), we get an object of named arrays

## data.frame -> object of arrays (by column)
to_json( data.frame( x = 1:2, y = 3:4), by = "column" )
#  {"x":[1,2],"y":[3,4]}
to_json( data.frame( x = c("a","b"), y = c("c","d") ), by = "column" )
#  {"x":["a","b"],"y":["c","d"]}

Complex - Mixed objects

A data.frame where one column is an ‘AsIs’ list

## data.frame where one column is a list
df <- data.frame( id = 1, val = I(list( x = 1:2 ) ) )
to_json( df )
#  [{"id":1.0,"val":{"x":[1,2]}}]

The data.frame is being parsed ‘by row’, so we get an array of objects. The second column is a list of a named vector, so the val column contains an object of a named array.

Here are the individual components to show how it’s put together

## which we see is made up of
to_json( data.frame( id = 1 ) )
#  [{"id":1.0}]
## and
to_json( list( x = 1:2 ) )
#  {"x":[1,2]}

If we take the same example and parse it ‘by column’ we get the id column treated as a vector, but the list column remains the same

to_json( df, by = "column" )
#  {"id":[1.0],"val":{"x":[1,2]}}

We can build up a more complex example with nested lists inside columns of data.frames


df <- data.frame( id = 1, val = I(list(c(0,0))))
df
#    id  val
#  1  1 0, 0
to_json( df )
#  [{"id":1.0,"val":[0.0,0.0]}]

df <- data.frame( id = 1:2, val = I(list( x = 1:2, y = 3:4 ) ) )
df
#    id  val
#  x  1 1, 2
#  y  2 3, 4
to_json( df )
#  [{"id":1,"val":{"x":[1,2]}},{"id":2,"val":{"y":[3,4]}}]

df <- data.frame( id = 1:2, val = I(list( x = 1:2, y = 3:6 ) ), val2 = I(list( a = "a", b = c("b","c") ) ) )
df
#    id        val val2
#  x  1       1, 2    a
#  y  2 3, 4, 5, 6 b, c
pretty_json( df )
#  [
#      {
#          "id": 1,
#          "val": {
#              "x": [
#                  1,
#                  2
#              ]
#          },
#          "val2": {
#              "a": [
#                  "a"
#              ]
#          }
#      },
#      {
#          "id": 2,
#          "val": {
#              "y": [
#                  3,
#                  4,
#                  5,
#                  6
#              ]
#          },
#          "val2": {
#              "b": [
#                  "b",
#                  "c"
#              ]
#          }
#      }
#  ]

df <- data.frame( id = 1:2, val = I(list( x = 1:2, y = 3:6 ) ), val2 = I(list( a = "a", b = c("b","c") ) ), val3 = I(list( l = list( 1:3, l2 = c("a","b")), 1)) )
df
#    id        val val2         val3
#  x  1       1, 2    a 1:3, c("....
#  y  2 3, 4, 5, 6 b, c            1
pretty_json( df )
#  [
#      {
#          "id": 1,
#          "val": {
#              "x": [
#                  1,
#                  2
#              ]
#          },
#          "val2": {
#              "a": [
#                  "a"
#              ]
#          },
#          "val3": {
#              "l": {
#                  "": [
#                      1,
#                      2,
#                      3
#                  ],
#                  "l2": [
#                      "a",
#                      "b"
#                  ]
#              }
#          }
#      },
#      {
#          "id": 2,
#          "val": {
#              "y": [
#                  3,
#                  4,
#                  5,
#                  6
#              ]
#          },
#          "val2": {
#              "b": [
#                  "b",
#                  "c"
#              ]
#          },
#          "val3": {
#              "": [
#                  1.0
#              ]
#          }
#      }
#  ]

From JSON

Use from_json() to convert from JSON to an R object.

## scalar / vector
js <- '[1,2,3]'
from_json( js )
#  [1] 1 2 3

## matrix
js <- '[[1,2],[3,4],[5,6]]'
from_json( js )
#       [,1] [,2]
#  [1,]    1    2
#  [2,]    3    4
#  [3,]    5    6

## data.frame
js <- '[{"x":1,"y":"a"},{"x":2,"y":"b"}]'
from_json( js )
#    x y
#  1 1 a
#  2 2 b

Simplifying and NAs

By default, from_json() will try to simplify:

arrays to vectors
arrays of arrays to matrices
arrays of objects with consistent keys to data.frames

If an array contains objects with different keys, for example '[{"x":1},{"y":2}]', from_json() will not simplify this to a data.frame, because it would have to assume and insert NAs in rows where data is missing.

js <- '[{"x":1},{"y":2}]'
from_json( js )
#  [[1]]
#  [[1]]$x
#  [1] 1
#  
#  
#  [[2]]
#  [[2]]$y
#  [1] 2

You can override this default and use fill_na = TRUE to force it to a data.frame with NAs in place of missing values

js <- '[{"x":1},{"y":2}]'
from_json( js, fill_na = TRUE )
#     x  y
#  1  1 NA
#  2 NA  2

jsonify

David Cooley

2022-11-10