## Paul E. Johnson
## 2013-07-10
## I like 2 dimensional arrays: matrices. I like data frames. Two
## separate clients have asked for array help this week. If you saw
## data-array-1.R, you see I made some progress. Maybe then I'll be
## more ready to help when a problem like that comes along again if I
## work out some more array examples.
## But I still hate it. I'm scouring Internet for examples to chew on.
##
## Example 1. a 3x3x9 matrix that needs to be grouped by values of
## a factor.
## http://stackoverflow.com/questions/16135877/applying-a-function-to-a-multidimensional-array-with-grouping-variable
## User asked about working with an array.
V <- 1:81
dim(V) <- c(3, 3, 9) ## fast way to create array V
group <- c('a','a','a','b','b','b','c','c','c')
## User asks for advice about condensing the 3x3x9 array.
## "Given that the grouping variable has 3 levels (a, b and c),
## the result (out) I'm looking for is an array of dimension 3x3x3."
## Here's a solution that depends on the package abind
library(abind)
out1 <- apply(V[ , , c(1:3)], c(1, 2), sum)
out2 <- apply(V[ , , c(4:6)], c(1, 2), sum)
out3 <- apply(V[ , , c(7:9)], c(1, 2), sum)
out <- abind(out1, out2, out3, along = 3)
## User asks for a general R solution, not depending on abind.
## I have no idea what this code means.
out <- apply(V, c(1, 2), by, group, sum)
## The group and sum arguments aren't named, I can't tell what they are
## used for. So I had to go run by a whole lot of times, to figure
## out what it is doing.
## Aha. syntax: by(x, INDICES = group, FUN = sum)
## and x is a sub array extracted from V. Headache.
## Take one example from the array, choosing this piece:
myx1 <- V[1, 1, ]
## Apply by to just that one bit
by(myx1, INDICES = group, FUN = sum)
## group: a
## [1] 30
## --------------------------------------------------------
## group: b
## [1] 111
## --------------------------------------------------------
## group: c
## [1] 192
## Take another piece
myx2 <- V[1, 2, ]
by(myx2, INDICES = group, FUN = sum)
## group: a
## [1] 39
## --------------------------------------------------------
## group: b
## [1] 120
## --------------------------------------------------------
## group: c
## [1] 201
## so the apply puzzle clear up. It cycles over
## This uses aperm to take same and re-organize the dimensions.
out <- aperm(apply(V, c(1, 2), by, group, sum), c(2, 3, 1))
## OK, so I think I understand that one.
## Example 2. This is about "compressing" 2 "sections" into 1.
## I think it helps to visualize it with named dimensions and columns and rows
V <- array(1:27, c(3, 3, 3), dimnames = list("myDim1" = c("A","B","C"),
"myDim2" = c("G","H","I"),
"myDim3" = c("X","Y","Z")))
V
## , , myDim3 = X
##
## myDim2
## myDim1 G H I
## A 1 4 7
## B 2 5 8
## C 3 6 9
##
## , , myDim3 = Y
##
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
##
## , , myDim3 = Z
##
## myDim2
## myDim1 G H I
## A 19 22 25
## B 20 23 26
## C 21 24 27
## Names really help me see what is up here
## Now you can ask for pieces by name!
V["A","G", ]
## X Y Z
## 1 10 19
V["A", , ]
## myDim3
## myDim2 X Y Z
## G 1 10 19
## H 4 13 22
## I 7 16 25
## Suppose you want to aggregate values "X" and "Y" by summing.
V[ , , "X"]
## myDim2
## myDim1 G H I
## A 1 4 7
## B 2 5 8
## C 3 6 9
V[ , , "Y"]
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
V[ , , "X"] + V[ , , "Y"]
## myDim2
## myDim1 G H I
## A 11 17 23
## B 13 19 25
## C 15 21 27
## Now, replace the bit that was V[ , , c("X","Y")] with that new thing.
## Appears this works, but seems dumb to me.
V[ , , "X"] <- V[ , , "X"] + V[ , , "Y"]
Vnew <- V[ , , c("X","Z")]
Vnames <- dimnames(Vnew)
Vnames$myDim3[1] <- "XandY"
dimnames(Vnew) <- Vnames
Vnew
## , , myDim3 = XandY
##
## myDim2
## myDim1 G H I
## A 11 17 23
## B 13 19 25
## C 15 21 27
##
## , , myDim3 = Z
##
## myDim2
## myDim1 G H I
## A 19 22 25
## B 20 23 26
## C 21 24 27
## There's got to be a better way to collapse 2 slices
## Example 4. Take 3, but with a missing.
## Now, a relevant problem. What if there is an NA in one spot?
V["A", "G", "X"] <- NA
V
## , , myDim3 = X
##
## myDim2
## myDim1 G H I
## A NA 17 23
## B 13 19 25
## C 15 21 27
##
## , , myDim3 = Y
##
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
##
## , , myDim3 = Z
##
## myDim2
## myDim1 G H I
## A 19 22 25
## B 20 23 26
## C 21 24 27
V[ , , "X"]
## myDim2
## myDim1 G H I
## A NA 17 23
## B 13 19 25
## C 15 21 27
V[ , , "Y"]
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
V[ , , "X"] + V[ , , "Y"]
## myDim2
## myDim1 G H I
## A NA 30 39
## B 24 33 42
## C 27 36 45
## If you want a missing in there, OK. But what if you want the
## NA to be treated as zero. Harder.
## Put the "Z" part out of the way. Here's one way
V[ , , -3]
## , , myDim3 = X
##
## myDim2
## myDim1 G H I
## A NA 17 23
## B 13 19 25
## C 15 21 27
##
## , , myDim3 = Y
##
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
## That's risky, 3 might be the wrong number. Can choose by name.
V[ , , -which(dimnames(V)[[3]] == "Z")]
## , , myDim3 = X
##
## myDim2
## myDim1 G H I
## A NA 17 23
## B 13 19 25
## C 15 21 27
##
## , , myDim3 = Y
##
## myDim2
## myDim1 G H I
## A 10 13 16
## B 11 14 17
## C 12 15 18
VxAndy <- apply(V[ , , -which(dimnames(V)[[3]] == "Z")], MARGIN = c(1,2), FUN = sum, na.rm = T)
VxAndy
## myDim2
## myDim1 G H I
## A 10 30 39
## B 24 33 42
## C 27 36 45
Vnew2 <- V[ , , c("X", "Z")]
Vnew2[ , , "X"] <- VxAndy
Vnames <- dimnames(Vnew2)
Vnames$myDim3[1] <- "XandY"
dimnames(Vnew2) <- Vnames
Vnew2
## , , myDim3 = XandY
##
## myDim2
## myDim1 G H I
## A 10 30 39
## B 24 33 42
## C 27 36 45
##
## , , myDim3 = Z
##
## myDim2
## myDim1 G H I
## A 19 22 25
## B 20 23 26
## C 21 24 27
## Remember where we started.
Vnew
## , , myDim3 = XandY
##
## myDim2
## myDim1 G H I
## A 11 17 23
## B 13 19 25
## C 15 21 27
##
## , , myDim3 = Z
##
## myDim2
## myDim1 G H I
## A 19 22 25
## B 20 23 26
## C 21 24 27
## Example 3. Transpose one slice from an array.
## http://stackoverflow.com/questions/13811133/apply-a-function-to-each-layer-of-a-3d-array-returning-an-array
A <- array (1:27, c(3,3,3))
A
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 4 7
## [2,] 2 5 8
## [3,] 3 6 9
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 10 13 16
## [2,] 11 14 17
## [3,] 12 15 18
##
## , , 3
##
## [,1] [,2] [,3]
## [1,] 19 22 25
## [2,] 20 23 26
## [3,] 21 24 27
## similar idea to previous example. Go through first and second dimensions,
##
apply(A, 1:2, t)
## , , 1
##
## [,1] [,2] [,3]
## [1,] 1 2 3
## [2,] 10 11 12
## [3,] 19 20 21
##
## , , 2
##
## [,1] [,2] [,3]
## [1,] 4 5 6
## [2,] 13 14 15
## [3,] 22 23 24
##
## , , 3
##
## [,1] [,2] [,3]
## [1,] 7 8 9
## [2,] 16 17 18
## [3,] 25 26 27
## The plyr package helps here too. I've seen it do amazing things to
## rearrange repeated observation data from wide to long.
library(plyr)
aaply(A,3,t)
## , , = 1
##
##
## X1 1 2 3
## 1 1 4 7
## 2 10 13 16
## 3 19 22 25
##
## , , = 2
##
##
## X1 1 2 3
## 1 2 5 8
## 2 11 14 17
## 3 20 23 26
##
## , , = 3
##
##
## X1 1 2 3
## 1 3 6 9
## 2 12 15 18
## 3 21 24 27