## Paul E. Johnson ## 2013-07-10 ## I like 2 dimensional arrays: matrices. I like data frames. Two ## separate clients have asked for array help this week. If you saw ## data-array-1.R, you see I made some progress. Maybe then I'll be ## more ready to help when a problem like that comes along again if I ## work out some more array examples. ## But I still hate it. I'm scouring Internet for examples to chew on. ## ## Example 1. a 3x3x9 matrix that needs to be grouped by values of ## a factor. ## http://stackoverflow.com/questions/16135877/applying-a-function-to-a-multidimensional-array-with-grouping-variable ## User asked about working with an array. V <- 1:81 dim(V) <- c(3, 3, 9) ## fast way to create array V group <- c('a','a','a','b','b','b','c','c','c') ## User asks for advice about condensing the 3x3x9 array. ## "Given that the grouping variable has 3 levels (a, b and c), ## the result (out) I'm looking for is an array of dimension 3x3x3." ## Here's a solution that depends on the package abind library(abind) out1 <- apply(V[ , , c(1:3)], c(1, 2), sum) out2 <- apply(V[ , , c(4:6)], c(1, 2), sum) out3 <- apply(V[ , , c(7:9)], c(1, 2), sum) out <- abind(out1, out2, out3, along = 3) ## User asks for a general R solution, not depending on abind. ## I have no idea what this code means. out <- apply(V, c(1, 2), by, group, sum) ## The group and sum arguments aren't named, I can't tell what they are ## used for. So I had to go run by a whole lot of times, to figure ## out what it is doing. ## Aha. syntax: by(x, INDICES = group, FUN = sum) ## and x is a sub array extracted from V. Headache. ## Take one example from the array, choosing this piece: myx1 <- V[1, 1, ] ## Apply by to just that one bit by(myx1, INDICES = group, FUN = sum) ## Take another piece myx2 <- V[1, 2, ] by(myx2, INDICES = group, FUN = sum) ## so the apply puzzle clear up. It cycles over ## This uses aperm to take same and re-organize the dimensions. out <- aperm(apply(V, c(1, 2), by, group, sum), c(2, 3, 1)) ## OK, so I think I understand that one. ## Example 2. This is about "compressing" 2 "sections" into 1. ## I think it helps to visualize it with named dimensions and columns and rows V <- array(1:27, c(3, 3, 3), dimnames = list("myDim1" = c("A","B","C"), "myDim2" = c("G","H","I"), "myDim3" = c("X","Y","Z"))) V ## Names really help me see what is up here ## Now you can ask for pieces by name! V["A","G", ] V["A", , ] ## Suppose you want to aggregate values "X" and "Y" by summing. V[ , , "X"] V[ , , "Y"] V[ , , "X"] + V[ , , "Y"] ## Now, replace the bit that was V[ , , c("X","Y")] with that new thing. ## Appears this works, but seems dumb to me. V[ , , "X"] <- V[ , , "X"] + V[ , , "Y"] Vnew <- V[ , , c("X","Z")] Vnames <- dimnames(Vnew) Vnames$myDim3[1] <- "XandY" dimnames(Vnew) <- Vnames Vnew ## There's got to be a better way to collapse 2 slices ## Example 4. Take 3, but with a missing. ## Now, a relevant problem. What if there is an NA in one spot? V["A", "G", "X"] <- NA V V[ , , "X"] V[ , , "Y"] V[ , , "X"] + V[ , , "Y"] ## If you want a missing in there, OK. But what if you want the ## NA to be treated as zero. Harder. ## Put the "Z" part out of the way. Here's one way V[ , , -3] ## That's risky, 3 might be the wrong number. Can choose by name. V[ , , -which(dimnames(V)[[3]] == "Z")] VxAndy <- apply(V[ , , -which(dimnames(V)[[3]] == "Z")], MARGIN = c(1,2), FUN = sum, na.rm = T) VxAndy Vnew2 <- V[ , , c("X", "Z")] Vnew2[ , , "X"] <- VxAndy Vnames <- dimnames(Vnew2) Vnames$myDim3[1] <- "XandY" dimnames(Vnew2) <- Vnames Vnew2 ## Remember where we started. Vnew ## Example 3. Transpose one slice from an array. ## http://stackoverflow.com/questions/13811133/apply-a-function-to-each-layer-of-a-3d-array-returning-an-array A <- array (1:27, c(3,3,3)) A ## similar idea to previous example. Go through first and second dimensions, ## apply(A, 1:2, t) ## The plyr package helps here too. I've seen it do amazing things to ## rearrange repeated observation data from wide to long. library(plyr) aaply(A,3,t)