``````### Paul Johnson
### June 21, 2010

### Suppose you have a data frame with some variables, and you have
### another variable that is a character vector of some "special"
### variable names. How can you carry out a regression for each of
### the "special" variables?

### Example data: two normal predictors (x1, x2) and four Poisson
### count outcomes (y1 to y4), 100 rows each.
mydf <- data.frame(
  x1 = rnorm(100),
  x2 = rnorm(100),
  y1 = rpois(100, lambda = 18),
  y2 = rpois(100, lambda = 3),
  y3 = rpois(100, lambda = 1),
  y4 = rpois(100, lambda = 1)
)

### The outcomes we want one regression for, given as a character
### vector of column names in mydf:
specialVarNames <- c("y1", "y2")

### First, figure how to run one particular element from
### specialVarNames

### 3 ways to run a regression for the first element of
### specialVarNames, which is "y1" (specialVarNames[1])

### 1. figure out which column has name [1] and grab that column
### which() gives the position of the column whose name equals
### specialVarNames[1]; that column is used as the response, while
### x1 and x2 are looked up in mydf through the data argument.
mod1 <- lm(mydf[ , which(colnames(mydf) == specialVarNames[1])] ~ x1 + x2, data = mydf)
summary(mod1)
``````
``````##
## Call:
## lm(formula = mydf[, which(colnames(mydf) == specialVarNames[1])] ~
##     x1 + x2, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
``````
``````### Lacks finesse, but clear. It is the same as

### Look up the column position once, store it, then index mydf with it.
magicColumnNumber <- which(colnames(mydf) == specialVarNames[1])
mod1 <- lm(mydf[,magicColumnNumber] ~ x1 + x2, data = mydf)
summary(mod1)
``````
``````##
## Call:
## lm(formula = mydf[, magicColumnNumber] ~ x1 + x2, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
``````
``````### 2. use the fact that a data frame is a list and items can
### be accessed as mydf[["variableName"]].
### Here the name comes from specialVarNames[1] instead of being typed in.
mod2 <- lm(mydf[[specialVarNames[1]]] ~ x1 + x2, data = mydf)
summary(mod2)
``````
``````##
## Call:
## lm(formula = mydf[[specialVarNames[1]]] ~ x1 + x2, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
``````
``````### Same as
### Put the column name in its own variable first, then index with it.
myVName <- specialVarNames[1]
mod2 <- lm( mydf[[myVName]] ~ x1 + x2, data = mydf)
summary(mod2)
``````
``````##
## Call:
## lm(formula = mydf[[myVName]] ~ x1 + x2, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
``````
``````### 3. perhaps the best, most elegant. A "formula" can be built from
### a text string. So we can create the formula first, then run:
### Paste the response name onto the right-hand side, then turn the text
### into a formula object. paste() already receives the character value
### of specialVarNames[1], so no eval() is needed; as.formula() makes the
### text-to-formula conversion explicit instead of leaving it to lm().
myNewFormula <- as.formula(paste(specialVarNames[1], "~ x1 + x2"))

mod3 <- lm(myNewFormula, data = mydf)
summary(mod3)
``````
``````##
## Call:
## lm(formula = myNewFormula, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
``````
``````### Now do same work for each element in specialVarNames.
### Any clod could write a for loop. Let's be smarter
### and use lapply.

### First, create a function:
### Fit the regression <aString> ~ x1 + x2 and return the fitted lm object.
###   aString: name of the response column, a single character string
###   dat:     data frame holding the response column plus x1 and x2
### The model is fitted on dat, the data frame the caller passes in, so
### the result does not depend on a global object named mydf.
### Stops with an error if aString is not a single string or dat is not
### a data frame (the NULL defaults are placeholders, not usable values).
myFunc <- function(aString = NULL, dat = NULL){
  stopifnot(is.character(aString), length(aString) == 1,
            is.data.frame(dat))
  ### Build "response ~ x1 + x2" as text, then convert it to a formula.
  myNewFormula <- as.formula(paste(aString, "~ x1 + x2"))
  lm(myNewFormula, data = dat)
}

### Call myFunc once per name. lapply() hands each name to myFunc as its
### first argument (aString) and forwards dat = mydf unchanged.
allRegs <- lapply(specialVarNames, myFunc, dat = mydf)

### Look at the second fit only
summary(allRegs[[2]])
``````
``````##
## Call:
## lm(formula = myNewFormula, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -3.426 -1.175 -0.139  1.222  5.908
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   3.1040     0.1922   16.15   <2e-16 ***
## x1            0.1259     0.1758    0.72     0.48
## x2           -0.0692     0.1904   -0.36     0.72
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.84 on 97 degrees of freedom
## Multiple R-squared:  0.00744,    Adjusted R-squared:  -0.013
## F-statistic: 0.364 on 2 and 97 DF,  p-value: 0.696
``````
``````### Just "parameters"? Do:
### coef() applied to the summary gives the coefficient table as a matrix
coef(summary(allRegs[[2]]))
``````
``````##             Estimate Std. Error t value  Pr(>|t|)
## (Intercept)  3.10397     0.1922 16.1467 3.064e-29
## x1           0.12588     0.1758  0.7160 4.757e-01
## x2          -0.06916     0.1904 -0.3633 7.172e-01
``````
``````### If you name the items in the regression list, then
### can access by name:
### setNames() attaches the variable names to the list elements
allRegs <- setNames(allRegs, specialVarNames)

### Pull out the y2 fit by its name
summary(allRegs[["y2"]])
``````
``````##
## Call:
## lm(formula = myNewFormula, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -3.426 -1.175 -0.139  1.222  5.908
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   3.1040     0.1922   16.15   <2e-16 ***
## x1            0.1259     0.1758    0.72     0.48
## x2           -0.0692     0.1904   -0.36     0.72
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.84 on 97 degrees of freedom
## Multiple R-squared:  0.00744,    Adjusted R-squared:  -0.013
## F-statistic: 0.364 on 2 and 97 DF,  p-value: 0.696
``````
``````### Can dump out list of all summaries:
### summary can be passed to lapply() directly; no wrapper function needed
lapply(allRegs, summary)
``````
``````## $y1
##
## Call:
## lm(formula = myNewFormula, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -9.402 -3.185 -0.375  2.402 14.034
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   17.688      0.450   39.29   <2e-16 ***
## x1             0.243      0.412    0.59     0.56
## x2            -0.141      0.446   -0.32     0.75
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.32 on 97 degrees of freedom
## Multiple R-squared:  0.00521,    Adjusted R-squared:  -0.0153
## F-statistic: 0.254 on 2 and 97 DF,  p-value: 0.776
##
##
## $y2
##
## Call:
## lm(formula = myNewFormula, data = mydf)
##
## Residuals:
##    Min     1Q Median     3Q    Max
## -3.426 -1.175 -0.139  1.222  5.908
##
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   3.1040     0.1922   16.15   <2e-16 ***
## x1            0.1259     0.1758    0.72     0.48
## x2           -0.0692     0.1904   -0.36     0.72
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.84 on 97 degrees of freedom
## Multiple R-squared:  0.00744,    Adjusted R-squared:  -0.013
## F-statistic: 0.364 on 2 and 97 DF,  p-value: 0.696
``````
``````### Can easily massage output into a matrix of
### particulars from each model.
``````