## Paul Johnson
## March 9, 2011
## An R speed question arose recently in R-devel email list.
## Does the explicit use of name spaces, as in
## base::print
## make programs run slower than just
## print
##?
## And, if it is slow for R to look up functions in their name spaces — that is, if it is true that it is slow to use
## base::print, could we cache the "function lookup"
## Dominick Sampieri said yes, "::" is slower:
## "
## Since the resolution of myPkg::foo() occurs at runtime (via a function
## call) instead
## of at compile time (as it would in C++), this practice can introduce a
## significant
## performance hit. This can be avoided by doing something like:
## mylocalfunc <- myPkg::foo
## [tight loop that uses mylocalfunc repeatedly]
## "
## Several people wondered "can that possibly be right?"
## and evidence that it might be was offered by
## Hadley Wickham, Feb 16, 2011 r-devel
## "
## > system.time(replicate(1e4, base::print))
## user system elapsed
## 0.539 0.001 0.541
## > system.time(replicate(1e4, print))
## user system elapsed
## 0.013 0.000 0.012
## So it is (relatively) significant, although it's not going to make an
## impact unless you're doing thousands of function calls.
## "
## There's a methodology problem here, since Hadley was using
## "replicate", and that imposes some overhead by itself (the time to
## find replicate is counted as well).
## Olaf Mersmann (Feb 16, 2011 r-devel) pointed that out, and showed that the
## difference caused by "::" is actually more dramatic.
## Olaf described his package "microbenchmark"
## "
## Using this package I get:
## > library("microbenchmark")
## > res <- microbenchmark(print, base::print, times=10000)
## > res
## Unit: nanoeconds ## I've fixed the typo, but not pushed to CRAN
## min lq median uq max
## print 57 65 68.0 69 48389
## base::print 41763 43357 44278.5 48403 4749851
## A better way to look at this is by converting to evaluations per second:
## > print(res, unit="eps")
## Unit: evaluations per second
## min lq median uq max
## print 17543859.65 15384615.38 14705882.35 14492753.62 20665.8538
## base::print 23944.64 23064.33 22584.32 20659.88 210.5329
## Resolving 23000 names per second or ~15M ist quite a dramatic
## difference in my world.
## So, it appears there is a HUGE HUGE difference between calling
## "print" and "base::print".
## What does that mean for me, and my students who want to run simulations?
## The only thing I'm sure of is that chasing speedups is a very
## frustrating exercise.
## Let's repeat this exercise, but actually make some calculations,
## instead of calling a function that does nothing.
## Draw a small data set so that each timed call does a modest amount of
## real work (a least-squares fit) rather than merely resolving a name.
x <- rnorm(20)
y <- rnorm(20)
## Repeat the replicate()-based comparison from the mailing list, but
## with an actual model fit inside each call.  Explicit print() makes the
## top-level autoprinting of the proc_time result intentional.
print(system.time(replicate(1e4, stats::lm(y ~ x))))
print(system.time(replicate(1e4, lm(y ~ x))))
###. Hmm. Not so much difference.
## microbenchmark times each expression individually, so the overhead of
## replicate() itself no longer contaminates the measurement.
library(microbenchmark)
mb1 <- microbenchmark(lm(y ~ x), stats::lm(y ~ x), times = 1e4)
print(mb1)
## "eps" = evaluations per second, the unit Olaf used above.
print(mb1, unit = "eps")
## Wrap each variant in a small function, to check whether putting the
## "::" lookup inside a closure changes its cost.  The assignment as the
## last expression means each wrapper returns the fit invisibly, exactly
## as the one-line originals did.
mylm1 <- function(x, y) {
  fit1 <- stats::lm(y ~ x)
}
mylm2 <- function(x, y) {
  fit2 <- lm(y ~ x)
}
mb2 <- microbenchmark(mylm1(x, y), mylm2(x, y), stats::lm(y ~ x), lm(y ~ x),
                      times = 1e4)
print(mb2, unit = "eps")
##Hmm. hardly any difference
## Dominick's suggested workaround: resolve "stats::lm" once, cache the
## closure in a local variable, and call the cached copy in the loop.
localLm <- stats::lm
## Sanity check that the cached copy actually fits a model.
localLm(y ~ x)
## Compare per-call "::" resolution against the cached lookup.
mb3 <- microbenchmark(stats::lm(y ~ x), localLm(y ~ x), times = 1e4)
print(mb3, unit = "eps")
## Same cached-lookup experiment for lm.fit, the low-level fitter that
## skips formula handling.
## BUG FIX: localLm.fit was assigned stats::lm (the wrong function) and
## was then never used in the benchmark at all.  Cache stats::lm.fit and
## include the cached copy as a third benchmarked expression.
localLm.fit <- stats::lm.fit
xmat <- cbind(1, x)  # design matrix: intercept column plus x
mb4 <- microbenchmark(stats::lm.fit(xmat, y), lm.fit(xmat, y),
                      localLm.fit(xmat, y), times = 1e4)
print(mb4, unit = "eps")
## Time 1e4 namespaced lm() calls in a plain for loop, keeping every fit.
## Fixes: (1) lst1[i] <- mod1 used single-bracket assignment, which kept
## only the first component of the lm object and emitted a warning on
## every iteration -- double brackets store the whole object; (2) use
## seq_len() rather than 1:1e4 for a safe index sequence; (3) convert the
## difftime to seconds explicitly, since sprintf("%6.2f", t1 - t0) prints
## whatever unit difftime auto-selected (minutes for a long run) while
## the message always claims "seconds".
t1_start <- Sys.time()
lst1 <- vector("list", 1e4)  # preallocate, never grow in the loop
for (i in seq_len(1e4)) {
  mod1 <- stats::lm(y ~ x)
  lst1[[i]] <- mod1
}
t1_stop <- Sys.time()
elapsed1 <- as.numeric(difftime(t1_stop, t1_start, units = "secs"))
print(sprintf("for loop, Lots of lookups took %6.2f seconds", elapsed1))
## Second run of the identical loop (a repeat measurement; the first pass
## also serves as a warm-up).  Same fixes as the first run: [[ ]] for
## list-element assignment (single brackets kept only the first component
## of the lm object, with a warning), seq_len() for the index, and an
## explicit seconds conversion so the "%6.2f seconds" message cannot
## silently report minutes.
t1_start <- Sys.time()
lst1 <- vector("list", 1e4)
for (i in seq_len(1e4)) {
  mod1 <- stats::lm(y ~ x)
  lst1[[i]] <- mod1
}
t1_stop <- Sys.time()
elapsed1 <- as.numeric(difftime(t1_stop, t1_start, units = "secs"))
print(sprintf("for loop, Lots of lookups took %6.2f seconds", elapsed1))
## Cached-lookup version of the same loop: resolve "stats::lm" once,
## outside the loop, then call the cached closure 1e4 times.
## Fixes: lst2 was created but never filled (and each mod2 was thrown
## away), so this loop did strictly less work than the loop it is being
## compared with -- preallocate and store each fit so the comparison is
## fair.  Also seq_len() for the index and an explicit seconds conversion
## for the elapsed-time message.
t2_start <- Sys.time()
lst2 <- vector("list", 1e4)
myLm <- stats::lm  # resolve the namespace lookup exactly once
for (i in seq_len(1e4)) {
  mod2 <- myLm(y ~ x)
  lst2[[i]] <- mod2
}
t2_stop <- Sys.time()
elapsed2 <- as.numeric(difftime(t2_stop, t2_start, units = "secs"))
print(sprintf("Cached lookup took %6.2f seconds", elapsed2))
## Baseline: plain lm() with fresh data drawn on every iteration, so the
## timing now includes data generation as well as the fit (this dilutes
## the name-lookup cost even further).  Fixes: seq_len() for the loop
## index and an explicit seconds conversion so the "%6.2f seconds"
## message cannot silently report a difftime in minutes.
t0_start <- Sys.time()
for (i in seq_len(10000)) {
  x <- rnorm(100)
  y <- rnorm(100)
  mod1 <- lm(y ~ x)
}
t0_stop <- Sys.time()
elapsed0 <- as.numeric(difftime(t0_stop, t0_start, units = "secs"))
print(sprintf("lm took %6.2f seconds", elapsed0))
## Namespaced version of the baseline loop: stats::lm resolved on every
## iteration, with fresh data each time.  Fixes: seq_len() for the loop
## index and an explicit seconds conversion so the "%6.2f seconds"
## message cannot silently report a difftime in minutes.
t1_start <- Sys.time()
for (i in seq_len(10000)) {
  x <- rnorm(100)
  y <- rnorm(100)
  mod1 <- stats::lm(y ~ x)
}
t1_stop <- Sys.time()
elapsed1 <- as.numeric(difftime(t1_stop, t1_start, units = "secs"))
print(sprintf("Lots of lookups took %6.2f seconds", elapsed1))
## Cached-lookup version of the baseline loop: "stats::lm" resolved once
## before the loop, fresh data drawn each iteration.  Fixes: seq_len()
## for the loop index and an explicit seconds conversion so the
## "%6.2f seconds" message cannot silently report a difftime in minutes.
t2_start <- Sys.time()
myLm <- stats::lm  # one-time namespace lookup
for (i in seq_len(10000)) {
  x <- rnorm(100)
  y <- rnorm(100)
  mod2 <- myLm(y ~ x)
}
t2_stop <- Sys.time()
elapsed2 <- as.numeric(difftime(t2_stop, t2_start, units = "secs"))
print(sprintf("Cached lookup took %6.2f seconds", elapsed2))
## The timings obtained by
## > system.time(replicate(1e4, base::print))
## User System verstrichen
## 0.475 0.006 0.483
## > system.time(replicate(1e4, print))
## User System verstrichen
## 0.011 0.001 0.014
## are skewed by the overhead of replicate() in this case because the
## execution time of the expression under test is so short.
##