set.seed(1234)
l <- list(
A=sample(0:1, size=10, replace=TRUE),
B=runif(10),
C=sample(letters)
)
l
## $A
## [1] 0 1 1 1 1 1 0 0 1 1
##
## $B
## [1] 0.6935913 0.5449748 0.2827336 0.9234335 0.2923158 0.8372956 0.2862233
## [8] 0.2668208 0.1867228 0.2322259
##
## $C
## [1] "i" "h" "d" "a" "e" "r" "k" "u" "o" "w" "y" "x" "v" "g" "c" "z" "l"
## [18] "j" "p" "f" "s" "m" "b" "n" "q" "t"
(Hint: use lapply()
and sort()
).
set.seed(1234)
x <- matrix(sample(0:1, size=1000*5, replace=TRUE), nrow=1000, ncol=5)
produce a vector containing the sum of the columns of x.
sapply()
Revisit the solutions from exercise 1 above, now with the bytecode compiler.
sapply()
Remember that the bytecode compiler isn’t nearly as clever as your typical C/C++ compiler. Consider these two functions:
f <- function (A, Q){
n <- ncol (A)
for (i in 1:n){
tA <- t(A)
Y <- tA %*% Q
Q <- qr.Q (qr(Y))
Y <- A %*% Q
Q <- qr.Q(qr(Y))
}
Q
}
g <- function (A, Q){
n <- ncol (A)
tA <- t(A)
for (i in 1:n){
Y <- tA %*% Q
Q <- qr.Q (qr(Y))
Y <- A %*% Q
Q <- qr.Q(qr(Y))
}
Q
}
Conceptually, these two functions do the same thing, but g()
is more efficient than f()
. We can easily show this on some randomly generated data:
nrows <- 100
ncols <- 100
A <- matrix(rnorm(nrows*ncols), nrow=nrows, ncol=ncols)
Q <- matrix(0, nrows, ncols)
library(rbenchmark)
benchmark(f=f(A, Q), g=g(A, Q), replications=10, order="relative",
columns=c("test", "elapsed", "relative"))
## test elapsed relative
## 1 f 4.648 1.00
## 2 g 4.693 1.01
Compiling with the bytecode compiler may improve the overall performance, but won’t fix the underlying problem. Use the disassemble()
function on these functions to convince yourself that f()
is still doing unnecessary operations.
lapply(l, sort)
## $A
## [1] 0 0 0 1 1 1 1 1 1 1
##
## $B
## [1] 0.1867228 0.2322259 0.2668208 0.2827336 0.2862233 0.2923158 0.5449748
## [8] 0.6935913 0.8372956 0.9234335
##
## $C
## [1] "a" "b" "c" "d" "e" "f" "g" "h" "i" "j" "k" "l" "m" "n" "o" "p" "q"
## [18] "r" "s" "t" "u" "v" "w" "x" "y" "z"
### loop
sums <- numeric(5)
for (j in 1:5){
for (i in 1:1000){
sums[j] <- sums[j] + x[i, j]
}
}
sums
## [1] 518 485 515 481 508
### apply
apply(x, 2, sum)
## [1] 518 485 515 481 508
### vectorized
colSums(x)
## [1] 518 485 515 481 508
sqrt_loop_noinit <- function(n)
{
ret <- c()
for (i in 1:n)
ret[i] <- sqrt(i)
return(ret)
}
sqrt_loop_withinit <- function(n)
{
ret <- numeric(n)
for (i in 1:n)
ret[i] <- sqrt(i)
return(ret)
}
sqrt_lapply <- function(n) lapply(1:n, sqrt)
sqrt_vec <- function(n) sqrt(1:n)
Benchmarking them for n=10000
, the clear victor is using vectorization:
library(rbenchmark)
n <- 10000
benchmark(sqrt_loop_noinit(n), sqrt_loop_withinit(n), sqrt_lapply(n), sqrt_vec(n),
order="relative", columns=c("test", "elapsed", "relative"))
## test elapsed relative
## 4 sqrt_vec(n) 0.009 1.000
## 3 sqrt_lapply(n) 0.362 40.222
## 2 sqrt_loop_withinit(n) 0.741 82.333
## 1 sqrt_loop_noinit(n) 9.448 1049.778
To get a sense for possible performance improvements of lapply()
over for loops, we can just compare these two:
benchmark(sqrt_loop_withinit(n), sqrt_lapply(n), order="relative",
columns=c("test", "elapsed", "relative"))
## test elapsed relative
## 2 sqrt_lapply(n) 0.327 1.000
## 1 sqrt_loop_withinit(n) 0.742 2.269
library(compiler)
sqrt_loop_noinit <- cmpfun(sqrt_loop_noinit)
sqrt_loop_withinig <- cmpfun(sqrt_loop_withinit)
sqrt_lapply <- cmpfun(sqrt_lapply)
sqrt_vec <- cmpfun(sqrt_vec)
benchmark(sqrt_loop_noinit(n), sqrt_loop_withinit(n), sqrt_lapply(n), sqrt_vec(n),
order="relative", columns=c("test", "elapsed", "relative"))
## test elapsed relative
## 4 sqrt_vec(n) 0.008 1.000
## 3 sqrt_lapply(n) 0.366 45.750
## 2 sqrt_loop_withinit(n) 0.741 92.625
## 1 sqrt_loop_noinit(n) 8.919 1114.875
And again just comparnig the loop with lapply()
:
benchmark(sqrt_loop_withinit(n), sqrt_lapply(n), order="relative",
columns=c("test", "elapsed", "relative"))
## test elapsed relative
## 2 sqrt_lapply(n) 0.333 1.000
## 1 sqrt_loop_withinit(n) 0.743 2.231
### sapply
div_by_5_or_17 <- function(n)
{
if (n %%5 == 0 || n %% 17 == 0)
return(TRUE)
else
return(FALSE)
}
div_sapply <- function(n) sum(sapply(1:n, div_by_5_or_17))
### Vectorization
div_vec <- function(n)
{
numbers <- 1:n
sum((numbers %% 5 == 0) | (numbers %% 17 == 0))
}
library(rbenchmark)
n <- 100000
benchmark(sapply=div_sapply(n), vec=div_vec(n))
## test replications elapsed relative user.self sys.self user.child
## 1 sapply 100 13.580 28.65 13.576 0.004 0
## 2 vec 100 0.474 1.00 0.476 0.000 0
## sys.child
## 1 0
## 2 0