5. Chapter 1
1.1 R
R
•
•
•
•
• S
” ” environment R
R R
R R
1.2
R S Rick Becker, John Chambers Allan Wilks Bell
S-Plus
S John Chambers
R , The New S Language: A Programming Environment
for Data Analysis and Graphics Richard A. Becker, John M. Chambers and
Allan R. Wilks 1991 S (S version 3) Statistical
Models in S edited by John M. Chambers and Trevor J. Hastie
S-Plus R R S
1
6. CHAPTER 1. 2
1.3 R
R R
R
8 R
CRAN http://cran.r-project.org
R
S R S
SAS SPSS R
R
1.4 R
R
R
R X window
UNIX Windows R
1.5 R
R >, UNIX shell
R
UNIX shell $ UNIX R
1. ,
work. .
$ mkdir work
$ cd work
2. R
$ R
3. R
4. R
> q()
R yes,no cancel(
) R
R
R 1. work R
$ cd work
$ R
7. CHAPTER 1. 3
2. R q()
Windows R
R ” ” R
1.6
R
1.7
UNIX man R
solve
>help(solve)
>?solve
if,for function
> help("[[")
”It’s im-
portant”
HTML
> help.start()
Web (UNIX netscape )
UNIX HTML
’Search Engine and Keywords’
R
help.search
?help.search
> example(topic)
Windows R
> ?help
1.8 R
R
UNIX R A a
R
( locale ) (
) . ***note 1***
(;) ( { })
(#)
8. CHAPTER 1. 4
R
+
R
1.9
UNIX Windows R
( <DEL> )
UNIX
readline Emacs
( ESS Emacs Speaks Statistics)
R R Emacs
1.10
work commands.R
R
> source("commands.R")
Windows Source File sink
> sink("record.lis")
record.lis
> sink()
1.11
R
R
( )
> objects()
( ls() ) R
workspace
rm
> rm(x, y, z, ink, junk, temp, foo, bar)
R
R R
.RData
***Note3*** R R workspace
R
x y
9. Chapter 2
2.1
R
R x 5
(10.4, 5.6, 3.1, 6.4 21.7)
> x <- c(10.4, 5.6, 3.1, 6.4, 21.7)
c() c()
1 ( <- ) = =
< ( ) - ( )
< -
***note 5*** assign()
> assign("x", c(10.4, 5.6, 3.1, 6.4, 21.7))
<- assign()
> c(10.4, 5.6, 3.1, 6.4, 21.7) -> x
> 1/x
( x )
> y <- c(x, 0, x)
y 11 x 0
2.2
,
( )
> v <- 2*x + y + 1
5
10. CHAPTER 2. 6
11 v 2*x 2.2 y
1 1 11 + - * /
^ log, exp, sin, cos, tan,sqrt
max min
range 2 c(min(x),max(x))
length(x) x x sum(x) x
prod(x)
x mean(x)
var(x) sum(x)/length(x)
sum((x-mean(x))^2)/(length(x)-1) var() n*p
p*p p
sort(x) x
( order() sort.list())
max min
pmax pmin
( )
sqrt(-17)
NaN( )
sqrt(-17+0i)
2.3
R 1:30 c(1,2,. . . ,29,30)
(:) ( ) 2*1:15
c(2,4, ...,28,30) n<-10 1:n-1 1:(n-1) 30:1
seq()
5
seq(2,10) 2:10
seq() R
from=value to=value
seq(1,30), seq(from=1, to=30) seq(to=30, from=1) 1:30 seq()
by=value length=value
by=1
> seq(-5, 5, by=.2) -> s3
s3 c(-5.0, -4.8, -4.6, ..., 4.6, 4.8, 5.0)
> s4 <- seq(length=51, from=-5, by=.2)
s4
along=vector
rep()
> s5 <- rep(x, times=5)
x s5 5
11. CHAPTER 2. 7
2.4
R TRUE,
FALSE, NA (not available ) T F T F
TRUE FALSE
TRUE FALSE
> temp <- x > 13
temp x TRUE FALSE
<, <=, >, >=, == !=
c1 c2 c1 & c2 (”and”) c1 | c2
(”or”) !c1 c1
FALSE 0 TRUE 1
2.5
” ”(”not available”) ” ”(”missing value”)
NA NA
NA
is.na(x)
x NA
TRUE FALSE
> z <- c(1:3,NA); ind <- is.na(z)
x==NA is.na(x) NA
x==NA x NA
” ” Not a Number NaN
> 0/0
> Inf - Inf
NaN is.na(xx) NA NaN
TRUE is.nan(xx) NaN TRUE
2.6
R
e.g. "x-values" "New iteration results"
(’) (”) (
) R C backslash
, ” \". \n, ,
\t, tab \b, c()
paste()
sep=string
> labs <- paste(c("X","Y"), 1:10, sep="")
12. CHAPTER 2. 8
labs
c("X1", "Y2", "X3", "Y4", "X5", "Y6", "X7", "Y8", "X9", "Y10")
c("X","Y")
5 1:10
2.7 (index vector);
1.
TRUE
FALSE
> y <- x[!is.na(x)]
y x
x y x
> (x+1)[(!is.na(x)) & x>0] -> z
z x+1 x
2. {1, 2, ...,length(x)}
x[6] x
> x[1:10]
x 10 ( x 10)
> c("x","y")[rep(c(1,2,2,1), times=4)]
16 "x", "y", "y", "x" 4
3.
> y <- x[-(1:5)]
y 5
4. names
# Index a named vector by a character vector of names: `lunch` holds the
# "apple" and "orange" elements of `fruit`, in that order.
fruit <- c(5, 10, 1, 20)
names(fruit) <- c("orange", "banana", "apple", "peach")
lunch <- fruit[c("apple", "orange")]
13. CHAPTER 2. 9
(names)
vector[index_vector]
vector
> x[is.na(x)] <- 0
replaces any missing values in x by zeros and
> y[y < 0] <- -y[y < 0]
has the same effect as
> y <- abs(y)
2.8
R
• (matrices)
• (factors)
• (lists)
• (data frames)
” ”
( )
:
• (functions) R workspace
R
14. Chapter 3
3.1
R
( ) (numeric) (complex) (logical)
(character)
NA
character(0) numeric(0)
R (list)
”
”
(function) (expression)
R
mode(object) length(object)
attributes(object)
” ”
z 100 mode(z) ”complex” length(z)
100
( ) R
> z <- 0:9
> digits <- as.character(z)
z c("0", "1", "2", ..., "9") digits
(coercion) digits
:
> d <- as.integer(digits)
d z R as.something()
10
15. CHAPTER 3. 11
3.2
” ”
> e <- numeric()
character()
> e[3] <- 17
e 3 NA
scan()
alpha 10
> alpha <- alpha[2 * 1:5]
5
3.3
attributes(object)
attr(object,name)
R
R
attr()
> attr(z,"dim") <- c(10,10)
R z 10 × 10
3.4
R
"data.frame"
plot()
summary()
unclass() winter
"data.frame"
> winter
> unclass(winter)
sec10.9
16. Chapter 4
1
R
4.1
30
> state <- c("tas", "sa", "qld", "nsw", "nsw", "nt", "wa", "wa",
"qld", "vic", "nsw", "vic", "qld", "qld", "sa", "tas",
"sa", "nt", "wa", "vic", "qld", "nsw", "nsw", "wa",
"sa", "act", "nsw", "vic", "vic", "act")
factor()
> statef <- factor(state)
print()
> statef
[1] tas sa qld nsw nsw nt wa wa qld vic nsw vic qld qld sa
[16] tas sa nt wa vic qld nsw nsw wa sa act nsw vic vic act
Levels: act nsw nt qld sa tas vic wa
levels()
4.2 tapply() ragged
> incomes <- c(60, 49, 40, 61, 64, 60, 59, 54, 62, 69, 70, 42, 56,
61, 61, 61, 58, 51, 48, 65, 49, 49, 41, 48, 52, 46,
59, 46, 58, 43)
1
12
17. CHAPTER 4. 13
tapply()
> incmeans <- tapply(incomes, statef, mean)
act nsw nt qld sa tas vic wa
44.500 57.333 55.500 53.600 55.000 60.500 56.000 52.250
tapply()
incomes mean() incomes tapply()
statef
incomes statef tapply()
R R
var()
# Standard error of the mean of x: sqrt(var(x) / length(x)).
# Note: defining this in the workspace masks base::stderr().
stderr <- function(x) {
  n_obs <- length(x)
  sqrt(var(x) / n_obs)
}
> incster <- tapply(incomes, statef, stderr)
> incster
act nsw nt qld sa tas vic wa
1.5 4.3102 4.5 4.1061 2.7386 0.5 5.244 2.6575
95%
tapply() length() qt()
t
tapply()
ragged array
4.3
ordered()
18. Chapter 5
5.1
R
2 k
1 R
dim
z 1500
> dim(z) <- c(3,5,100)
dim 3 × 5 × 100
matrix() array()
FORTRAN
—” ”
a c(3,4,2) 24
a[1,1,1], a[2,1,1],..., a[2,4,2], a[3,4,2]
5.2
a[2,,] 4×2 c(4,2)
c(a[2,1,1], a[2,2,1], a[2,3,1], a[2,4,1],
a[2,1,2], a[2,2,2], a[2,3,2], a[2,4,2])
a[,,] a
Z dim()
14
19. CHAPTER 5. 15
5.3
4×5 X
• X[1,3], X[2,2] X[3,1]
• 0
3×2
> x <- array(1:20,dim=c(4,5)) # Generate a 4 by 5 array.
> x
[,1] [,2] [,3] [,4] [,5]
[1,] 1 5 9 13 17
[2,] 2 6 10 14 18
[3,] 3 7 11 15 19
[4,] 4 8 12 16 20
> i <- array(c(1:3,3:1),dim=c(3,2))
> i
[,1] [,2]
[1,] 1 3
[2,] 2 2
[3,] 3 1
> x[i]
[1] 9 6 3
> x[i] <- 0 # Replace those elements by zeros.
> x
[,1] [,2] [,3] [,4] [,5]
[1,] 1 5 0 13 17
[2,] 2 0 10 14 18
[3,] 0 7 11 15 19
[4,] 4 8 12 16 20
>
blocks(b levels) varieties
(v levels)
n plot
> Xb <- matrix(0, n, b)
> Xv <- matrix(0, n, v)
20. CHAPTER 5. 16
> ib <- cbind(1:n, blocks)
> iv <- cbind(1:n, varieties)
> Xb[ib] <- 1
> Xv[iv] <- 1
> X <- cbind(Xb, Xv)
N
> N <- crossprod(Xb, Xv)
table()
> N <- table(blocks, varieties)
5.4 array()
dim array
> Z <- array(data_vector, dim_vector)
h 24
> Z <- array(h, dim=c(3,4,2))
h Z 3×4×2 h 24
> dim(Z) <- c(3,4,2)
h 24 24
> Z <- array(0, c(3,4,2))
E
dim(Z) c(3,4,2) Z[1:24] Z[] Z
dim
A,B,C
> D <- 2*A*B + C + 1
D
5.4.1
•
•
•
dim (As long as short vectors and arrays
only are encountered, the arrays must all have the same dim attribute or
an error results.)
21. CHAPTER 5. 17
•
•
dim
5.5
a,b
a b
%o%:
> ab <- a %o% b
> ab <- outer(a, b, "*")
$f(x, y) = \cos(y)/(1 + x^2)$
x, y R x,y
# f(x, y) = cos(y) / (1 + x^2), evaluated elementwise on its arguments.
f <- function(x, y) {
  denom <- 1 + x^2
  cos(y) / denom
}
> z <- outer(x, y, f)
1
chapter10 R
2×2
2×2 [a, b; c, d]
0∼9
ad − bc
outer()
> d <- outer(0:9, 0:9)
> fr <- table(outer(d, d, "-"))
> plot(as.numeric(names(fr)), fr, type="h",
xlab="Determinant", ylab="Frequency")
names
for
for for chapter9
5.6
aperm(a, perm) perm 1, . . . , k
k a a
perm[j]
22. CHAPTER 5. 18
A
> B <- aperm(A, c(2,1))
B A t()
B <- t(A)
5.7
R
t(X) nrow(A) ncol(A)
A
5.7.1
%*% n×1 1×n
n
A,B
> A * B
> A %*% B
X
> x %*% A %*% x
crossprod() cross product crossprod(X, y)
t(X) %*% y crossprod()
crossprod(X) crossprod(X, X)
diag() v diag(v) v
M diag(M) M
diag() Matlab
k diag(k) k×k
5.7.2
> b <- A %*% x
A b x R
> solve(A,b)
x
solve(A) A $A^{-1}$ $x = A^{-1} b$
solve(A,b) x <- solve(A) %*% b
$x^{T} A^{-1} x$ x %*% solve(A,x)
A
23. CHAPTER 5. 19
5.7.3
eigen(Sm)
values vectors
> ev <- eigen(Sm)
ev ev$val ev$vec Sm
> evals <- eigen(Sm)$values
evals
> eigen(Sm)
> eigen(Sm)
5.8
1
5.9 QR
lsfit()
> ans <- lsfit(X, y)
y X
ls.diag()
ls.diag()
X X
qr()
> Xplus <- qr(X)
> b <- qr.coef(Xplus, y)
> fit <- qr.fitted(Xplus, y)
> res <- qr.resid(Xplus, y)
y x
5.10 cbind() rbind()
cbind() rbind()
cbind()
rbind()
> X <- cbind(arg 1, arg 2, arg 3, ...)
cbind()
5.11 c()
cbind() rbind() dim
c() dim,dimnames
1 http://mathworld.wolfram.com/SingularValueDecomposition.html
24. CHAPTER 5. 20
as.vector()
c()
5.12
table()
k k
statef
statefr
level
incomef cut() ”income
class”
29. Chapter 7
R
R R
1
Perl
R
read.table() scan()
R R data Import/Export
7.1 read.table()
•
•
Price Floor Area Rooms Age Cent.heat
01 52.00 111.0 830 5 6.2 no
02 54.75 128.0 710 5 7.5 no
03 57.50 101.0 1000 5 4.2 no
04 57.50 131.0 690 6 8.8 no
05 59.75 93.0 900 5 1.9 yes
...
Cent.heat
read.table()
1 Unix Sed Awk
25
30. CHAPTER 7. 26
> HousePrice <- read.table("houses.data")
Price Floor Area Rooms Age Cent.heat
52.00 111.0 830 5 6.2 no
54.75 128.0 710 5 7.5 no
57.50 101.0 1000 5 4.2 no
57.50 131.0 690 6 8.8 no
59.75 93.0 900 5 1.9 yes
...
> HousePrice <- read.table("houses.data", header=TRUE)
header=TRUE
7.2 scan()
3
’input.dat’ scan()
> inp <- scan("input.dat", list("",0,0))
inp
> label <- inp[[1]]; x <- inp[[2]]; y <- inp[[3]]
# Read three fields per record into a list with named components, then
# extract each component by name. The list is called `inp`; `in` is a
# reserved word in R and cannot be used as an object name.
inp <- scan("input.dat", list(id = "", x = 0, y = 0))
label <- inp$id; x <- inp$x; y <- inp$y
2 chapter 6
> X <- matrix(scan("light.dat", 0), ncol=5, byrow=TRUE)
R
7.3
R 50
S-Plus data
data()
data(infert)
infert
31. CHAPTER 7. 27
R
7.3.1 R
package
data(package="nls")
data(Puromycin, package="nls")
library()
library(nls)
data()
data(Puromycin)
base nls
Puromycin
7.4
edit
> xnew <- edit(xold)
xold xnew
> xnew <- edit(data.frame())
32. Chapter 8
8.1 R—
R
P (X ≤ x) q P (X ≤ x) > q
x
Distribution R name additional arguments
beta beta shape1, shape2, ncp
binomial binom size, prob
Cauchy cauchy location, scale
chi-squared chisq df, ncp
exponential exp rate
F f df1, df2, ncp
gamma gamma shape, scale
geometric geom prob
hypergeometric hyper m, n, k
log-normal lnorm meanlog, sdlog
logistic logis location, scale
negative binomial nbinom size, prob
normal norm mean, sd
Poisson pois lambda
Student's t t df, ncp
uniform unif min, max
Weibull weibull shape, scale
Wilcoxon wilcox m, n
’d’ (density) ’p’ CDF ’q’
’r’
dxxx x pxxx q qxxx p rxxx n rhyper
rwilcox nn (non-
centrality parameter) CDF
pxxx qxxx lower.tail log.p dxxx
log
- pxxx(t, ..., lower.tail = FALSE, log.p = TRUE)
cumulative/integrated hazard function H(t) =
28
33. CHAPTER 8. 29
− log(1 − F (t)) dxxx(..., log = TRUE)
ptukey qtukey
> ## 2-tailed p-value for t distribution
> 2*pt(-2.43, df = 13)
> ## upper 1% point for an F(2, 7) distribution
> qf(0.99, 2, 7)
8.2
summary fivenum
stem ”stem and leaf” plot
> data(faithful)
> attach(faithful)
> summary(eruptions)
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.600 2.163 4.000 3.488 4.454 5.100
> fivenum(eruptions)
[1] 1.6000 2.1585 4.0000 4.4585 5.1000
> stem(eruptions)
The decimal point is 1 digit(s) to the left of the |
16 | 070355555588
18 | 000022233333335577777777888822335777888
20 | 00002223378800035778
22 | 0002335578023578
24 | 00228
26 | 23
28 | 080
30 | 7
32 | 2337
34 | 250077
36 | 0000823577
38 | 2333335582225577
40 | 0000003357788888002233555577778
42 | 03335555778800233333555577778
44 | 02222335557780000000023333357778888
46 | 0000233357700000023578
48 | 00000022335800333
50 | 0370
R hist
> hist(eruptions)
## make the bins smaller, make a plot of density
> hist(eruptions, seq(1.6, 5.2, 0.2), prob=TRUE)
> lines(density(eruptions, bw=0.1))
> rug(eruptions) # show the actual data points
34. CHAPTER 8. 30
density
density
1
bw (trial-and-error) MASS KernSmooth
Histogram of eruptions
0.7
0.6
0.5
0.4
Density
0.3
0.2
0.1
0.0
1.5 2.0 2.5 3.0 3.5 4.0 4.5 5.0
eruptions
stepfun ecdf
> library(stepfun)
> plot(ecdf(eruptions), do.points=FALSE, verticals=TRUE)
eruption 3 CDF
> long <- eruptions[eruptions > 3]
> plot(ecdf(long), do.points=FALSE, verticals=TRUE)
> x <- seq(3, 5.4, 0.01)
> lines(x, pnorm(x, mean=mean(long), sd=sqrt(var(long))), lty=3)
ecdf(long)
1.0
0.8
0.6
Fn(x)
0.4
0.2
0.0
3.0 3.5 4.0 4.5 5.0
x
Quantile-quantile (Q-Q)
1 it usually does for interesting densities
35. CHAPTER 8. 31
par(pty="s")
qqnorm(long); qqline(long)
t
Normal Q−Q Plot
5.0
4.5
Sample Quantiles
4.0
3.5
3.0
−2 −1 0 1 2
Theoretical Quantiles
x <- rt(250, df = 5)
qqnorm(x); qqline(x)
qqplot(qt(ppoints(250), df=5), x, xlab="Q-Q plot for t dsn")
qqline(x)
Q-Q
ctest
Shapiro-Wilk
> library(ctest)
> shapiro.test(long)
Shapiro-Wilk normality test
data: long
W = 0.9793, p-value = 0.01052
Kolmogorov-Smirnov
> ks.test(long, "pnorm", mean=mean(long), sd=sqrt(var(long)))
One-sample Kolmogorov-Smirnov test
data: long
D = 0.0661, p-value = 0.4284
alternative hypothesis: two.sided
36. CHAPTER 8. 32
8.3
R
ctest
library(ctest)
(cal/gm) Rice(1995,P.490)
Method A: 79.98 80.04 80.02 80.04 80.03 80.03 80.04 79.97
80.05 80.03 80.02 80.00 80.02
Method B: 80.02 79.94 79.98 79.97 79.97 80.03 79.95 79.97
box-plot
A <- scan()
79.98 80.04 80.02 80.04 80.03 80.03 80.04 79.97
80.05 80.03 80.02 80.00 80.02
B <- scan()
80.02 79.94 79.98 79.97 79.97 80.03 79.95 79.97
boxplot(A, B)
80.04
80.02
80.00
79.98
79.96
79.94
1 2
t-
> t.test(A, B)
Welch Two Sample t-test
data: A and B
t = 3.2499, df = 12.027, p-value = 0.00694
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
37. CHAPTER 8. 33
0.01385526 0.07018320
sample estimates:
mean of x mean of y
80.02077 79.97875
R
S-Plus t.test
F
> var.test(A, B)
F test to compare two variances
data: A and B
F = 0.5837, num df = 12, denom df = 7, p-value = 0.3938
alternative hypothesis: true ratio of variances is not equal to 1
95 percent confidence interval:
0.1251097 2.1052687
sample estimates:
ratio of variances
0.5837405
t-
> t.test(A, B, var.equal=TRUE)
Two Sample t-test
data: A and B
t = 3.4722, df = 19, p-value = 0.002551
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
0.01669058 0.06734788
sample estimates:
mean of x mean of y
80.02077 79.97875
Wilcoxon Mann-
Whitney (null hypothesis)
> wilcox.test(A, B)
Wilcoxon rank sum test with continuity correction
data: A and B
W = 89, p-value = 0.007497
alternative hypothesis: true mu is not equal to 0
38. CHAPTER 8. 34
Warning message:
Cannot compute exact p-value with ties in: wilcox.test.default(A, B)
> library(stepfun)
> plot(ecdf(A), do.points=FALSE, verticals=TRUE, xlim=range(A, B))
> plot(ecdf(B), do.points=FALSE, verticals=TRUE, add=TRUE)
CDF qqplot Q-Q
Kolmogorov-Smirnov ecdf
> ks.test(A, B)
Two-sample Kolmogorov-Smirnov test
data: A and B
D = 0.5962, p-value = 0.05919
alternative hypothesis: two.sided
Warning message:
cannot compute correct p-values with ties in: ks.test(A, B)
39. Chapter 9
9.1
R
expr 1; . . . ; expr m
9.2
9.2.1 if
R
> if (expr_1) expr_2 else expr_3
expr 1
&& || if
& | && || & | && ||
if/else ifelse ifelse (condition,a,b)
condition[i] a[i]
b[i] a,b
9.2.2 for repeat while
loop
> for (name in expr_1) expr_2
name expr 1 1:20
expr 2
dummy name expr 2 name
expr 1
35
40. CHAPTER 9. 36
ind x
y x coplot()
> xc <- split(x, ind)
> yc <- split(y, ind)
> for (i in 1:length(yc)) {
plot(xc[[i]], yc[[i]]);
abline(lsfit(xc[[i]], yc[[i]]))
}
split()
help
for() R
R ’ ’ whole object
> repeat (expr )
> while (condition) expr
break
repeat
next
chapter
10
41. Chapter 10
R function
R
R
R
R mean(),
var(),postscript() R
> name <- function(arg_1, arg_2, ...) expression
expression R arg_i
name(expr_1, expr_2, ...)
10.1
t
# Two-sample t statistic with a pooled variance estimate.
#
# Arguments:
#   y1, y2  numeric vectors holding the two samples.
# Value:
#   (mean(y1) - mean(y2)) / sqrt(s * (1/n1 + 1/n2)), where s is the pooled
#   variance of the two samples.
twosam <- function(y1, y2) {
  n1 <- length(y1); n2 <- length(y2)
  yb1 <- mean(y1); yb2 <- mean(y2)
  s1 <- var(y1); s2 <- var(y2)
  # Pooled variance: the two sample variances weighted by their degrees
  # of freedom.
  s <- ((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2)
  # The standard error must use the pooled variance `s`, not `s2` alone.
  tst <- (yb1 - yb2) / sqrt(s * (1 / n1 + 1 / n2))
  tst
}
t Z
> tstat <- twosam(data$male, data$female); tstat
Matlab backslash
y X
37
42. CHAPTER 10. 38
qr()
n y n×p X $X \backslash y$ $(X^{T} X)^{-1} X^{T} y$
$(X^{T} X)^{-1}$ $X^{T} X$
# Least-squares coefficients for regressing y on the columns of X,
# obtained from the QR decomposition of X.
#
# Arguments:
#   X  the design matrix.
#   y  the response vector.
# Value:
#   the result of qr.coef() applied to qr(X) and y.
bslash <- function(X, y) {
  qr.coef(qr(X), y)
}
> regcoeff <- bslash(Xmat, yvar)
1
R lsfit() qr() qr.coef()
10.2
bslash()
%anything%
!
> "%!%" <- function(X, y) { ... }
x%!%y
(backslash)
%*% %o%
10.3
section 2.3 "name = obj"
fun1
> fun1 <- function(data, data.frame, graph, limit) {
[function body omitted]
}
> ans <- fun1(d, df, TRUE, 20)
> ans <- fun1(d, df, graph=TRUE, limit=20)
> ans <- fun1(data=d, limit=20, graph=TRUE, data.frame=df)
1 chapter11
43. CHAPTER 10. 39
fun1
> fun1 <- function(data, data.frame, graph=TRUE, limit=20) { ... }
> ans <- fun1(d, df)
> ans <- fun1(d, df, limit=10)
10.4 ’...’
par() plot() par()
"..."
fun1 <- function(data, data.frame, graph=TRUE, limit=20, ...) {
[omitted statements]
if (graph)
par(pch="*", ...)
[more omissions]
}
10.5
(local) (temporary)
X<-qr(x)
R
(global) (permanent)
, <<- assign() S-PLUS
<<- R 10.7
10.6
10.6.1 (Efficiency factors)
sect
blocks(b levels) varieties(v levels)
R K v×v (replications matrix) b × b
(block size matrix) N b×v (incidence matrix)
44. CHAPTER 10. 40
$E = I_v - R^{-1/2} N^{T} K^{-1} N R^{-1/2} = I_v - A^{T} A$
$A = K^{-1/2} N R^{-1/2}$
> bdeff <- function(blocks, varieties) {
# Efficiency factors of a block design.
#   blocks, varieties: one entry per plot, coerced to factors below.
# Returns a list with components eff (1 minus the squared singular values
# of A), blockcv (left singular vectors) and varietycv (right singular
# vectors).
blocks <- as.factor(blocks) # minor safety move
b <- length(levels(blocks))
varieties <- as.factor(varieties) # minor safety move
v <- length(levels(varieties))
# K and R hold the number of plots per block level and per variety level.
K <- as.vector(table(blocks)) # remove dim attr
R <- as.vector(table(varieties)) # remove dim attr
# N is the b-by-v cross-tabulation of blocks against varieties.
N <- table(blocks, varieties)
# Elementwise scaling of N. The length-b vector 1/sqrt(K) is recycled down
# the columns, so entry (i, j) is multiplied by 1/sqrt(K[i]); the rep() call
# repeats each 1/sqrt(R[j]) b times, so entry (i, j) is also multiplied by
# 1/sqrt(R[j]).
A <- 1/sqrt(K) * N * rep(1/sqrt(R), rep(b, v))
sv <- svd(A)
list(eff=1 - sv$d^2, blockcv=sv$u, varietycv=sv$v)
}
(block and variety canonical contrasts)
10.6.2
dimnames
dimnames X
# Print X with every row and column label blanked out, using a throwaway
# copy so that X itself keeps its dimnames.
temp <- X
dimnames(temp) <- list(rep("", nrow(X)), rep("", ncol(X)))
temp; rm(temp)
no.dimnames()
”wrap around”
no.dimnames <- function(a) {
  ## Return `a` with every dimension name replaced by an empty string,
  ## which gives a more compact printed layout.
  ## One vector of "" per dimension, each as long as that dimension's extent.
  blank_labels <- lapply(unname(dim(a)), function(extent) rep("", extent))
  dimnames(a) <- blank_labels
  a
}
> no.dimnames(X)
45. CHAPTER 10. 41
10.6.3
—
one panel trapezium rule two panel trapezium
panel
R
# Recursive numerical integration of f over [a, b] by the trapezium rule.
#
# Arguments:
#   f    function of one numeric argument.
#   a, b limits of integration.
#   eps  an interval is accepted once its one-panel and two-panel estimates
#        differ by less than eps.
#   lim  maximum recursion depth.
# Value:
#   the sum of the accepted two-panel estimates.
area <- function(f, a, b, eps = 1.0e-06, lim = 10) {
  ## `refine` is only visible inside area; it receives itself as `self`
  ## so that it can recurse.
  refine <- function(f, lo, hi, f_lo, f_hi, coarse, eps, depth, self) {
    mid <- (lo + hi)/2
    quarter <- (hi - lo)/4
    f_mid <- f(mid)
    left <- quarter * (f_lo + f_mid)
    right <- quarter * (f_mid + f_hi)
    # Accept when the halves agree with the coarse estimate, or when the
    # depth budget is exhausted.
    if (abs(coarse - left - right) < eps || depth == 0) {
      return(left + right)
    }
    self(f, lo, mid, f_lo, f_mid, left, eps, depth - 1, self) +
      self(f, mid, hi, f_mid, f_hi, right, eps, depth - 1, self)
  }
  f_a <- f(a)
  f_b <- f(b)
  one_panel <- ((f_a + f_b) * (b - a))/2
  refine(f, a, b, f_a, f_b, one_panel, eps, lim, refine)
}
10.7 (scope)
S-Plus R
(symbol) 3
binding
f <- function(x) {
# Scoping illustration: the three symbols below have different origins.
# x is the formal parameter of f.
# y is a local variable, created by this assignment.
y <- 2*x
print(x)
print(y)
# z is neither a parameter nor assigned inside f, so it is a free variable
# and must be found outside the function body when this line runs.
print(z)
}
46. CHAPTER 10. 42
x y z
R
(lexical scope)
cube
cube <- function(n) {
# sq takes no arguments, so the n in its body is a free variable of sq.
sq <- function() n*n
# Multiplies cube's own n by sq(); the R transcript later in this document
# shows cube(2) returning 8.
n*sq()
}
sq n
(static scope) S-
Plus n
(R) sq n cube
n sq R S-Plus S-Plus
n R cube n
## first evaluation in S
S> cube(2)
Error in sq(): Object "n" not found
Dumped
S> n <- 3
S> cube(2)
[1] 18
## then the same function evaluated in R
R> cube(2)
[1] 8
(mutable state)
R
account
account total
total
total
<<- total
total
2
total
R
<<- <<-
# Create a bank account whose balance lives in this call's environment.
#
# Arguments:
#   total  the opening balance.
# Value:
#   a list of three functions that share `total`:
#     deposit(amount)   adds amount; stops if amount <= 0.
#     withdraw(amount)  subtracts amount; stops if amount > total.
#     balance()         prints the current balance.
#   deposit and withdraw update the shared balance with `<<-`.
open.account <- function(total) {
  list(
    deposit = function(amount) {
      if (amount <= 0)
        stop("Deposits must be positive!\n")
      # (page-break residue from the text extraction, kept for reference:
      #  footnote "2 S-Plus S-Plus"; page header "47. CHAPTER 10. 43")
      total <<- total + amount
      cat(amount, "deposited. Your balance is", total, "\n\n")
    },
    withdraw = function(amount) {
      if (amount > total)
        stop("You don't have that much money!\n")
      total <<- total - amount
      cat(amount, "withdrawn. Your balance is", total, "\n\n")
    },
    balance = function() {
      cat("Your balance is", total, "\n\n")
    }
  )
}
ross <- open.account(100)
robert <- open.account(200)
ross$withdraw(30)
ross$balance()
robert$balance()
ross$deposit(50)
ross$balance()
ross$withdraw(500)
10.8
R (site
initialization file)
.First .Last
R_PROFILE
R ’etc’
R ’.Rprofile’3
R
’.Rprofile’
R ’.Rprofile’
’.RData’ .First()
R
’Rprofile.site’, ’.Rprofile’,’.RData’ .First()
# Start-up function: sets the prompts and print options, opens a graphics
# device, sets the plotting character, sources a personal script from
# $HOME/R and attaches the stepfun package.
.First <- function() {
  # The continuation prompt is "+" followed by a tab character.
  options(prompt="$ ", continue="+\t") # $ is the prompt
  options(digits=5, length=999) # custom numbers and printout
  x11() # for graphics
  par(pch = "+") # plotting character
  # my personal package
  source(file.path(Sys.getenv("HOME"), "R", "mystuff.R"))
  # (page-break residue from the text extraction, kept for reference:
  #  footnote "3 UNIX"; page header "48. CHAPTER 10. 44")
  library(stepfun) # attach the step function tools
}
.Last()
# Session-end function: closes all graphics devices, then prints the
# current date followed by "Adios" on a line of its own.
.Last <- function() {
  graphics.off() # a small safety measure.
  cat(paste(date(), "\nAdios\n")) # Is it time for lunch?
}
10.9
(class) (generic function)
plot()
summary(), anova()
’’data.frame’’
[ [[<- any as.matrix
[<- model plot summary
methods()
> methods(class="data.frame")
’’data.frame’’
,’’density’’,’’factor’’— plot()
methods()
> methods(plot)
49. Chapter 11
R
R
11.1
$y_i = \sum_{j=0}^{p} \beta_j x_{ij} + e_i, \quad e_i \sim \mathrm{NID}(0, \sigma^2), \quad i = 1, \ldots, n$
y = Xβ + e
y X $x_0, x_1, \ldots, x_p$
$x_0$ 1
Examples
y,x,x0,x1,x2,... X A,B,C,...
y~x
y~1+x
y x
y~0+x
y~-1+x
y~x-1
y x
45
50. CHAPTER 11. R 46
log(y)~x1+x2
log(y) x1 x2
y~poly(x,2)
y~1+x+I(x^2)
y x
y~X+poly(x,2)
y X x
y~A
y A
y~A+x
y A x
y~A*B
y~A+B+A:B
y~B%in%A
y~A/B
y A B (non-additive)
(crossed classification)
(nested classification)
y~(A+B+C)^2
y~A*B*C-A:B:C
y~A*x
y~A/x
y~A/(1+x)-1
y x A
A
y~A*B+Error(C)
A B C (error strata)
C
R ~
response ~ op_1 term_1 op_2 term_2 op_3 term_3 . . .
51. CHAPTER 11. R 47
response
op i + −
term i
• 1
•
•
1
Wilkinson Rogers Glim Genstat
R ’.’ ’:’
C&H 1992,p29
Y ~ M
Y M
M 1+M 2
M1 M2
M 1-M 2
M1 M2
M 1:M 2
M1 M2 (subclasses)
M 1 %in% M 2
M1 M2
M1*M2
M 1+M 2+M 1:M 2
M 1/M 2
M 1 + M 2 %in% M 1
M^n
M n
I(M)
M M
I()
52. CHAPTER 11. R 48
11.1.1 (contrasts)
1
k A
2,. . .,k k−1
k−1
1,. . .,k
k
contrasts (options)
R
options(contrasts = c("contr.treatment", "contr.poly"))
R S S
Helmert Contrasts R S-Plus
options(contrasts = c("contr.helmert", "contr.poly"))
treatment contrast R
contrasts C
R
11.2
lm()
> fitted.model <- lm(formula, data = data.frame)
> fm2 <- lm(y ~ x1 + x2, data = production)
y x1 x2
data = production
production production
11.3
lm() "lm"
"lm"
add1 coef effects kappa predict residuals
alias deviance family labels print step
anova drop1 formula plot proj summary
58. CHAPTER 11. R 54
200 .1
> out <- nlm(fn, p = c(200, 0.1), hessian = TRUE)
out$minimum SSE out$estimates
> sqrt(diag(2*out$minimum/(length(y) - 2) * solve(out$hessian)))
2 +1 .96SE 95%
> plot(x, y)
> xfit <- seq(.02, 1.1, .05)
> yfit <- 212.68384222 * xfit/(0.06412146 + xfit)
> lines(spline(xfit, yfit))
nls
Michaelis-Menten
> df <- data.frame(x=x, y=y)
> fit <- nls(y ~ SSmicmen(x, Vm, K), df)
> fit
Nonlinear regression model
model: y ~ SSmicmen(x, Vm, K)
data: df
Vm K
212.68370749 0.06412123
residual sum-of-squares: 1195.449
> summary(fit)
Formula: y ~ SSmicmen(x, Vm, K)
Parameters:
Estimate Std. Error t value Pr(>|t|)
Vm 2.127e+02 6.947e+00 30.615 3.24e-11 ***
K 6.412e-02 8.281e-03 7.743 1.57e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 10.93 on 10 degrees of freedom
Correlation of Parameter Estimates:
Vm
K 0.7651
11.7.2
Dobson (1990),pp.108-111 - logistic
glm()
59. CHAPTER 11. R 55
> x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113,
1.8369, 1.8610, 1.8839)
> y <- c( 6, 13, 18, 28, 52, 53, 61, 60)
> n <- c(59, 60, 62, 56, 63, 59, 62, 60)
# Negative log-likelihood evaluated at p, where p[1] is the intercept and
# p[2] the slope of the linear predictor p[1] + p[2]*x.
# Reads x, y and n from the enclosing environment.
fn <- function(p) {
  eta <- p[1] + p[2] * x
  loglik_terms <- y * eta - n * log(1 + exp(eta)) + log(choose(n, y))
  sum(-loglik_terms)
}
> out <- nlm(fn, p = c(-50,20), hessian = TRUE)
out$minimum out$estimates
> sqrt(diag(solve(out$hessian)))
±1.96SE 95%
11.8
R
• ( Mixed models ) nlme
lme() nlme()
• ( Local approximating regressions ) loess()
loess projection pursuit regression )
modreg
• ( Robust regression) —
lqs lqs
state-of-art
MASS
rlm
•Additive models
•Tree based models tbm
tree()
tbm plot() text()
Tree models rpart tree
60. Chapter 12
R
R
UNIX X11() Windows
windows() MacOS 8/9 macintosh()
R
• (High-level)
• (Low-level)
• (Interactive)
R
12.1
12.1.1 plot()
R plot()
(class)
plot(x,y)
plot(xy)
56
61. CHAPTER 12. 57
x y plot(x,y) y x x y
plot(x)
x x
x
plot(f)
plot(f,y)
f y f
y f
plot(df)
plot(~ expr)
plot(y ~ expr)
df y expr
’+’( a + b + c)
y expr
12.1.2
R X
> pairs(X)
X
coplot a b
c
> coplot(a ~ b | c)
c a b c
c a b c
conditioning intervals c
a b coplot()
given.values co.intervals()
>coplot(a ~ b | c+d)
c d a b
coplot() pairs() panel=
points()
panel=
panel.smooth()
62. CHAPTER 12. 58
12.1.3
qqnorm(x)
qqline(x)
qqplot(x,y)
x
x y
hist(x)
hist(x,nclass=n)
hist(x,breaks=b, . . . )
x
nclass= breaks=
probability=TRUE
dotchart(x, . . . )
x (dotchart) y x x
image(x,y,z, . . . )
contour(x,y,z, . . . )
persp(x,y,z, . . . )
image
z contour z persp
3D
12.1.4
add=TRUE
axes=FALSE — axis()
axes=TRUE
log=”x”
log=”y”
63. CHAPTER 12. 59
log=”xy” x,y
type= type=
type=”p”
type=”l”
type=”b” both
type=”o”
type=”h” high-density
type=”s”
type=”S”
type=”n”
xlab=string
ylab=string
x y
main=string
sub=string
x
12.2
points(x,y)
lines(x,y)
plot() type=
"p" points() "l" lines()
text(x,y,labels, . . . )
x,y labels
labels[i] (x[i],y[i]) 1:length(x)
Note > plot(x, y, type="n"); text(x, y, names)
type=”n” text()
names
64. CHAPTER 12. 60
abline(a, b)
abline(h=y)
abline(v=x)
abline(lm.obj)
b a h=y
v=x x
lm.obj coefficients
2
polygon(x, y, . . . )
(x,y)
legend(x, y, legend, . . . )
legend v
legend( , fill=v)
legend( , col=v)
legend( , lty=v)
legend( , lwd=v)
legend( , pch=v)
( )
title(main,sub)
main
sub
axis(side, . . . )
1 4
—
axes=FALSE
plot()
x,y
(user coordinates)
x,y x,y
locator()
65. CHAPTER 12. 61
12.2.1
R text, mtext,axis title
> text(x, y, expression(paste(bgroup("(", atop(n, x), ")"),
p^x, q^{n-x})))
R
> help(plotmath)
> example(plotmath)
12.2.2 Hershey
text contour Hershey Hershey
• Hershey
• Hershey
• Hershey
Hershey R
> help(Hershey)
> example(Hershey)
> help(Japanese)
> example(Japanese)
12.3
R
locator()
locator(n,type)
n( 512)
Unix,Windows) Mac
type
locator() x,y
locator()
> text(locator(1), "Outlier", adj=0)
locator()
x,y
66. CHAPTER 12. 62
identify(x, y, labels)
x,y labels
labels
Unix,Windows Mac
x,y
(x, y) identify()
> plot(x,y) > identify(x,y)
identify()
x/y
identify() labels
plot=FALSE
identify() x y
12.4
R
R
’col’
12.4.1 (Permanent changes): par()
par()
par()
par(c(”col”,”lty”))
par(col=4, lty=2)
par()
par() par()
—
67. CHAPTER 12. 63
R
par()
> oldpar <- par(col=4, lty=2) . . . plotting commands . . . > par(oldpar)
12.4.2
par()
> plot(x, y, pch="+")
12.5
par() R
name=value
name par()
value
12.5.1
R
pch=”+”
’◦’ ”.”
pch=4 0 18
> legend(locator(1), as.character(0:18), pch=0:18)
lty=2
1 2
lwd=2 ” ” lines()
col=2
font=2 1
2 3 4
font.axis
68. CHAPTER 12. 64
font.lab
font.main
font.sub x,y
adj=-0.1 0 1 0.5
-0.1
10%
cex=1.5
12.5.2
R axis()
axis line lty
tick mark tick label
lab=c(5,7,12)
x y
las=1 0 1 2
mgp=c(3,1,0)
(text lines)
0
tck=0.01 tck
0.5 x y tck=1
tck=0.01 mgp=c(1,-1.5,0)
xaxs=”s”
yaxs=”d” x y s(standard) e(extended)
(extended)
i(internal) r( )
r
d
69. CHAPTER 12. 65
12.5.3 (Figure margins)
R figure (plot region)
−−−−−−−−−−−−−−−−−−
−−−−−−−−−−−−−−−−−−
−−−−−−−−−−−−−−−−−−
−−−−−−−−−−−−−−−−−− mar[3]
−−−−−−−−−−−−−−−−−−
−−−−−−−−−−−−−−−−−−
3.0
Plot region
1.5
0.0
y
mai[2]
−1.5
−3.0
−3.0 −1.5 0.0 1.5 3.0
mai[1] x
Margin
mai=c(1,0.5,0.5,0)
mar=c(4,2,2,1)
mai (text lines)
mai mar
postscript()
height=4 mar mai
50%
12.5.4
R n×m