SlideShare ist ein Scribd-Unternehmen logo
1 von 14
Downloaden Sie, um offline zu lesen
R:
k
    apply
    sesejun@is.ocha.ac.jp
         2009/11/19
USPS
ImageName     Class   0,0   0,1   0,2   0,3   0,4
img_2_00_02   1       0     0     0     0     0
img_2_00_03   1       0     38    22    0     0
img_2_00_05   1       13    0     64    13    42
...
img_0_00_09   -1      34    53    0     38    0
img_0_00_28   -1      0     64    0     98    93
img_0_01_08   -1      13    0     0     59    13
img_0_03_05   -1      34    34    0     0     0
img_3_29_25   img_5_03_31   img_3_06_30   img_3_17_08
k-NN
Apply Family
•                                  ,      ,
    •   for
    •
    apply(X, 1,        )

    apply(X, 2,        )

apply(X, c(1,2),           )

    lapply(X,      )
                                                 dataframe


    sapply(X,      )
                                                   table

     sweep(X, M,V)             X       (M=1)   (M=2)         (M=c(1,2))   V
                                                                              7
1
> m <- matrix((1:9)**2, nrow=3)   > l <- list(a=1:3, b=4:6)
> m                               > l
     [,1] [,2] [,3]               $a
[1,]    1   16   49               [1] 1 2 3
[2,]    4   25   64
[3,]    9   36   81               $b
> apply(m, 1, sum)                [1] 4 5 6
[1] 66 93 126
> apply(m, 2, sum)                > lapply(l, sum)
[1] 14 77 194                     $a
> apply(m, c(1,2), sqrt)          [1] 6
     [,1] [,2] [,3]
[1,]    1    4     7              $b
[2,]    2    5     8              [1] 15
[3,]    3    6     9
                                  > sapply(l, sum)
                                   a b
                                   6 15

                                                              8
2
> m <- matrix((1:9)**2, nrow=3)   > l <- list(a=1:3, b=4:6)
> m                               > l
     [,1] [,2] [,3]               $a
[1,]    1   16   49               [1] 1 2 3
[2,]    4   25   64
[3,]    9   36   81               $b
> apply(m, 1, sum)                [1] 4 5 6
[1] 66 93 126
> apply(m, 2, sum)                > lapply(l, sum)
[1] 14 77 194                     $a
> apply(m, c(1,2), sqrt)          [1] 6
     [,1] [,2] [,3]
[1,]    1    4     7              $b
[2,]    2    5     8              [1] 15
[3,]    3    6     9
                                  > sapply(l, sum)
                                   a b
                                   6 15

                                                              9
K-NN
 •
> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> iris.test <- read.table("iris_test.csv", sep=",", header=T)

> q <- iris.test[1,1:4]

> diff <- sweep(iris.train[1:4], 2, t(q))

> diff * diff

> distquery <- apply(diff * diff, 1, sum)

> sort(distquery)

> order(distquery)




                                                                  10
1

> iris.train[order(distquery)[1:5],]

> iris.train[order(distquery)[1:5],]$Class

> knnclasses <- iris.train[order(distquery)[1:5],]$Class

> as.factor(table(knnclasses))

> sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)

> labels(sortedtable)[1]

> predclass <- labels(sortedtable)[1]

> predclass == iris.test$Class[1]



                                                                    11
>   knnpredict <- function(train,class,query,k) {
+   diff <- sweep(train,2,query)
+   distquery <- apply(diff * diff, 1, sum)
+   knnclasses <- class[order(distquery)[1:k]]
+   sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)
+   labels(sortedtable)[1]
+   }

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[1,1:4]),
5)

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[10,1:4]),
1)


> for (i in 1:length(rownames(iris.test))) {
+ pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test
[i,1:4]),10)
+ result <- pred == iris.test[i,]$Class
+ cat(paste(pred, iris.test[i,]$Class, result, sep="\t"))
+ cat("\n")
+ }                                                                 12
> resvec <- c()
> for (i in 1:30) {
+ pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test
[i,1:4]),10)
+ resvec <- append(resvec, pred == iris.test[i,]$Class)
+ }
> sum(resvec)/length(resvec)




                                                                      13
3
1.    IRIS
     1.   IRIS                              4    ("Sepal.length","Sepal.width",
          "Petal.length","Petal.width")


     2.   IRIS
                                                   K-NN


2.    USPS
     1.   USPS                            5-NN                      (0-9)


     2.   K-NN      K
     3.   USPS                                                                    K-NN
                                                                            K-NN    K


     4.   USPS

                                                                                         14

Weitere ähnliche Inhalte

Was ist angesagt?

Derivatives vinnie
Derivatives vinnieDerivatives vinnie
Derivatives vinnie
canalculus
 
Pt 3&4 turunan fungsi implisit dan cyclometri
Pt 3&4 turunan fungsi implisit dan cyclometriPt 3&4 turunan fungsi implisit dan cyclometri
Pt 3&4 turunan fungsi implisit dan cyclometri
lecturer
 
Composicion de funciones
Composicion de funcionesComposicion de funciones
Composicion de funciones
Paito Sarauz
 
Nov. 3 Coordinate Equation Of A Circle
Nov. 3 Coordinate Equation Of A CircleNov. 3 Coordinate Equation Of A Circle
Nov. 3 Coordinate Equation Of A Circle
RyanWatt
 
8th alg -l10.6
8th alg -l10.68th alg -l10.6
8th alg -l10.6
jdurst65
 
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
lecturer
 
Emat 213 midterm 2 fall 2005
Emat 213 midterm 2 fall 2005Emat 213 midterm 2 fall 2005
Emat 213 midterm 2 fall 2005
akabaka12
 
Lesson 19: Double Integrals over General Regions
Lesson 19: Double Integrals over General RegionsLesson 19: Double Integrals over General Regions
Lesson 19: Double Integrals over General Regions
Matthew Leingang
 
14 6 equations of-circles
14 6 equations of-circles14 6 equations of-circles
14 6 equations of-circles
gwilson8786
 

Was ist angesagt? (19)

Derivatives vinnie
Derivatives vinnieDerivatives vinnie
Derivatives vinnie
 
Pt 3&4 turunan fungsi implisit dan cyclometri
Pt 3&4 turunan fungsi implisit dan cyclometriPt 3&4 turunan fungsi implisit dan cyclometri
Pt 3&4 turunan fungsi implisit dan cyclometri
 
DISTANCE TWO LABELING FOR MULTI-STOREY GRAPHS
DISTANCE TWO LABELING FOR MULTI-STOREY GRAPHSDISTANCE TWO LABELING FOR MULTI-STOREY GRAPHS
DISTANCE TWO LABELING FOR MULTI-STOREY GRAPHS
 
General equation of a circle
General equation of a circleGeneral equation of a circle
General equation of a circle
 
F4 c1 functions__new__1_
F4 c1 functions__new__1_F4 c1 functions__new__1_
F4 c1 functions__new__1_
 
Composicion de funciones
Composicion de funcionesComposicion de funciones
Composicion de funciones
 
Nov. 3 Coordinate Equation Of A Circle
Nov. 3 Coordinate Equation Of A CircleNov. 3 Coordinate Equation Of A Circle
Nov. 3 Coordinate Equation Of A Circle
 
0207 ch 2 day 7
0207 ch 2 day 70207 ch 2 day 7
0207 ch 2 day 7
 
Talk5
Talk5Talk5
Talk5
 
1509 circle- coordinate geometry
1509 circle- coordinate geometry1509 circle- coordinate geometry
1509 circle- coordinate geometry
 
Bc4103338340
Bc4103338340Bc4103338340
Bc4103338340
 
8th alg -l10.6
8th alg -l10.68th alg -l10.6
8th alg -l10.6
 
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
Pt 2 turunan fungsi eksponen, logaritma, implisit dan cyclometri-d4
 
maths basics
maths basicsmaths basics
maths basics
 
Lesson18 Double Integrals Over Rectangles Slides
Lesson18   Double Integrals Over Rectangles SlidesLesson18   Double Integrals Over Rectangles Slides
Lesson18 Double Integrals Over Rectangles Slides
 
Emat 213 midterm 2 fall 2005
Emat 213 midterm 2 fall 2005Emat 213 midterm 2 fall 2005
Emat 213 midterm 2 fall 2005
 
Lesson 19: Double Integrals over General Regions
Lesson 19: Double Integrals over General RegionsLesson 19: Double Integrals over General Regions
Lesson 19: Double Integrals over General Regions
 
14 6 equations of-circles
14 6 equations of-circles14 6 equations of-circles
14 6 equations of-circles
 
Functions
FunctionsFunctions
Functions
 

Ähnlich wie Datamining r 4th

Datamining R 4th
Datamining R 4thDatamining R 4th
Datamining R 4th
sesejun
 
Datamining r 1st
Datamining r 1stDatamining r 1st
Datamining r 1st
sesejun
 
Datamining R 1st
Datamining R 1stDatamining R 1st
Datamining R 1st
sesejun
 
Datastructure tree
Datastructure treeDatastructure tree
Datastructure tree
rantd
 
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Vyacheslav Arbuzov
 

Ähnlich wie Datamining r 4th (20)

Datamining R 4th
Datamining R 4thDatamining R 4th
Datamining R 4th
 
Datamining r 1st
Datamining r 1stDatamining r 1st
Datamining r 1st
 
PRE: Datamining 2nd R
PRE: Datamining 2nd RPRE: Datamining 2nd R
PRE: Datamining 2nd R
 
Datamining R 1st
Datamining R 1stDatamining R 1st
Datamining R 1st
 
R
RR
R
 
Datastructure tree
Datastructure treeDatastructure tree
Datastructure tree
 
Send + More = Money – Let’s mash 2 monads to solve a simple CSP
Send + More = Money – Let’s mash 2 monads to solve a simple CSPSend + More = Money – Let’s mash 2 monads to solve a simple CSP
Send + More = Money – Let’s mash 2 monads to solve a simple CSP
 
Day 4b iteration and functions for-loops.pptx
Day 4b   iteration and functions  for-loops.pptxDay 4b   iteration and functions  for-loops.pptx
Day 4b iteration and functions for-loops.pptx
 
Vectormaths and Matrix in R.pptx
Vectormaths and Matrix in R.pptxVectormaths and Matrix in R.pptx
Vectormaths and Matrix in R.pptx
 
Test (S) on R
Test (S) on RTest (S) on R
Test (S) on R
 
Set 1 mawar
Set 1 mawarSet 1 mawar
Set 1 mawar
 
Matrix chain multiplication
Matrix chain multiplicationMatrix chain multiplication
Matrix chain multiplication
 
[1062BPY12001] Data analysis with R / week 2
[1062BPY12001] Data analysis with R / week 2[1062BPY12001] Data analysis with R / week 2
[1062BPY12001] Data analysis with R / week 2
 
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov VyacheslavSeminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
Seminar PSU 09.04.2013 - 10.04.2013 MiFIT, Arbuzov Vyacheslav
 
Secretary_Game_With_Rejection.pdf
Secretary_Game_With_Rejection.pdfSecretary_Game_With_Rejection.pdf
Secretary_Game_With_Rejection.pdf
 
9 chap
9 chap9 chap
9 chap
 
Question bank xi
Question bank xiQuestion bank xi
Question bank xi
 
Test s velocity_15_5_4
Test s velocity_15_5_4Test s velocity_15_5_4
Test s velocity_15_5_4
 
RHadoop の紹介
RHadoop の紹介RHadoop の紹介
RHadoop の紹介
 
Longest Common Subsequence & Matrix Chain Multiplication
Longest Common Subsequence & Matrix Chain MultiplicationLongest Common Subsequence & Matrix Chain Multiplication
Longest Common Subsequence & Matrix Chain Multiplication
 

Mehr von sesejun

次世代シーケンサが求める機械学習
次世代シーケンサが求める機械学習次世代シーケンサが求める機械学習
次世代シーケンサが求める機械学習
sesejun
 
20110524zurichngs 2nd pub
20110524zurichngs 2nd pub20110524zurichngs 2nd pub
20110524zurichngs 2nd pub
sesejun
 
20110524zurichngs 1st pub
20110524zurichngs 1st pub20110524zurichngs 1st pub
20110524zurichngs 1st pub
sesejun
 
20110214nips2010 read
20110214nips2010 read20110214nips2010 read
20110214nips2010 read
sesejun
 
Datamining 9th association_rule.key
Datamining 9th association_rule.keyDatamining 9th association_rule.key
Datamining 9th association_rule.key
sesejun
 
Datamining 8th hclustering
Datamining 8th hclusteringDatamining 8th hclustering
Datamining 8th hclustering
sesejun
 
Datamining r 3rd
Datamining r 3rdDatamining r 3rd
Datamining r 3rd
sesejun
 
Datamining r 2nd
Datamining r 2ndDatamining r 2nd
Datamining r 2nd
sesejun
 
Datamining 6th svm
Datamining 6th svmDatamining 6th svm
Datamining 6th svm
sesejun
 
Datamining 5th knn
Datamining 5th knnDatamining 5th knn
Datamining 5th knn
sesejun
 
Datamining 4th adaboost
Datamining 4th adaboostDatamining 4th adaboost
Datamining 4th adaboost
sesejun
 
Datamining 3rd naivebayes
Datamining 3rd naivebayesDatamining 3rd naivebayes
Datamining 3rd naivebayes
sesejun
 
Datamining 2nd decisiontree
Datamining 2nd decisiontreeDatamining 2nd decisiontree
Datamining 2nd decisiontree
sesejun
 
Datamining 7th kmeans
Datamining 7th kmeansDatamining 7th kmeans
Datamining 7th kmeans
sesejun
 
100401 Bioinfoinfra
100401 Bioinfoinfra100401 Bioinfoinfra
100401 Bioinfoinfra
sesejun
 
Datamining 8th Hclustering
Datamining 8th HclusteringDatamining 8th Hclustering
Datamining 8th Hclustering
sesejun
 
Datamining 9th Association Rule
Datamining 9th Association RuleDatamining 9th Association Rule
Datamining 9th Association Rule
sesejun
 

Mehr von sesejun (20)

RNAseqによる変動遺伝子抽出の統計: A Review
RNAseqによる変動遺伝子抽出の統計: A ReviewRNAseqによる変動遺伝子抽出の統計: A Review
RNAseqによる変動遺伝子抽出の統計: A Review
 
バイオインフォマティクスによる遺伝子発現解析
バイオインフォマティクスによる遺伝子発現解析バイオインフォマティクスによる遺伝子発現解析
バイオインフォマティクスによる遺伝子発現解析
 
次世代シーケンサが求める機械学習
次世代シーケンサが求める機械学習次世代シーケンサが求める機械学習
次世代シーケンサが求める機械学習
 
20110602labseminar pub
20110602labseminar pub20110602labseminar pub
20110602labseminar pub
 
20110524zurichngs 2nd pub
20110524zurichngs 2nd pub20110524zurichngs 2nd pub
20110524zurichngs 2nd pub
 
20110524zurichngs 1st pub
20110524zurichngs 1st pub20110524zurichngs 1st pub
20110524zurichngs 1st pub
 
20110214nips2010 read
20110214nips2010 read20110214nips2010 read
20110214nips2010 read
 
Datamining 9th association_rule.key
Datamining 9th association_rule.keyDatamining 9th association_rule.key
Datamining 9th association_rule.key
 
Datamining 8th hclustering
Datamining 8th hclusteringDatamining 8th hclustering
Datamining 8th hclustering
 
Datamining r 3rd
Datamining r 3rdDatamining r 3rd
Datamining r 3rd
 
Datamining r 2nd
Datamining r 2ndDatamining r 2nd
Datamining r 2nd
 
Datamining 6th svm
Datamining 6th svmDatamining 6th svm
Datamining 6th svm
 
Datamining 5th knn
Datamining 5th knnDatamining 5th knn
Datamining 5th knn
 
Datamining 4th adaboost
Datamining 4th adaboostDatamining 4th adaboost
Datamining 4th adaboost
 
Datamining 3rd naivebayes
Datamining 3rd naivebayesDatamining 3rd naivebayes
Datamining 3rd naivebayes
 
Datamining 2nd decisiontree
Datamining 2nd decisiontreeDatamining 2nd decisiontree
Datamining 2nd decisiontree
 
Datamining 7th kmeans
Datamining 7th kmeansDatamining 7th kmeans
Datamining 7th kmeans
 
100401 Bioinfoinfra
100401 Bioinfoinfra100401 Bioinfoinfra
100401 Bioinfoinfra
 
Datamining 8th Hclustering
Datamining 8th HclusteringDatamining 8th Hclustering
Datamining 8th Hclustering
 
Datamining 9th Association Rule
Datamining 9th Association RuleDatamining 9th Association Rule
Datamining 9th Association Rule
 

Datamining r 4th

  • 1. R: k apply sesejun@is.ocha.ac.jp 2009/11/19
  • 3. ImageName Class 0,0 0,1 0,2 0,3 0,4 img_2_00_02 1 0 0 0 0 0 img_2_00_03 1 0 38 22 0 0 img_2_00_05 1 13 0 64 13 42 ... img_0_00_09 -1 34 53 0 38 0 img_0_00_28 -1 0 64 0 98 93 img_0_01_08 -1 13 0 0 59 13 img_0_03_05 -1 34 34 0 0 0
  • 4.
  • 5. img_3_29_25 img_5_03_31 img_3_06_30 img_3_17_08
  • 7. Apply Family • , , • for • apply(X, 1, ) apply(X, 2, ) apply(X, c(1,2), ) lapply(X, ) dataframe sapply(X, ) table sweep(X, M,V) X (M=1) (M=2) (M=c(1,2)) V 7
  • 8. 1 > m <- matrix((1:9)**2, nrow=3) > l <- list(a=1:3, b=4:6) > m > l [,1] [,2] [,3] $a [1,] 1 16 49 [1] 1 2 3 [2,] 4 25 64 [3,] 9 36 81 $b > apply(m, 1, sum) [1] 4 5 6 [1] 66 93 126 > apply(m, 2, sum) > lapply(l, sum) [1] 14 77 194 $a > apply(m, c(1,2), sqrt) [1] 6 [,1] [,2] [,3] [1,] 1 4 7 $b [2,] 2 5 8 [1] 15 [3,] 3 6 9 > sapply(l, sum) a b 6 15 8
  • 9. 2 > m <- matrix((1:9)**2, nrow=3) > l <- list(a=1:3, b=4:6) > m > l [,1] [,2] [,3] $a [1,] 1 16 49 [1] 1 2 3 [2,] 4 25 64 [3,] 9 36 81 $b > apply(m, 1, sum) [1] 4 5 6 [1] 66 93 126 > apply(m, 2, sum) > lapply(l, sum) [1] 14 77 194 $a > apply(m, c(1,2), sqrt) [1] 6 [,1] [,2] [,3] [1,] 1 4 7 $b [2,] 2 5 8 [1] 15 [3,] 3 6 9 > sapply(l, sum) a b 6 15 9
  • 10. K-NN • > iris.train <- read.table("iris_train.csv", sep=",", header=T) > iris.test <- read.table("iris_test.csv", sep=",", header=T) > q <- iris.test[1,1:4] > diff <- sweep(iris.train[1:4], 2, t(q)) > diff * diff > distquery <- apply(diff * diff, 1, sum) > sort(distquery) > order(distquery) 10
  • 11. 1 > iris.train[order(distquery)[1:5],] > iris.train[order(distquery)[1:5],]$Class > knnclasses <- table(iris.train[order(distquery)[1:5],]$Class) > as.factor(table(knnclasses)) > sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T) > labels(sortedtable)[1] > predclass <- labels(sortedtable)[1] > predclass == iris.test$Class[1] 11
  • 12. > knnpredict <- function(train,class,query,k) { + diff <- sweep(train,2,query) + distquery <- apply(diff * diff, 1, sum) + knnclasses <- class[order(distquery)[1:k]] + sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T) + labels(sortedtable)[1] + } > knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[1,1:4]), 5) > knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[10,1:4]), 1) > for (i in 1:length(rownames(iris.test))) { + pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test [i,1:4]),10) + result <- pred == iris.test[i,]$Class + cat(paste(pred, iris.test[i,]$Class, result, sep="\t")) + cat("\n") + } 12
  • 13. > resvec <- c() > for (i in 1:30) { + pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test [i,1:4]),10) + resvec <- append(resvec, pred == iris.test[i,]$Class) + } > sum(resvec)/length(resvec) 13
  • 14. 3 1. IRIS 1. IRIS 4 ("Sepal.length","Sepal.width", "Petal.length","Petal.width") 2. IRIS K-NN 2. USPS 1. USPS 5-NN (0-9) 2. K-NN K 3. USPS K-NN K-NN K 4. USPS 14