Datamining R 4th
R: apply, Fisher
sesejun@is.ocha.ac.jp
2009/11/19
USPS
ImageName     Class   0,0   0,1   0,2   0,3   0,4
img_2_00_02   1       0     0     0     0     0
img_2_00_03   1       0     38    22    0     0
img_2_00_05   1       13    0     64    13    42
...
img_0_00_09   -1      34    53    0     38    0
img_0_00_28   -1      0     64    0     98    93
img_0_01_08   -1      13    0     0     59    13
img_0_03_05   -1      34    34    0     0     0
[Figure: example USPS digit images — img_3_29_25, img_5_03_31, img_3_06_30, img_3_17_08]
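The slides do not show how this table is loaded; a minimal sketch, assuming the data is stored as a comma-separated file named usps_train.csv with one row per image (the file name and exact format are assumptions):

> usps.train <- read.table("usps_train.csv", sep=",", header=T)   # assumed file name and format
> dim(usps.train)                                                 # images x (ImageName, Class, pixel columns)
> table(usps.train$Class)                                         # class balance (+1 vs -1 here)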
k-NN: the classifier implemented below; first, the apply-family functions it relies on.
Apply Family
 • Apply a function to each row, column, or element of a vector, matrix, list, or data frame
    • clearer (and usually shorter) than the equivalent for loop
    • the main variants:

    apply(X, 1, f)        apply f to each row of X
    apply(X, 2, f)        apply f to each column of X
    apply(X, c(1,2), f)   apply f to each element of X
    lapply(X, f)          apply f to each element of a list (or each column of a dataframe); the result is a list
    sapply(X, f)          like lapply, but simplifies the result to a vector or table when possible
    sweep(X, M, V)        subtract (by default) V from X along rows (M=1), columns (M=2), or both (M=c(1,2))
> m <- matrix((1:9)**2, nrow=3)
> m
     [,1] [,2] [,3]
[1,]    1   16   49
[2,]    4   25   64
[3,]    9   36   81
> apply(m, 1, sum)
[1] 66 93 126
> apply(m, 2, sum)
[1] 14 77 194
> apply(m, c(1,2), sqrt)
     [,1] [,2] [,3]
[1,]    1    4    7
[2,]    2    5    8
[3,]    3    6    9

> l <- list(a=1:3, b=4:6)
> l
$a
[1] 1 2 3

$b
[1] 4 5 6

> lapply(l, sum)
$a
[1] 6

$b
[1] 15

> sapply(l, sum)
 a  b
 6 15
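sweep() is not demonstrated in the console session above but is used in the k-NN code below; a minimal sketch of its behavior, reusing the matrix m from this slide:

> sweep(m, 1, c(1, 4, 9))          # subtract 1 from row 1, 4 from row 2, 9 from row 3
     [,1] [,2] [,3]
[1,]    0   15   48
[2,]    0   21   60
[3,]    0   27   72

In the k-NN code below, sweep(iris.train[1:4], 2, t(q)) uses margin 2 to subtract the query's four attribute values from every training row.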
K-NN
> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> iris.test <- read.table("iris_test.csv", sep=",", header=T)

> q <- iris.test[1,1:4]                        # first test example (its 4 attributes) as the query

> diff <- sweep(iris.train[1:4], 2, t(q))      # subtract the query from every training example

> diff * diff                                  # squared differences per attribute

> distquery <- apply(diff * diff, 1, sum)      # squared Euclidean distance to each training example

> sort(distquery)                              # distances in increasing order

> order(distquery)                             # indices of training examples, nearest first
> iris.train[order(distquery)[1:5],]           # the 5 nearest training examples

> iris.train[order(distquery)[1:5],]$Class     # their class labels

> knnclasses <- iris.train[order(distquery)[1:5],]$Class

> table(knnclasses)                            # votes per class among the 5 neighbors

> sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)

> labels(sortedtable)[1]                       # class with the most votes

> predclass <- labels(sortedtable)[1]

> predclass == iris.test$Class[1]              # does the prediction match the true label?
> knnpredict <- function(train,class,query,k) {
+   diff <- sweep(train, 2, query)                  # attribute-wise difference to the query
+   distquery <- apply(diff * diff, 1, sum)         # squared Euclidean distances
+   knnclasses <- class[order(distquery)[1:k]]      # classes of the k nearest neighbors
+   sortedtable <- sort(as.factor(table(knnclasses)), decreasing=T)
+   labels(sortedtable)[1]                          # majority class among the neighbors
+   }

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[1,1:4]), 5)

> knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[10,1:4]), 1)


> for (i in 1:length(rownames(iris.test))) {
+   pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]), 10)
+   result <- pred == iris.test[i,]$Class
+   cat(paste(pred, iris.test[i,]$Class, result, sep="\t"))
+   cat("\n")
+ }
> resvec <- c()
> for (i in 1:30) {
+   pred <- knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]), 10)
+   resvec <- append(resvec, pred == iris.test[i,]$Class)
+ }
> sum(resvec)/length(resvec)                   # fraction of correct predictions (accuracy)
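Not on the slides: the same accuracy computation can be written with sapply, in the spirit of the apply-family slide above; a minimal sketch, assuming iris.train, iris.test, and knnpredict() as defined earlier and k=10 as in the loop:

> correct <- sapply(1:length(rownames(iris.test)), function(i) {
+   knnpredict(iris.train[1:4], iris.train$Class, t(iris.test[i,1:4]), 10) == iris.test[i,]$Class
+ })
> sum(correct) / length(correct)               # accuracy over the whole test set

As a cross-check, the class package ships a built-in implementation: class::knn(iris.train[1:4], iris.test[1:4], iris.train$Class, k=10) should give comparable predictions, though ties may be broken differently than in knnpredict.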
SVM
> iris.train <- read.table("iris_train.csv", sep=",", header=T)
> iris.test <- read.table("iris_test.csv", sep=",", header=T)

> library("e1071")

> iris.model <- svm(iris.train[1:4], iris.train$Class)   # default kernel is radial basis (RBF)

> iris.pred <- predict(iris.model, iris.test[1:4])

> table(iris.pred, iris.test$Class)

iris.pred         Iris-setosa Iris-versicolor Iris-virginica
  Iris-setosa               7               0              0
  Iris-versicolor           0               9              0
  Iris-virginica            0               0             14
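A small addition not on the slides: the overall accuracy can be computed directly from the predictions, matching the confusion matrix above (7 + 9 + 14 of 30 test examples on the diagonal):

> sum(iris.pred == iris.test$Class) / length(iris.pred)   # accuracy; 30/30 = 1 for the table above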
> iris.model <- svm(iris.train[1:4], iris.train$Class, kernel="linear")

> iris.pred <- predict(iris.model, iris.test[1:4])

> table(iris.pred, iris.test$Class)

iris.pred         Iris-setosa Iris-versicolor Iris-virginica
  Iris-setosa               7               0              0
  Iris-versicolor           0               9              0
  Iris-virginica            0               0             14
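Also not on the slides: e1071 can estimate generalization error and tune hyperparameters; a minimal sketch using svm()'s cross argument and tune.svm() (the gamma/cost grids below are assumptions, not values from the course):

> iris.model <- svm(iris.train[1:4], iris.train$Class, cross=10)   # 10-fold cross-validation
> summary(iris.model)                                              # reports per-fold and total accuracy

> iris.tune <- tune.svm(iris.train[1:4], iris.train$Class,
+                       gamma=10^(-3:0), cost=10^(0:2))            # assumed search grids
> iris.tune$best.parameters
> iris.pred <- predict(iris.tune$best.model, iris.test[1:4])
> table(iris.pred, iris.test$Class)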
Exercises
1. IRIS (3 classes)
   1. Using the four IRIS attributes ("Sepal.length", "Sepal.width",
      "Petal.length", "Petal.width"), classify the IRIS test data.
   2. Classify the IRIS test data with K-NN and compare the results.
2. USPS
   1. Classify the USPS test images into the digit classes (0-9) with 5-NN.
   2. Try different values of K for K-NN and compare the results.
   3. Classify the USPS data with an SVM using the radial kernel
      (a starter sketch follows below).
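A minimal starter sketch for the USPS exercises above, assuming the data is provided as usps_train.csv / usps_test.csv in the same comma-separated format as the iris files, with ImageName in column 1 and Class in column 2; the file names and column layout are assumptions:

> usps.train <- read.table("usps_train.csv", sep=",", header=T)
> usps.test  <- read.table("usps_test.csv",  sep=",", header=T)
> feats <- 3:ncol(usps.train)                         # pixel columns (assumed layout)

> # 5-NN prediction for the first test image, with knnpredict() from above
> knnpredict(usps.train[feats], usps.train$Class, t(usps.test[1, feats]), 5)

> # SVM with the radial kernel (the default in e1071)
> library("e1071")
> usps.model <- svm(usps.train[feats], as.factor(usps.train$Class))   # as.factor: treat digit labels as classes
> usps.pred  <- predict(usps.model, usps.test[feats])
> table(usps.pred, usps.test$Class)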
