SlideShare ist ein Scribd-Unternehmen logo
1 von 29
Downloaden Sie, um offline zu lesen
Linear Regression
      with
      2: Model selection

     2012-12-10 @HSPH
    Kazuki Yoshida, M.D.
      MPH-CLE student

                           FREEDOM
                           TO	
  KNOW
Group Website is at:
http://rpubs.com/kaz_yos/useR_at_HSPH
Previously in this group
n   Introduction               n   Graphics

n   Reading Data into R (1)    n   Groupwise, continuous

n   Reading Data into R (2)    n   Linear regression

n   Descriptive, continuous

n   Descriptive, categorical

n   Deducer
Menu


n   Linear regression: Model selection
Ingredients
         Statistics                Programming
n   Selection methods      n   step()

                            n   drop1()

                            n   add1()

                            n   leaps::regsubsets()
Open
R Studio
Open the saved
   script that we
 created last time.
See also Linear Regression with R 1 slides
Create full & null models

lm.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
              race.cat + preterm, data = lbw)

lm.null <- lm(bwt ~ 1, data = lbw)



                 Intercept-only
Compare two models


anova(lm.full, lm.null)

      Model 1   Model 2
Models
             Partial F-test
               Difference in residual SS




    Residual sum of squares
Residual degree of freedom      Significant
Backward elimination
                    Specify full model



lm.step.bw <- step(lm.full, direction = "backward")




     Final model object
Initial
  AIC       Removing ftv.cat
for full   makes AIC smallest
 model


             Removing age
           makes AIC smallest



             Doing nothing
           makes AIC smallest
Forward selection
           Final model object
                                     Specify null model

lm.step.fw <- step(lm.null,
    scope = ~ age + lwt + smoke + ht + ui + ftv.cat +
            race.cat + preterm,
  direction = "forward")

                                formula for possible
                                     variables
Initial
 AIC
 for                              Adding ui
 null                         makes AIC smallest
model


                               Adding race.cat
                              makes AIC smallest



                                Adding smoke
                              makes AIC smallest

          Still goes on ...
Stepwise selection/elimination
           Final model object
                                     Specify null model

lm.step.both <- step(lm.null,
    scope = ~ age + lwt + smoke + ht + ui + ftv.cat +
            race.cat + preterm,
  direction = "both")

                                formula for possible
                                     variables
Initial
   AIC                                    Adding ui
   for                                makes AIC smallest
   null
  model

                                       Adding race.cat
  Removing is                         makes AIC smallest
also considered


                                        Adding smoke
  Removing is                         makes AIC smallest
also considered

                  Still goes on ...
F-test using drop1()
## age is the least significant by partial F test
drop1(lm.full, test = "F")

## After elimination, ftv.cat is the least significant
drop1(update(lm.full, ~ . -age), test = "F")

## After elimination, preterm is least significat at p = 0.12.
drop1(update(lm.full, ~ . -age -ftv.cat), test = "F")

## After elimination, all variables are significant at p < 0.1
drop1(update(lm.full, ~ . -age -ftv.cat -preterm), test = "F")

## Show summary for final model
summary(update(lm.full, ~ . -age -ftv.cat -preterm))
Updating models
## Remove age from full model
lm.age.less <- update(lm.full, ~ . -age)


                     all variables(.) minus age

## Adding ui to null model
lm.ui.only <- update(lm.null, ~ . +ui)



                       all variables (.) plus ui
test full model


age least significant
F-test comparing age-in
model to age-out model


remove age, and test




ftv.cat least significant


remove age, ftv.cat
F-test using add1()
## ui is the most significant variable
add1(lm.null, scope = ~ age + lwt + race.cat + smoke + preterm +
+ ui + ftv.cat, test = "F")

## After inclusion, race.cat is the most significant
add1(update(lm.null, ~ . +ui), scope = ~ age + lwt + race.cat +
smoke + preterm + ht + ui + ftv.cat, test = "F")

## After inclusion, smoke is the most significant
add1(update(lm.null, ~ . +ui +race.cat), scope = ~ age + lwt +
race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F")

## After inclusion, ht is the most significant
add1(update(lm.null, ~ . +ui +race.cat +smoke), scope = ~ age + l
+ race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F")
...
test null model



 ui most significant
F-test comparing ui-out
 model to ui-in model

        add ui, and test


race.cat most significant


  add ui and race.cat
All-subset regression
using leaps package
library(leaps)

regsubsets.out <-
  regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
             race.cat + preterm,
        data = lbw,
        nbest = 1,
        nvmax = NULL,
        force.in = NULL, force.out = NULL,
        method = "exhaustive")

summary(regsubsets.out)
library(leaps)          Result object


 regsubsets.out <-
   regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat +
                race.cat + preterm,            Full model
           data = lbw,           How many best models?
  Max
model size nbest = 1,               Forced variables
           nvmax = NULL,
           force.in = NULL, force.out = NULL,
           method = "exhaustive")

summary(regsubsets.out)
Forced variables


Variable combination    Best 1
                       predictor
                        model

                        Best 7
                       predictor
                        model
                        Best 10
                       predictor
                        model
plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2")
             the higher the better


                                     ~ lwt + smoke + ht + ui
                                      + race.cat + preterm


                                     ~ smoke + ht + ui + race


                                     ~ ui
library(car)
subsets(regsubsets.out, statistic="adjr2", legend = FALSE,
        min.size = 5, main = "Adjusted R^2")




             ~ lwt + smoke + ht + ui
              + race.cat + preterm
subsets(regsubsets.out, statistic="cp", legend = FALSE,
        min.size = 5, main = "Mallow Cp")




       First model for which Mallow
         Cp is less than number of
               regressors + 1




                                      ~ lwt + smoke + ht + ui
                                       + race.cat + preterm
Linear regression with R 2

Weitere ähnliche Inhalte

Was ist angesagt?

The Ring programming language version 1.10 book - Part 45 of 212
The Ring programming language version 1.10 book - Part 45 of 212The Ring programming language version 1.10 book - Part 45 of 212
The Ring programming language version 1.10 book - Part 45 of 212Mahmoud Samir Fayed
 
Sequence and Traverse - Part 2
Sequence and Traverse - Part 2Sequence and Traverse - Part 2
Sequence and Traverse - Part 2Philip Schwarz
 
Real World Haskell: Lecture 5
Real World Haskell: Lecture 5Real World Haskell: Lecture 5
Real World Haskell: Lecture 5Bryan O'Sullivan
 
Cs1123 9 strings
Cs1123 9 stringsCs1123 9 strings
Cs1123 9 stringsTAlha MAlik
 
Scala. Introduction to FP. Monads
Scala. Introduction to FP. MonadsScala. Introduction to FP. Monads
Scala. Introduction to FP. MonadsKirill Kozlov
 
The Ring programming language version 1.6 book - Part 37 of 189
The Ring programming language version 1.6 book - Part 37 of 189The Ring programming language version 1.6 book - Part 37 of 189
The Ring programming language version 1.6 book - Part 37 of 189Mahmoud Samir Fayed
 
Strings In OOP(Object oriented programming)
Strings In OOP(Object oriented programming)Strings In OOP(Object oriented programming)
Strings In OOP(Object oriented programming)Danial Virk
 
Real World Haskell: Lecture 4
Real World Haskell: Lecture 4Real World Haskell: Lecture 4
Real World Haskell: Lecture 4Bryan O'Sullivan
 
Real World Haskell: Lecture 2
Real World Haskell: Lecture 2Real World Haskell: Lecture 2
Real World Haskell: Lecture 2Bryan O'Sullivan
 
Real World Haskell: Lecture 1
Real World Haskell: Lecture 1Real World Haskell: Lecture 1
Real World Haskell: Lecture 1Bryan O'Sullivan
 
16 Linear data structures
16 Linear data structures16 Linear data structures
16 Linear data structuresmaznabili
 
Real World Haskell: Lecture 6
Real World Haskell: Lecture 6Real World Haskell: Lecture 6
Real World Haskell: Lecture 6Bryan O'Sullivan
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойSigma Software
 
High-Performance Haskell
High-Performance HaskellHigh-Performance Haskell
High-Performance HaskellJohan Tibell
 
The Ring programming language version 1.5.3 book - Part 35 of 184
The Ring programming language version 1.5.3 book - Part 35 of 184The Ring programming language version 1.5.3 book - Part 35 of 184
The Ring programming language version 1.5.3 book - Part 35 of 184Mahmoud Samir Fayed
 

Was ist angesagt? (20)

The Ring programming language version 1.10 book - Part 45 of 212
The Ring programming language version 1.10 book - Part 45 of 212The Ring programming language version 1.10 book - Part 45 of 212
The Ring programming language version 1.10 book - Part 45 of 212
 
Sequence and Traverse - Part 2
Sequence and Traverse - Part 2Sequence and Traverse - Part 2
Sequence and Traverse - Part 2
 
Real World Haskell: Lecture 5
Real World Haskell: Lecture 5Real World Haskell: Lecture 5
Real World Haskell: Lecture 5
 
Cs1123 9 strings
Cs1123 9 stringsCs1123 9 strings
Cs1123 9 strings
 
Scala. Introduction to FP. Monads
Scala. Introduction to FP. MonadsScala. Introduction to FP. Monads
Scala. Introduction to FP. Monads
 
The Ring programming language version 1.6 book - Part 37 of 189
The Ring programming language version 1.6 book - Part 37 of 189The Ring programming language version 1.6 book - Part 37 of 189
The Ring programming language version 1.6 book - Part 37 of 189
 
Strings In OOP(Object oriented programming)
Strings In OOP(Object oriented programming)Strings In OOP(Object oriented programming)
Strings In OOP(Object oriented programming)
 
Real World Haskell: Lecture 4
Real World Haskell: Lecture 4Real World Haskell: Lecture 4
Real World Haskell: Lecture 4
 
Real World Haskell: Lecture 2
Real World Haskell: Lecture 2Real World Haskell: Lecture 2
Real World Haskell: Lecture 2
 
(Ai lisp)
(Ai lisp)(Ai lisp)
(Ai lisp)
 
Real World Haskell: Lecture 1
Real World Haskell: Lecture 1Real World Haskell: Lecture 1
Real World Haskell: Lecture 1
 
16 Linear data structures
16 Linear data structures16 Linear data structures
16 Linear data structures
 
Headerfiles
HeaderfilesHeaderfiles
Headerfiles
 
Real World Haskell: Lecture 6
Real World Haskell: Lecture 6Real World Haskell: Lecture 6
Real World Haskell: Lecture 6
 
Столпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай МозговойСтолпы функционального программирования для адептов ООП, Николай Мозговой
Столпы функционального программирования для адептов ООП, Николай Мозговой
 
Lec2
Lec2Lec2
Lec2
 
Lect6 csp
Lect6 cspLect6 csp
Lect6 csp
 
High-Performance Haskell
High-Performance HaskellHigh-Performance Haskell
High-Performance Haskell
 
Lambda Calculus
Lambda CalculusLambda Calculus
Lambda Calculus
 
The Ring programming language version 1.5.3 book - Part 35 of 184
The Ring programming language version 1.5.3 book - Part 35 of 184The Ring programming language version 1.5.3 book - Part 35 of 184
The Ring programming language version 1.5.3 book - Part 35 of 184
 

Andere mochten auch

20130215 Reading data into R
20130215 Reading data into R20130215 Reading data into R
20130215 Reading data into RKazuki Yoshida
 
Variable selection for classification and regression using R
Variable selection for classification and regression using RVariable selection for classification and regression using R
Variable selection for classification and regression using RGregg Barrett
 
Weather forecasting technology
Weather forecasting technologyWeather forecasting technology
Weather forecasting technologyzionbrighton
 
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...Goran S. Milovanovic
 

Andere mochten auch (7)

20130215 Reading data into R
20130215 Reading data into R20130215 Reading data into R
20130215 Reading data into R
 
Linear Regression using R
Linear Regression using RLinear Regression using R
Linear Regression using R
 
Variable selection for classification and regression using R
Variable selection for classification and regression using RVariable selection for classification and regression using R
Variable selection for classification and regression using R
 
Varianzanalyse
VarianzanalyseVarianzanalyse
Varianzanalyse
 
Weather forecasting technology
Weather forecasting technologyWeather forecasting technology
Weather forecasting technology
 
Data analysis of weather forecasting
Data analysis of weather forecastingData analysis of weather forecasting
Data analysis of weather forecasting
 
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
Introduction to R for Data Science :: Session 7 [Multiple Linear Regression i...
 

Ähnlich wie Linear regression with R 2

Automatic and Interpretable Machine Learning with H2O and LIME
Automatic and Interpretable Machine Learning with H2O and LIMEAutomatic and Interpretable Machine Learning with H2O and LIME
Automatic and Interpretable Machine Learning with H2O and LIMEJo-fai Chow
 
Sorting Algorithms
Sorting AlgorithmsSorting Algorithms
Sorting Algorithmsmultimedia9
 
Introduction to functional programming using Ocaml
Introduction to functional programming using OcamlIntroduction to functional programming using Ocaml
Introduction to functional programming using Ocamlpramode_ce
 
Python Programming Homework Help.pptx
Python Programming Homework Help.pptxPython Programming Homework Help.pptx
Python Programming Homework Help.pptxPython Homework Help
 
Python Performance 101
Python Performance 101Python Performance 101
Python Performance 101Ankur Gupta
 
Python High Level Functions_Ch 11.ppt
Python High Level Functions_Ch 11.pptPython High Level Functions_Ch 11.ppt
Python High Level Functions_Ch 11.pptAnishaJ7
 
R/Finance 2009 Chicago
R/Finance 2009 ChicagoR/Finance 2009 Chicago
R/Finance 2009 Chicagogyollin
 
Cs2312 OOPS LAB MANUAL
Cs2312 OOPS LAB MANUALCs2312 OOPS LAB MANUAL
Cs2312 OOPS LAB MANUALPrabhu D
 
Functional programming ii
Functional programming iiFunctional programming ii
Functional programming iiPrashant Kalkar
 
Mario Fusco - Lazy Java - Codemotion Milan 2018
Mario Fusco - Lazy Java - Codemotion Milan 2018Mario Fusco - Lazy Java - Codemotion Milan 2018
Mario Fusco - Lazy Java - Codemotion Milan 2018Codemotion
 
How to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITHow to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITEgor Bogatov
 
Chap2 class,objects contd
Chap2 class,objects contdChap2 class,objects contd
Chap2 class,objects contdraksharao
 
The aggregate function - from sequential and parallel folds to parallel aggre...
The aggregate function - from sequential and parallel folds to parallel aggre...The aggregate function - from sequential and parallel folds to parallel aggre...
The aggregate function - from sequential and parallel folds to parallel aggre...Philip Schwarz
 

Ähnlich wie Linear regression with R 2 (20)

Automatic and Interpretable Machine Learning with H2O and LIME
Automatic and Interpretable Machine Learning with H2O and LIMEAutomatic and Interpretable Machine Learning with H2O and LIME
Automatic and Interpretable Machine Learning with H2O and LIME
 
Sorting Algorithms
Sorting AlgorithmsSorting Algorithms
Sorting Algorithms
 
Computer programming 2 Lesson 10
Computer programming 2  Lesson 10Computer programming 2  Lesson 10
Computer programming 2 Lesson 10
 
Introduction to functional programming using Ocaml
Introduction to functional programming using OcamlIntroduction to functional programming using Ocaml
Introduction to functional programming using Ocaml
 
Python Programming Homework Help.pptx
Python Programming Homework Help.pptxPython Programming Homework Help.pptx
Python Programming Homework Help.pptx
 
Python Performance 101
Python Performance 101Python Performance 101
Python Performance 101
 
Python High Level Functions_Ch 11.ppt
Python High Level Functions_Ch 11.pptPython High Level Functions_Ch 11.ppt
Python High Level Functions_Ch 11.ppt
 
R/Finance 2009 Chicago
R/Finance 2009 ChicagoR/Finance 2009 Chicago
R/Finance 2009 Chicago
 
Cs2312 OOPS LAB MANUAL
Cs2312 OOPS LAB MANUALCs2312 OOPS LAB MANUAL
Cs2312 OOPS LAB MANUAL
 
Functional programming ii
Functional programming iiFunctional programming ii
Functional programming ii
 
Lazy Java
Lazy JavaLazy Java
Lazy Java
 
Mario Fusco - Lazy Java - Codemotion Milan 2018
Mario Fusco - Lazy Java - Codemotion Milan 2018Mario Fusco - Lazy Java - Codemotion Milan 2018
Mario Fusco - Lazy Java - Codemotion Milan 2018
 
Lazy java
Lazy javaLazy java
Lazy java
 
Lazy Java
Lazy JavaLazy Java
Lazy Java
 
How to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJITHow to add an optimization for C# to RyuJIT
How to add an optimization for C# to RyuJIT
 
Chap2 class,objects contd
Chap2 class,objects contdChap2 class,objects contd
Chap2 class,objects contd
 
Python Homework Help
Python Homework HelpPython Homework Help
Python Homework Help
 
Stacks.ppt
Stacks.pptStacks.ppt
Stacks.ppt
 
Stacks.ppt
Stacks.pptStacks.ppt
Stacks.ppt
 
The aggregate function - from sequential and parallel folds to parallel aggre...
The aggregate function - from sequential and parallel folds to parallel aggre...The aggregate function - from sequential and parallel folds to parallel aggre...
The aggregate function - from sequential and parallel folds to parallel aggre...
 

Mehr von Kazuki Yoshida

Graphical explanation of causal mediation analysis
Graphical explanation of causal mediation analysisGraphical explanation of causal mediation analysis
Graphical explanation of causal mediation analysisKazuki Yoshida
 
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCTPharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCTKazuki Yoshida
 
What is the Expectation Maximization (EM) Algorithm?
What is the Expectation Maximization (EM) Algorithm?What is the Expectation Maximization (EM) Algorithm?
What is the Expectation Maximization (EM) Algorithm?Kazuki Yoshida
 
Propensity Score Methods for Comparative Effectiveness Research with Multiple...
Propensity Score Methods for Comparative Effectiveness Research with Multiple...Propensity Score Methods for Comparative Effectiveness Research with Multiple...
Propensity Score Methods for Comparative Effectiveness Research with Multiple...Kazuki Yoshida
 
Visual Explanation of Ridge Regression and LASSO
Visual Explanation of Ridge Regression and LASSOVisual Explanation of Ridge Regression and LASSO
Visual Explanation of Ridge Regression and LASSOKazuki Yoshida
 
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...Kazuki Yoshida
 
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...Kazuki Yoshida
 
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...Kazuki Yoshida
 
Spacemacs: emacs user's first impression
Spacemacs: emacs user's first impressionSpacemacs: emacs user's first impression
Spacemacs: emacs user's first impressionKazuki Yoshida
 
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...Kazuki Yoshida
 
Multiple Imputation: Joint and Conditional Modeling of Missing Data
Multiple Imputation: Joint and Conditional Modeling of Missing DataMultiple Imputation: Joint and Conditional Modeling of Missing Data
Multiple Imputation: Joint and Conditional Modeling of Missing DataKazuki Yoshida
 
20130222 Data structures and manipulation in R
20130222 Data structures and manipulation in R20130222 Data structures and manipulation in R
20130222 Data structures and manipulation in RKazuki Yoshida
 
(Very) Basic graphing with R
(Very) Basic graphing with R(Very) Basic graphing with R
(Very) Basic graphing with RKazuki Yoshida
 
Introduction to Deducer
Introduction to DeducerIntroduction to Deducer
Introduction to DeducerKazuki Yoshida
 
Groupwise comparison of continuous data
Groupwise comparison of continuous dataGroupwise comparison of continuous data
Groupwise comparison of continuous dataKazuki Yoshida
 
Categorical data with R
Categorical data with RCategorical data with R
Categorical data with RKazuki Yoshida
 
Install and Configure R and RStudio
Install and Configure R and RStudioInstall and Configure R and RStudio
Install and Configure R and RStudioKazuki Yoshida
 
Reading Data into R REVISED
Reading Data into R REVISEDReading Data into R REVISED
Reading Data into R REVISEDKazuki Yoshida
 
Descriptive Statistics with R
Descriptive Statistics with RDescriptive Statistics with R
Descriptive Statistics with RKazuki Yoshida
 

Mehr von Kazuki Yoshida (20)

Graphical explanation of causal mediation analysis
Graphical explanation of causal mediation analysisGraphical explanation of causal mediation analysis
Graphical explanation of causal mediation analysis
 
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCTPharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
Pharmacoepidemiology Lecture: Designing Observational CER to Emulate an RCT
 
What is the Expectation Maximization (EM) Algorithm?
What is the Expectation Maximization (EM) Algorithm?What is the Expectation Maximization (EM) Algorithm?
What is the Expectation Maximization (EM) Algorithm?
 
Propensity Score Methods for Comparative Effectiveness Research with Multiple...
Propensity Score Methods for Comparative Effectiveness Research with Multiple...Propensity Score Methods for Comparative Effectiveness Research with Multiple...
Propensity Score Methods for Comparative Effectiveness Research with Multiple...
 
Emacs Key Bindings
Emacs Key BindingsEmacs Key Bindings
Emacs Key Bindings
 
Visual Explanation of Ridge Regression and LASSO
Visual Explanation of Ridge Regression and LASSOVisual Explanation of Ridge Regression and LASSO
Visual Explanation of Ridge Regression and LASSO
 
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
ENAR 2018 Matching Weights to Simultaneously Compare Three Treatment Groups: ...
 
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
Search and Replacement Techniques in Emacs: avy, swiper, multiple-cursor, ag,...
 
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
Comparison of Privacy-Protecting Analytic and Data-sharing Methods: a Simulat...
 
Spacemacs: emacs user's first impression
Spacemacs: emacs user's first impressionSpacemacs: emacs user's first impression
Spacemacs: emacs user's first impression
 
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
Matching Weights to Simultaneously Compare Three Treatment Groups: a Simulati...
 
Multiple Imputation: Joint and Conditional Modeling of Missing Data
Multiple Imputation: Joint and Conditional Modeling of Missing DataMultiple Imputation: Joint and Conditional Modeling of Missing Data
Multiple Imputation: Joint and Conditional Modeling of Missing Data
 
20130222 Data structures and manipulation in R
20130222 Data structures and manipulation in R20130222 Data structures and manipulation in R
20130222 Data structures and manipulation in R
 
(Very) Basic graphing with R
(Very) Basic graphing with R(Very) Basic graphing with R
(Very) Basic graphing with R
 
Introduction to Deducer
Introduction to DeducerIntroduction to Deducer
Introduction to Deducer
 
Groupwise comparison of continuous data
Groupwise comparison of continuous dataGroupwise comparison of continuous data
Groupwise comparison of continuous data
 
Categorical data with R
Categorical data with RCategorical data with R
Categorical data with R
 
Install and Configure R and RStudio
Install and Configure R and RStudioInstall and Configure R and RStudio
Install and Configure R and RStudio
 
Reading Data into R REVISED
Reading Data into R REVISEDReading Data into R REVISED
Reading Data into R REVISED
 
Descriptive Statistics with R
Descriptive Statistics with RDescriptive Statistics with R
Descriptive Statistics with R
 

Kürzlich hochgeladen

Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3JemimahLaneBuaron
 
Interactive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationInteractive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationnomboosow
 
Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104misteraugie
 
Student login on Anyboli platform.helpin
Student login on Anyboli platform.helpinStudent login on Anyboli platform.helpin
Student login on Anyboli platform.helpinRaunakKeshri1
 
Measures of Central Tendency: Mean, Median and Mode
Measures of Central Tendency: Mean, Median and ModeMeasures of Central Tendency: Mean, Median and Mode
Measures of Central Tendency: Mean, Median and ModeThiyagu K
 
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...PsychoTech Services
 
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...Sapna Thakur
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfAdmir Softic
 
Unit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptxUnit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptxVishalSingh1417
 
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptxSOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptxiammrhaywood
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdfQucHHunhnh
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhikauryashika82
 
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...EduSkills OECD
 
A Critique of the Proposed National Education Policy Reform
A Critique of the Proposed National Education Policy ReformA Critique of the Proposed National Education Policy Reform
A Critique of the Proposed National Education Policy ReformChameera Dedduwage
 
General AI for Medical Educators April 2024
General AI for Medical Educators April 2024General AI for Medical Educators April 2024
General AI for Medical Educators April 2024Janet Corral
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...christianmathematics
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdfQucHHunhnh
 
9548086042 for call girls in Indira Nagar with room service
9548086042  for call girls in Indira Nagar  with room service9548086042  for call girls in Indira Nagar  with room service
9548086042 for call girls in Indira Nagar with room servicediscovermytutordmt
 

Kürzlich hochgeladen (20)

Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3Q4-W6-Restating Informational Text Grade 3
Q4-W6-Restating Informational Text Grade 3
 
Interactive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communicationInteractive Powerpoint_How to Master effective communication
Interactive Powerpoint_How to Master effective communication
 
Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104Nutritional Needs Presentation - HLTH 104
Nutritional Needs Presentation - HLTH 104
 
Student login on Anyboli platform.helpin
Student login on Anyboli platform.helpinStudent login on Anyboli platform.helpin
Student login on Anyboli platform.helpin
 
Measures of Central Tendency: Mean, Median and Mode
Measures of Central Tendency: Mean, Median and ModeMeasures of Central Tendency: Mean, Median and Mode
Measures of Central Tendency: Mean, Median and Mode
 
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...
IGNOU MSCCFT and PGDCFT Exam Question Pattern: MCFT003 Counselling and Family...
 
Mattingly "AI & Prompt Design: The Basics of Prompt Design"
Mattingly "AI & Prompt Design: The Basics of Prompt Design"Mattingly "AI & Prompt Design: The Basics of Prompt Design"
Mattingly "AI & Prompt Design: The Basics of Prompt Design"
 
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
BAG TECHNIQUE Bag technique-a tool making use of public health bag through wh...
 
Key note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdfKey note speaker Neum_Admir Softic_ENG.pdf
Key note speaker Neum_Admir Softic_ENG.pdf
 
Unit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptxUnit-IV- Pharma. Marketing Channels.pptx
Unit-IV- Pharma. Marketing Channels.pptx
 
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptxSOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
SOCIAL AND HISTORICAL CONTEXT - LFTVD.pptx
 
1029-Danh muc Sach Giao Khoa khoi 6.pdf
1029-Danh muc Sach Giao Khoa khoi  6.pdf1029-Danh muc Sach Giao Khoa khoi  6.pdf
1029-Danh muc Sach Giao Khoa khoi 6.pdf
 
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in DelhiRussian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
Russian Escort Service in Delhi 11k Hotel Foreigner Russian Call Girls in Delhi
 
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
Presentation by Andreas Schleicher Tackling the School Absenteeism Crisis 30 ...
 
A Critique of the Proposed National Education Policy Reform
A Critique of the Proposed National Education Policy ReformA Critique of the Proposed National Education Policy Reform
A Critique of the Proposed National Education Policy Reform
 
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptxINDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
INDIA QUIZ 2024 RLAC DELHI UNIVERSITY.pptx
 
General AI for Medical Educators April 2024
General AI for Medical Educators April 2024General AI for Medical Educators April 2024
General AI for Medical Educators April 2024
 
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
Explore beautiful and ugly buildings. Mathematics helps us create beautiful d...
 
1029 - Danh muc Sach Giao Khoa 10 . pdf
1029 -  Danh muc Sach Giao Khoa 10 . pdf1029 -  Danh muc Sach Giao Khoa 10 . pdf
1029 - Danh muc Sach Giao Khoa 10 . pdf
 
9548086042 for call girls in Indira Nagar with room service
9548086042  for call girls in Indira Nagar  with room service9548086042  for call girls in Indira Nagar  with room service
9548086042 for call girls in Indira Nagar with room service
 

Linear regression with R 2

  • 1. Linear Regression with 2: Model selection 2012-12-10 @HSPH Kazuki Yoshida, M.D. MPH-CLE student FREEDOM TO  KNOW
  • 2. Group Website is at: http://rpubs.com/kaz_yos/useR_at_HSPH
  • 3. Previously in this group n Introduction n Graphics n Reading Data into R (1) n Groupwise, continuous n Reading Data into R (2) n Linear regression n Descriptive, continuous n Descriptive, categorical n Deducer
  • 4. Menu n Linear regression: Model selection
  • 5. Ingredients Statistics Programming n Selection methods n step() n drop1() n add1() n leaps::regsubsets()
  • 7. Open the saved script that we created last time. See also Linear Regression with R 1 slides
  • 8. Create full & null models lm.full <- lm(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw) lm.null <- lm(bwt ~ 1, data = lbw) Intercept-only
  • 9. Compare two models anova(lm.full, lm.null) Model 1 Model 2
  • 10. Models Partial F-test Difference in residual SS Residual sum of squares Residual degree of freedom Significant
  • 11. Backward elimination Specify full model lm.step.bw <- step(lm.full, direction = "backward") Final model object
  • 12. Initial AIC Removing ftv.cat for full makes AIC smallest model Removing age makes AIC smallest Doing nothing makes AIC smallest
  • 13. Forward selection Final model object Specify null model lm.step.fw <- step(lm.null, scope = ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, direction = "forward") formula for possible variables
  • 14. Initial AIC for Adding ui null makes AIC smallest model Adding race.cat makes AIC smallest Adding smoke makes AIC smallest Still goes on ...
  • 15. Stepwise selection/elimination Final model object Specify null model lm.step.both <- step(lm.null, scope = ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, direction = "both") formula for possible variables
  • 16. Initial AIC Adding ui for makes AIC smallest null model Adding race.cat Removing is makes AIC smallest also considered Adding smoke Removing is makes AIC smallest also considered Still goes on ...
  • 17. F-test using drop1() ## age is the least significant by partial F test drop1(lm.full, test = "F") ## After elimination, ftv.cat is the least significant drop1(update(lm.full, ~ . -age), test = "F") ## After elimination, preterm is least significat at p = 0.12. drop1(update(lm.full, ~ . -age -ftv.cat), test = "F") ## After elimination, all variables are significant at p < 0.1 drop1(update(lm.full, ~ . -age -ftv.cat -preterm), test = "F") ## Show summary for final model summary(update(lm.full, ~ . -age -ftv.cat -preterm))
  • 18. Updating models ## Remove age from full model lm.age.less <- update(lm.full, ~ . -age) all variables(.) minus age ## Adding ui to null model lm.ui.only <- update(lm.null, ~ . +ui) all variables (.) plus ui
  • 19. test full model age least significant F-test comparing age-in model to age-out model remove age, and test ftv.cat least significant remove age, ftv.cat
  • 20. F-test using add1() ## ui is the most significant variable add1(lm.null, scope = ~ age + lwt + race.cat + smoke + preterm + + ui + ftv.cat, test = "F") ## After inclusion, race.cat is the most significant add1(update(lm.null, ~ . +ui), scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ## After inclusion, smoke is the most significant add1(update(lm.null, ~ . +ui +race.cat), scope = ~ age + lwt + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ## After inclusion, ht is the most significant add1(update(lm.null, ~ . +ui +race.cat +smoke), scope = ~ age + l + race.cat + smoke + preterm + ht + ui + ftv.cat, test = "F") ...
  • 21. test null model ui most significant F-test comparing ui-out model to ui-in model add ui, and test race.cat most significant add ui and race.cat
  • 23. library(leaps) regsubsets.out <- regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, data = lbw, nbest = 1, nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive") summary(regsubsets.out)
  • 24. library(leaps) Result object regsubsets.out <- regsubsets(bwt ~ age + lwt + smoke + ht + ui + ftv.cat + race.cat + preterm, Full model data = lbw, How many best models? Max model size nbest = 1, Forced variables nvmax = NULL, force.in = NULL, force.out = NULL, method = "exhaustive") summary(regsubsets.out)
  • 25. Forced variables Variable combination Best 1 predictor model Best 7 predictor model Best 10 predictor model
  • 26. plot(regsubsets.out, scale = "adjr2", main = "Adjusted R^2") the higher the better ~ lwt + smoke + ht + ui + race.cat + preterm ~ smoke + ht + ui + race ~ ui
  • 27. library(car) subsets(regsubsets.out, statistic="adjr2", legend = FALSE, min.size = 5, main = "Adjusted R^2") ~ lwt + smoke + ht + ui + race.cat + preterm
  • 28. subsets(regsubsets.out, statistic="cp", legend = FALSE, min.size = 5, main = "Mallow Cp") First model for which Mallow Cp is less than number of regressors + 1 ~ lwt + smoke + ht + ui + race.cat + preterm