BranchGLM Vignette

1 Fitting GLMs

1.1 Optimization methods

1.2 Initial values

1.3 Parallel computation

2 Families

2.1 Gaussian

# Loading in BranchGLM
library(BranchGLM)

# Fitting gaussian regression models for mtcars dataset
cars <- mtcars

## Identity link
BranchGLM(mpg ~ ., data = cars, family = "gaussian", link = "identity")
#> Results from gaussian regression with identity link function 
#> Using the formula mpg ~ .
#> 
#>             Estimate       SE       t p.values  
#> (Intercept) 12.30000 15.16000  0.8114  0.42620  
#> cyl         -0.11140  0.84660 -0.1316  0.89650  
#> disp         0.01334  0.01447  0.9218  0.36710  
#> hp          -0.02148  0.01763 -1.2180  0.23670  
#> drat         0.78710  1.32500  0.5941  0.55880  
#> wt          -3.71500  1.53500 -2.4210  0.02462 *
#> qsec         0.82100  0.59210  1.3870  0.18010  
#> vs           0.31780  1.70500  0.1864  0.85390  
#> am           2.52000  1.66600  1.5130  0.14530  
#> gear         0.65540  1.21000  0.5418  0.59370  
#> carb        -0.19940  0.67140 -0.2970  0.76940  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 4.6092
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 147.49 on 21 degrees of freedom
#> AIC: 163.71

2.2 Gamma

# Fitting gamma regression models for mtcars dataset
## Inverse link
GammaFit <- BranchGLM(mpg ~ ., data = cars, family = "gamma", link = "inverse")
GammaFit
#> Results from gamma regression with inverse link function 
#> Using the formula mpg ~ .
#> 
#>               Estimate         SE       t p.values  
#> (Intercept)  6.794e-02  3.085e-02  2.2020  0.03898 *
#> cyl         -1.760e-03  1.946e-03 -0.9048  0.37590  
#> disp         7.947e-06  3.515e-05  0.2261  0.82330  
#> hp           6.724e-05  4.050e-05  1.6600  0.11170  
#> drat         4.283e-04  2.728e-03  0.1570  0.87670  
#> wt           9.224e-03  3.360e-03  2.7450  0.01214 *
#> qsec        -1.739e-03  1.165e-03 -1.4930  0.15040  
#> vs           3.086e-04  3.322e-03  0.0929  0.92690  
#> am          -6.305e-04  3.441e-03 -0.1832  0.85640  
#> gear        -4.882e-03  2.577e-03 -1.8940  0.07203 .
#> carb         1.025e-03  1.481e-03  0.6926  0.49610  
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 0.0087
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 0.28 on 21 degrees of freedom
#> AIC: 152.3
#> Algorithm converged in 3 iterations using Fisher's scoring

## Log link
GammaFit <- BranchGLM(mpg ~ ., data = cars, family = "gamma", link = "log")
GammaFit
#> Results from gamma regression with log link function 
#> Using the formula mpg ~ .
#> 
#>               Estimate         SE       t  p.values    
#> (Intercept)  2.754e+00  6.934e-01  3.9720 0.0006942 ***
#> cyl          7.509e-03  3.871e-02  0.1940 0.8481000    
#> disp         6.521e-05  6.615e-04  0.0986 0.9224000    
#> hp          -8.898e-04  8.064e-04 -1.1030 0.2823000    
#> drat         2.366e-02  6.058e-02  0.3906 0.7000000    
#> wt          -1.655e-01  7.017e-02 -2.3590 0.0281100 *  
#> qsec         3.055e-02  2.707e-02  1.1290 0.2718000    
#> vs          -1.141e-03  7.796e-02 -0.0146 0.9885000    
#> am           5.421e-02  7.618e-02  0.7115 0.4846000    
#> gear         6.014e-02  5.531e-02  1.0870 0.2893000    
#> carb        -2.277e-02  3.070e-02 -0.7418 0.4664000    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 0.0096
#> 32 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 0.31 on 21 degrees of freedom
#> AIC: 155.65
#> Algorithm converged in 2 iterations using Fisher's scoring

2.3 Poisson

# Fitting poisson regression models for warpbreaks dataset
warp <- warpbreaks

## Log link
BranchGLM(breaks ~ ., data = warp, family = "poisson", link = "log")
#> Results from poisson regression with log link function 
#> Using the formula breaks ~ .
#> 
#>             Estimate       SE      z  p.values    
#> (Intercept)  3.69200  0.04541 81.300 < 2.2e-16 ***
#> woolB       -0.20600  0.05157 -3.994 6.490e-05 ***
#> tensionM    -0.32130  0.06027 -5.332 9.729e-08 ***
#> tensionH    -0.51850  0.06396 -8.107 5.209e-16 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 1
#> 54 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 210.39 on 50 degrees of freedom
#> AIC: 493.06
#> Algorithm converged in 3 iterations using Fisher's scoring

2.4 Binomial

# Fitting binomial regression models for toothgrowth dataset
Data <- ToothGrowth

## Logit link
BranchGLM(supp ~ ., data = Data, family = "binomial", link = "logit")
#> Results from binomial regression with logit link function 
#> Using the formula supp ~ .
#> 
#>             Estimate      SE      z p.values   
#> (Intercept)   1.5380  0.7860  1.956 0.050440 . 
#> len          -0.2127  0.0728 -2.921 0.003484 **
#> dose          2.0890  0.8497  2.458 0.013970 * 
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 1
#> 60 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 72.33 on 57 degrees of freedom
#> AIC: 78.33
#> Algorithm converged in 4 iterations using Fisher's scoring

## Probit link
BranchGLM(supp ~ ., data = Data, family = "binomial", link = "probit")
#> Results from binomial regression with probit link function 
#> Using the formula supp ~ .
#> 
#>             Estimate       SE      z p.values   
#> (Intercept)  0.94020  0.46900  2.005 0.045000 * 
#> len         -0.13180  0.04195 -3.142 0.001678 **
#> dose         1.30800  0.49710  2.631 0.008502 **
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Dispersion parameter taken to be 1
#> 60 observations used to fit model
#> (0 observations removed due to missingness)
#> 
#> Residual Deviance: 72.19 on 57 degrees of freedom
#> AIC: 78.19
#> Algorithm converged in 5 iterations using Fisher's scoring

2.4.1 Functions for binomial GLMs

  • BranchGLM has some utility functions for binomial GLMs
    • Table() creates a confusion matrix based on the predicted classes and observed classes
    • ROC() creates an ROC curve which can be plotted with plot()
    • AUC() and Cindex() calculate the area under the ROC curve
    • MultipleROCCurves() allows for the plotting of multiple ROC curves on the same plot

2.4.1.1 Table

# Fitting logistic regression model for toothgrowth dataset
catFit <- BranchGLM(supp ~ ., data = Data, family = "binomial", link = "logit")

Table(catFit)
#> Confusion matrix:
#> ----------------------
#>             Predicted
#>              OJ   VC
#> 
#>          OJ  17   13
#> Observed
#>          VC  7    23
#> 
#> ----------------------
#> Measures:
#> ----------------------
#> Accuracy:  0.6667 
#> Sensitivity:  0.7667 
#> Specificity:  0.5667 
#> PPV:  0.6389

2.4.1.2 ROC

# Creating ROC curve
catROC <- ROC(catFit)

plot(catROC, main = "ROC Curve", col = "indianred")

2.4.1.3 Cindex/AUC

# Getting Cindex/AUC
Cindex(catFit)
#> [1] 0.7127778

AUC(catFit)
#> [1] 0.7127778

2.4.1.4 MultipleROCPlots

# Showing ROC plots for logit, probit, and cloglog
probitFit <- BranchGLM(supp ~ . ,data = Data, family = "binomial", 
                       link = "probit")

cloglogFit <- BranchGLM(supp ~ . ,data = Data, family = "binomial", 
                       link = "cloglog")

MultipleROCCurves(catROC, ROC(probitFit), ROC(cloglogFit), 
                  names = c("Logistic ROC", "Probit ROC", "Cloglog ROC"))

2.4.1.5 Using predictions

  • For each of the methods used in this section predicted probabilities and observed classes can also be supplied instead of the BranchGLM object.

preds <- predict(catFit)

Table(preds, Data$supp)
#> Confusion matrix:
#> ----------------------
#>             Predicted
#>              OJ   VC
#> 
#>          OJ  17   13
#> Observed
#>          VC  7    23
#> 
#> ----------------------
#> Measures:
#> ----------------------
#> Accuracy:  0.6667 
#> Sensitivity:  0.7667 
#> Specificity:  0.5667 
#> PPV:  0.6389

AUC(preds, Data$supp)
#> [1] 0.7127778

ROC(preds, Data$supp) |> plot(main = "ROC Curve", col = "deepskyblue")

3 Useful functions

# Predict method
predict(GammaFit)
#>           Mazda RX4       Mazda RX4 Wag          Datsun 710      Hornet 4 Drive 
#>            21.69333            21.15566            25.92103            20.27496 
#>   Hornet Sportabout             Valiant          Duster 360           Merc 240D 
#>            17.15124            19.83386            14.55730            22.26169 
#>            Merc 230            Merc 280           Merc 280C          Merc 450SE 
#>            23.89767            18.75674            19.10374            15.10160 
#>          Merc 450SL         Merc 450SLC  Cadillac Fleetwood Lincoln Continental 
#>            16.07372            16.13726            12.20297            11.70443 
#>   Chrysler Imperial            Fiat 128         Honda Civic      Toyota Corolla 
#>            11.60706            27.90334            30.14896            30.14451 
#>       Toyota Corona    Dodge Challenger         AMC Javelin          Camaro Z28 
#>            23.41000            17.02230            17.63782            13.90043 
#>    Pontiac Firebird           Fiat X1-9       Porsche 914-2        Lotus Europa 
#>            16.06923            28.65170            26.61054            28.59460 
#>      Ford Pantera L        Ferrari Dino       Maserati Bora          Volvo 142E 
#>            17.89013            19.53099            14.11845            23.30350

# Accessing coefficients matrix
GammaFit$coefficients
#>                  Estimate           SE           t   p.values
#> (Intercept)  2.754211e+00 0.6933540659  3.97230023 0.00069416
#> cyl          7.509180e-03 0.0387101010  0.19398501 0.84805180
#> disp         6.521183e-05 0.0006614834  0.09858423 0.92240336
#> hp          -8.897889e-04 0.0008063589 -1.10346503 0.28231007
#> drat         2.366270e-02 0.0605780301  0.39061524 0.70001605
#> wt          -1.655137e-01 0.0701735211 -2.35863431 0.02811042
#> qsec         3.055119e-02 0.0270721947  1.12850802 0.27183333
#> vs          -1.140739e-03 0.0779559038 -0.01463313 0.98846300
#> am           5.420526e-02 0.0761831300  0.71151268 0.48459603
#> gear         6.013892e-02 0.0553138294  1.08723110 0.28925667
#> carb        -2.277255e-02 0.0306989241 -0.74180288 0.46642281