2 Decision threshold

# Load necessary libraries
library(mlbench)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
# Bring the Pima Indians Diabetes data frame into the workspace
data("PimaIndiansDiabetes")

# Keep only the glucose and diabetes columns, then add a numeric copy of the
# outcome: 1 where diabetes is "pos", 0 otherwise
PimaIndiansDiabetes <- PimaIndiansDiabetes %>%
  select(glucose, diabetes) %>%
  mutate(diabetes_dummy = as.numeric(diabetes == "pos"))

# Logistic regression of the 0/1 outcome on glucose
logistic_model <- glm(
  formula = diabetes_dummy ~ glucose,
  family = binomial,
  data = PimaIndiansDiabetes
)

# Store the model's predictions on the response scale (type = "response")
# as a new column next to the data they were fitted on
PimaIndiansDiabetes <- mutate(
  PimaIndiansDiabetes,
  predicted_probability = predict(object = logistic_model, type = "response")
)


# Convert predicted probabilities into 0/1 labels at three cut-offs:
# a probability below the cut-off is labelled 0, otherwise 1
PimaIndiansDiabetes <- mutate(
  PimaIndiansDiabetes,
  class_0_5 = ifelse(predicted_probability < 0.5, 0, 1),
  class_0_3 = ifelse(predicted_probability < 0.3, 0, 1),
  class_0_7 = ifelse(predicted_probability < 0.7, 0, 1)
)

What happens when we increase the threshold? See how the number of correct classifications (cases where the true y and the predicted y agree) changes!

# Cross-tabulate actual outcome (rows) against the 0.3-threshold label (columns)
table(PimaIndiansDiabetes[["diabetes_dummy"]], PimaIndiansDiabetes[["class_0_3"]])
   
      0   1
  0 331 169
  1  66 202
# Cross-tabulate actual outcome (rows) against the 0.5-threshold label (columns)
table(PimaIndiansDiabetes[["diabetes_dummy"]], PimaIndiansDiabetes[["class_0_5"]])
   
      0   1
  0 443  57
  1 138 130
# Cross-tabulate actual outcome (rows) against the 0.7-threshold label (columns)
table(PimaIndiansDiabetes[["diabetes_dummy"]], PimaIndiansDiabetes[["class_0_7"]])
   
      0   1
  0 484  16
  1 195  73
Back to top