---
title: "3 Gradients"
format: html
---
How are we finding the model that best fits the data? Through optimization! Here we calculate the parameters $\beta_0$ and $\beta_1$ using gradient descent on the log-loss and gradient ascent on the log-likelihood, and compare the results with the function `glm`, which already does this for us.
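To make the updates in the code below concrete, this is the standard logistic-regression setup that the functions below implement:

$$
\hat{y}_i = \sigma(\beta_0 + \beta_1 x_i), \qquad \sigma(z) = \frac{1}{1 + e^{-z}}
$$

$$
\text{log-loss}(\beta_0, \beta_1) = -\sum_i \Big[ y_i \log \hat{y}_i + (1 - y_i)\log(1 - \hat{y}_i) \Big] = -\,\text{log-likelihood}(\beta_0, \beta_1)
$$

Gradient descent on the log-loss then updates the parameters as follows (gradient ascent on the log-likelihood is the same update with a `+` sign):

$$
\beta_0 \leftarrow \beta_0 - \alpha \cdot \text{mean}(\hat{y} - y), \qquad
\beta_1 \leftarrow \beta_1 - \alpha \cdot \text{mean}\big((\hat{y} - y)\,x\big)
$$

where $\alpha$ is the learning rate. (The code averages the gradient over observations rather than summing it, which only rescales the effective learning rate.)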
```{r}
# Load necessary libraries
library(mlbench)
library(dplyr)
library(ggplot2)
# Load the Pima Indians Diabetes dataset
data("PimaIndiansDiabetes")
PimaIndiansDiabetes <- PimaIndiansDiabetes %>%
select(glucose, diabetes) %>%
mutate(diabetes_dummy = ifelse(diabetes == "pos", 1, 0))
```
```{r}
# Data preparation
x <- PimaIndiansDiabetes$glucose
y <- PimaIndiansDiabetes$diabetes_dummy
```
```{r}
# Sigmoid function
sigmoid <- function(z) {
1 / (1 + exp(-z))
}
```
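A quick check with a few illustrative input values: the sigmoid squashes any real number into the interval (0, 1), which is what lets us read its output as a probability.

```{r}
# Illustrative check: large negative inputs map near 0, zero maps to 0.5,
# large positive inputs map near 1.
sigmoid(c(-5, 0, 5))
```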
```{r}
# Log-loss function (for gradient descent) - coded equation from slides
log_loss <- function(beta_0, beta_1, x, y) {
z <- beta_0 + beta_1 * x
y_pred <- sigmoid(z)
-sum(y * log(y_pred) + (1 - y) * log(1 - y_pred))
}
```
```{r}
# Log-likelihood function (for gradient ascent)
log_likelihood <- function(beta_0, beta_1, x, y) {
z <- beta_0 + beta_1 * x
y_pred <- sigmoid(z)
sum(y * log(y_pred) + (1 - y) * log(1 - y_pred))
}
```
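Notice that these two functions differ only by a sign. A quick sanity check at arbitrary, purely illustrative parameter values:

```{r}
# Sanity check at arbitrary, illustrative parameter values: the
# log-likelihood should be exactly the negative of the log-loss.
log_loss(-5, 0.04, x, y)
log_likelihood(-5, 0.04, x, y)
```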
```{r}
# Gradient descent implementation
gradient_descent <- function(x, y, learning_rate, iterations) { # we can play with these two!
  store_beta_0 <- numeric(iterations) # preallocate storage for the trajectory
  store_beta_1 <- numeric(iterations)
  beta_0 <- -5 # Initialize intercept # starting points! Can play around with these ones too
  beta_1 <- 1  # Initialize slope     # starting points! Can play around with these ones too
  for (i in 1:iterations) {
    z <- beta_0 + beta_1 * x # linear predictor / logit
    y_pred <- sigmoid(z) # logistic regression prediction
    grad_beta_0 <- mean(y_pred - y) # partial derivative w.r.t. the intercept (averaged)
    grad_beta_1 <- mean((y_pred - y) * x) # partial derivative w.r.t. the slope (averaged)
    beta_0 <- beta_0 - learning_rate * grad_beta_0 # gradient descent equation! Updating the points
    beta_1 <- beta_1 - learning_rate * grad_beta_1
    store_beta_0[i] <- beta_0
    store_beta_1[i] <- beta_1
  }
  data.frame(beta_1 = store_beta_1, beta_0 = store_beta_0)
}
```
```{r}
gd_result <- gradient_descent(x, y, learning_rate = 0.1, iterations = 1000)
#View(gd_result)
```
Minimum point found after 1000 iterations:
```{r}
paste0("Glucose/Slope:, ", gd_result$beta_1[dim(gd_result)[2]])
paste0("Intercept: ", gd_result$beta_0[dim(gd_result)[2]])
```
```{r}
#ggplot(gd_result %>% add_rownames(var = "instances"), aes( as.numeric(instances), beta_0)) +
# geom_point()
```
```{r}
#ggplot(gd_result %>% add_rownames(var = "instances"), aes( as.numeric(instances), beta_1)) +
# geom_point()
```
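Here is a sketch of the kind of convergence plot the commented-out code above is after, using `row_number()` instead of the deprecated `add_rownames()`:

```{r}
# Trajectory of the slope estimate over the gradient descent iterations.
gd_result %>%
  mutate(iteration = row_number()) %>%
  ggplot(aes(iteration, beta_1)) +
  geom_line()
```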
```{r}
# Gradient ascent implementation
gradient_ascent <- function(x, y, learning_rate = 0.0001, iterations = 1000) {
  store_beta_0 <- numeric(iterations) # preallocate storage for the trajectory
  store_beta_1 <- numeric(iterations)
  beta_0 <- 0 # Initialize intercept
  beta_1 <- 0 # Initialize slope
  for (i in 1:iterations) {
    z <- beta_0 + beta_1 * x # linear predictor / logit
    y_pred <- sigmoid(z) # logistic regression prediction
    grad_beta_0 <- mean(y - y_pred) # gradient of the (averaged) log-likelihood w.r.t. the intercept
    grad_beta_1 <- mean((y - y_pred) * x) # gradient w.r.t. the slope
    beta_0 <- beta_0 + learning_rate * grad_beta_0 # gradient ascent update: note the plus sign
    beta_1 <- beta_1 + learning_rate * grad_beta_1
    store_beta_0[i] <- beta_0
    store_beta_1[i] <- beta_1
  }
  data.frame(beta_1 = store_beta_1, beta_0 = store_beta_0)
}
```
What do you notice as the difference? The sign! Gradient ascent adds the gradient of the log-likelihood, whereas gradient descent subtracts the gradient of the log-loss.
```{r}
ga_result <- gradient_ascent(x, y, learning_rate = 0.01, iterations = 10000)
# View(ga_result)
```
```{r}
paste0("Glucose/Slope:, ", ga_result$beta_1[10000])
paste0("Intercept: ", ga_result$beta_0[10000])
```
The same! Maximizing the log-likelihood is equivalent to minimizing the log-loss, so, given compatible learning rates and enough iterations, both routes head toward the same optimum.
If we fit the logistic regression model with `glm()` as before, we get:
```{r}
# Fit a logistic regression model using glm
glm_model <- glm(diabetes_dummy ~ glucose, data = PimaIndiansDiabetes, family = binomial)
coef(glm_model)
```
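As a final, illustrative comparison, we can line up the last iterate of each hand-rolled optimizer against the `glm()` estimates; with a suitable learning rate and enough iterations, both should land close to the `glm()` solution.

```{r}
# Compare the final estimates from gradient descent, gradient ascent,
# and glm(); nrow() picks the last stored iteration of each run.
data.frame(
  method    = c("gradient descent", "gradient ascent", "glm"),
  intercept = c(gd_result$beta_0[nrow(gd_result)],
                ga_result$beta_0[nrow(ga_result)],
                unname(coef(glm_model)[1])),
  slope     = c(gd_result$beta_1[nrow(gd_result)],
                ga_result$beta_1[nrow(ga_result)],
                unname(coef(glm_model)[2]))
)
```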