
Commit e03ea0c

Authored Jun 17, 2020
Add files via upload
1 parent 9d5647f commit e03ea0c

2 files changed: +530 −0 lines changed
 
@@ -0,0 +1,273 @@
##
## title: "Algorithms from scratch using Gradient Descent to predict average GPU run time & classify its run type"
## author: "Sarthak Mohapatra"
## date: "1/29/2020"
##

options(scipen = 999)

##
## Loading the required packages.
##

pacman::p_load(data.table, forecast, leaps, tidyverse, caret, corrplot, glmnet, mlbench, ggplot2, gplots, pivottabler, MASS,
               e1071, fpp2, gains, pROC, knitr, FNN, RColorBrewer, viridis, cowplot, ggpubr, gridExtra, rlist, d3heatmap)

##
## Importing the dataset from the working directory.
##

setwd('D:/Second Semester - MSBA - UTD/Applied Machine Learning/Assignment 1/sgemm_product_dataset')
gpu.df <- read.csv("sgemm_product.csv")
head(gpu.df)

##
## Renaming the last four columns.
##

names(gpu.df)[15] <- "Run1"
names(gpu.df)[16] <- "Run2"
names(gpu.df)[17] <- "Run3"
names(gpu.df)[18] <- "Run4"
head(gpu.df)

##
## Creating a new feature, Average, containing the mean of Run1 through Run4.
##

gpu.df$Average <- (gpu.df$Run1 + gpu.df$Run2 + gpu.df$Run3 + gpu.df$Run4) / 4
head(gpu.df)
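
##
## Added sanity check (a sketch, not part of the original script): rowMeans()
## over the four run columns should reproduce the Average feature exactly.
##

stopifnot(isTRUE(all.equal(gpu.df$Average,
                           rowMeans(gpu.df[, c("Run1", "Run2", "Run3", "Run4")]))))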

##
## Data Partitioning
##

set.seed(16)

##
## Randomly order the dataset.
##

rows <- sample(nrow(gpu.df))
gpu <- gpu.df[rows, -15:-18]   ## drop the individual Run1-Run4 columns, keeping Average

##
## Find the row to split on (70% train / 30% test).
##

split <- round(nrow(gpu) * 0.7)
gpu.train.df <- gpu[1:split, ]
gpu.test.df <- gpu[(split+1):nrow(gpu), ]

##
## Confirm the size of the split.
##

round(nrow(gpu.train.df)/nrow(gpu), digits = 3)
head(gpu.train.df)
head(gpu.test.df)

##
## Normalizing the dataset. The centering/scaling parameters are estimated on
## the training partition only and then applied to every partition.
##

gpu_train_norm <- gpu.train.df
gpu_test_norm <- gpu.test.df
gpu_norm_df <- gpu

norm.values <- preProcess(gpu.train.df[, 1:15], method=c("center", "scale"))
gpu_train_norm[, 1:15] <- predict(norm.values, gpu.train.df[, 1:15])
gpu_test_norm[, 1:15] <- predict(norm.values, gpu.test.df[, 1:15])
gpu_norm_df[, 1:15] <- predict(norm.values, gpu[, 1:15])
new.gpu.norm.df <- predict(norm.values, gpu)
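
##
## Added sanity check (a sketch): after centering and scaling with the
## training-set parameters, each training column should have mean ~0 and
## standard deviation ~1; the test columns will only be approximately so.
##

round(colMeans(gpu_train_norm), 3)
round(apply(gpu_train_norm, 2, sd), 3)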

##
## Creating the feature and target matrices (X and y). An intercept column of
## ones is prepended so that beta-0 is estimated alongside the 14 slopes.
##

x_gpu_train <- as.matrix(gpu_train_norm[c(1:14)])
y_gpu_train <- as.matrix(gpu_train_norm[c('Average')])

x_gpu_test <- as.matrix(gpu_test_norm[c(1:14)])
y_gpu_test <- as.matrix(gpu_test_norm[c('Average')])

x_gpu_train <- cbind(Intercept=1, x_gpu_train)
head(x_gpu_train)
head(y_gpu_train)
x_gpu_test <- cbind(Intercept=1, x_gpu_test)
head(x_gpu_test)
length(y_gpu_train)
length(y_gpu_test)

##
## Converting the problem to a binary classification problem.
##

##
## If a record's average run time is less than or equal to the median, it is
## assigned class 0 (low run type); if greater, class 1 (high run type).
##

median.input <- median(gpu_norm_df$Average)
median.input

x.train.gpu.logit <- x_gpu_train
y.train.gpu.logit <- y_gpu_train

head(y.train.gpu.logit)
y.train.gpu.logit <- ifelse(y.train.gpu.logit <= median.input, 0, 1)
head(y.train.gpu.logit)

x.test.gpu.logit <- x_gpu_test
y.test.gpu.logit <- y_gpu_test
head(y.test.gpu.logit)
y.test.gpu.logit <- ifelse(y.test.gpu.logit <= median.input, 0, 1)
head(y.test.gpu.logit)
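
##
## Added class-balance check (a sketch): because the cutoff is the median of the
## full normalized dataset, each partition should contain roughly equal counts
## of class 0 and class 1.
##

table(y.train.gpu.logit)
table(y.test.gpu.logit)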

##
## The code chunks below implement the Gradient Descent method. Based on the
## experimentation performed, the values demonstrated here are alpha = 0.0001
## with a convergence threshold of thold = 0.000001.
##

##
## Defining the Gradient Descent algorithm. First, we declare the variables that
## store the cost, the beta coefficients, the predicted target values, and the errors.
##

gradient_descent <- function(x, y, alpha, m, beta_value, thold)
{
  n_iter <- 10000                                 ## maximum number of iterations
  cost_iter <- list()
  beta_iter <- matrix(0, nrow=n_iter, ncol=15)    ## one row of coefficients per iteration
  yhat_iter <- list()
  error_iter <- list()
  ##
  ## Iterate, updating the coefficients to minimize the logistic cost function.
  ##
  for (i in 1:n_iter){

    yhat <- 1 / (1 + exp(-(as.matrix(x) %*% beta_value)))        ## Sigmoid predictions of the target variable.
    yhat_iter[[i]] <- yhat                                       ## Storing the predicted values.

    error <- yhat - y                                            ## Calculating the error vector.
    error_iter[[i]] <- error                                     ## Storing the error vector.

    cost <- -1 * (1/m) * sum( y*log(yhat) + (1-y)*log(1-yhat) )  ## Calculating the cost function value.
    cost_iter[[i]] <- cost                                       ## Storing the cost function value.

    beta_value <- beta_value - (alpha * (1/m) * (t(x) %*% (yhat - y)))  ## Calculating the new beta coefficient values.
    beta_iter[i, 1:15] <- t(beta_value)                          ## Storing the beta coefficient values.

    ## Optional early stopping, left disabled so every iteration is recorded:
    # if ((i > 1) && ((cost_iter[[i-1]] - cost_iter[[i]]) < thold)) {
    #   print('Threshold reached')
    #   break
    # }
  }

  final_val <- list(cost_iter, beta_iter, yhat_iter, error_iter) ## Bundling the results so they can be returned together.
  return(final_val)                                              ## Returning the values.
}
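
##
## Added illustration (a sketch with hypothetical toy data, not part of the
## analysis): running the function on a tiny synthetic problem with 14 random
## features plus an intercept; the logistic cost should decrease over the
## iterations for a small enough alpha.
##

toy_x <- cbind(Intercept=1, matrix(rnorm(6*14), nrow=6, ncol=14))  ## 6 hypothetical rows, 14 features
toy_y <- matrix(c(0, 1, 0, 1, 1, 0), ncol=1)                       ## hypothetical 0/1 labels
toy_out <- gradient_descent(toy_x, toy_y, alpha=0.001, m=6, beta_value=rep(0,15), thold=0.000001)
head(unlist(toy_out[[1]]))   ## the first few cost values, which should be non-increasing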

##
## Prediction (scoring) function for the validation dataset.
##

logistic_test_predict <- function(beta_conv_iter, x_gpu_test, y_gpu_test)
{
  yhat_test <- 1 / (1 + exp(-(as.matrix(x_gpu_test) %*% beta_conv_iter)))  ## Sigmoid predictions on the test set.
  error_test <- yhat_test - y_gpu_test
  cost_test <- -1 * (1/length(y_gpu_test)) * sum( y_gpu_test*log(yhat_test) + (1-y_gpu_test)*log(1-yhat_test) )  ## Logistic cost, matching the training cost.
  test_val <- list(yhat_test, error_test, cost_test)

  return(test_val)
}
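
##
## Added usage illustration (a sketch, continuing the hypothetical toy problem
## above): scoring the toy rows with the coefficients from the final toy iteration.
##

toy_beta <- toy_out[[2]][10000, 1:15]            ## betas at the last iteration
toy_pred <- logistic_test_predict(toy_beta, toy_x, toy_y)
toy_pred[[3]]                                    ## logistic cost on the toy rows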

##
## The main function drives a full run: it initializes the beta coefficients
## (beta-0, the intercept, and beta-1 through beta-14, the slopes), runs gradient
## descent on the training data, and scores the test data with the final betas.
##

main_function <- function(alpha, m, beta_value, thold){
  cost_return_train <- list()
  beta_return_train <- list()
  yhat_return_train <- list()
  final_return_train <- list()

  cost_return_test <- list()
  yhat_return_test <- list()
  error_return_test <- list()

  final <- list()
  final_test <- list()

  final <- gradient_descent(x.train.gpu.logit, y.train.gpu.logit, alpha, m, beta_value, thold)

  cost_return_train <- final[[1]]
  beta_return_train <- final[[2]]
  yhat_return_train <- final[[3]]
  error_return_train <- final[[4]]

  conv_iter <- length(cost_return_train)                 ## iteration at which the run stopped
  conv_iter

  beta_conv_iter <- beta_return_train[conv_iter, 1:15]   ## coefficients at the final iteration
  beta_conv_iter

  cost_return_train[conv_iter]

  final_test <- logistic_test_predict(beta_conv_iter, x.test.gpu.logit, y.test.gpu.logit)

  cost_return_test <- final_test[[3]]
  yhat_return_test <- final_test[[1]]
  error_return_test <- final_test[[2]]

  cost_return_test

  cost_result <- list(cost_return_train, cost_return_test, conv_iter, beta_conv_iter, yhat_return_test)
  return(cost_result)
}

##
## Invoking the main function to apply the Gradient Descent algorithm.
##

thold <- 0.000001
alpha <- 0.0001
m <- nrow(gpu.train.df)
beta_value <- rep(0, 15)
cost_return <- main_function(alpha, m, beta_value, thold)
cost_return_train <- cost_return[[1]]
cost_return_test <- cost_return[[2]]
conv_iter <- cost_return[[3]]
yhat_test <- cost_return[[5]]    ## predicted test probabilities (element 4 holds the converged betas)
cost_train_0.0001_al <- cost_return_train
cost_train_min_0.0001_al <- cost_return_train[conv_iter]
cost_test_0.0001_al <- cost_return_test

##
## Plotting the cost-function convergence curve.
##

plot(1:length(cost_train_0.0001_al), unlist(cost_train_0.0001_al),
     main = 'Cost function convergence at alpha = 0.0001',
     xlab = 'No. of iterations', ylab = 'Cost function value',
     col = 'red', type = 'l', xlim = c(0, 10000), ylim = c(0.68, 0.7),
     sub = 'Convergence threshold value: 0.000001')
legend("topright", c("alpha = 0.0001"), cex = 0.7, bty = 'n', fill = c("red"))
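
##
## Added evaluation sketch (not part of the original script): hard class labels
## at a 0.5 probability cutoff, a confusion matrix against the true test labels,
## overall accuracy, and an ROC curve via the already-loaded pROC package.
##

pred_class <- ifelse(yhat_test >= 0.5, 1, 0)
table(Predicted = pred_class, Actual = y.test.gpu.logit)
mean(pred_class == y.test.gpu.logit)             ## test-set accuracy

roc_obj <- pROC::roc(as.vector(y.test.gpu.logit), as.vector(yhat_test))
plot(roc_obj)                                    ## ROC curve for the classifier
pROC::auc(roc_obj)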
