-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathRegression Plot.R
151 lines (130 loc) · 7.29 KB
/
Regression Plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
# An R macro for creating a plot of a regression, which is automatically saved as an SVG file
# It shows the line of best fit as well as higher-order polynomials. It shows the confidence interval and data points.
# If x or y is a latent variable, use the mean of the intercepts as the x offset (x.off)
# Data ----
# This list contains the results from the regression model (probably obtained in another program)
dataset <- list(
y.name = "Knowledge Sharing", # The name of the dependent variable
y.intercept = 4.032, # The intercept of the dependent variable
y.resvar = 2, # The residual variance / mean squared error of Y
predictors = data.frame(name = c("AGE", "GENDER", "TIMEWORK", "LGO2", "High Performance HRM", "HXL", "SCH2", "SCH3", "SCH4", "SCH5", "SCH6", "TEAMSIZE", "HR Strength", "HxS", "HxH"),
mean = c(48.556, 0.636, 5.977, 0, 0, 0.044, 0.359, 0.109, 0.125, 0.109, 0.125, 10.75, 0, 0, 0),
var = c(105.173, 0.232, 3.428, 0.475, 0.274, 0.135, 0.23, 0.097, 0.109, 0.097, 0.109, 33.562, 0.199, 0, 0),
beta = c(-0.01, 0.025, 0.009, 0.156, 0.133, -0.19, 0.127, -0.275, -0.17, 0.075, 0.227, 0.006, 0.006, 0.133, 0.1),
x.off = c(0, 0, 0, 0, 3.324, 0, 0, 0, 0, 0, 0, 0, 2.63, 0, 0)),
x = c(5, 15), # The X variables. Each entry represents a different order of polynomial. The first entry is X, the second is X^2, the third is X^3 and so on.
n = 200 # The sample size
)
# This data frame contains the actual X and Y data points
point_data <- data.frame(
x = c(-1.5, -1.75, -1, 1, 1.75, -2, 2),
y = c(-0.7, -0.3, -0.4, -0.3, -0.5, -0.7, 0)
)
# Settings----
image_width = 6 # The width of the image (in inches)
height_ratio = (1 + sqrt(5))/2 # Set the ratio to phi by default
image_height = image_width / height_ratio # The height of the image (in inches)
file_type = "svg" # The file type. It is recommended to use a vector format (e.g., "svg" or "emf") or high-quality image (e.g., "png")
ylim_lower <- NA #The lower limit of the Y-axis (leave as NA for it to be automatically set)
ylim_upper <- NA #The upper limit of the Y-axis (leave as NA for it to be automatically set)
xlim_lower <- NA #The lower limit of the X-axis (leave as NA for it to be automatically set)
xlim_upper <- NA #The upper limit of the Y-axis (leave as NA for it to be automatically set)
x_lower <- min(point_data$x) # The lower limit of the line of best fit on the X axis (defaults to smallest X value)
x_upper <- max(point_data$x) # The upper limit of the line of best fit on the X axis (defaults to largest X value)
points <- 20 # The number of points on the plot line (more means better resolution)
cinterval <- 0.95 # The confidence interval
# Determine plot file name ----
file_name <- paste0(dataset$y.name,
" on ",
dataset$predictors$name[dataset$x[1]],
".",
file_type)# The name of the file to be created
# Offset the points data (if required)
point_data$x <- point_data$x + dataset$predictors$x.off[dataset$x[1]]
# Load Packages----
load.package <- function(x)
{
if (!require(x,character.only = TRUE))
{
install.packages(x,dep=TRUE)
if(!require(x,character.only = TRUE)) stop("Package not found")
}
}
load.package ("ggplot2")
load.package ("lemon")
# Functions----
# Confidence / Prediction interval
# Set "pred_interval" to FALSE for a confidence interval, TRUE for a prediction interval
confidence.interval <- function(dataset, x_lower = 1, x_upper = 5, points = 20, cinterval = .975, pred_interval = FALSE) {
x <- c()
ci <- c()
b <- dataset$x[1]
for (a in 0:(points-1)) {
x[a+1] <- x_lower + a*(x_upper - x_lower)/(points-1)
ci[a+1] <- qt(cinterval, dataset$n)*sqrt(dataset$y.resvar*(pred_interval + (1/dataset$n)+(x[a+1]-dataset$predictors$mean[b])^2/(dataset$n-2)*dataset$predictors$var[b]))
}
return(ci)
}
# Returns the line of prediction, plus confidence intervals
predict.data <- function(dataset, x_lower = 1, x_upper = 5, points = 20, cinterval = .975) {
x <- c()
y_pred <- c()
for (a in 0:(points-1)) {
x[a+1] <- x_lower + a*(x_upper - x_lower)/(points-1)
y_pred[a+1] <- 0
for (b in 1:length(dataset$predictors$name)) {
for (c in 1:length(dataset$x)) {
if (b == dataset$x[c]) {
y_pred[a+1] <- y_pred[a+1] + dataset$predictors$beta[b]*x[a+1]^c
}
else {
y_pred[a+1] <- y_pred[a+1] + dataset$predictors$beta[b]*dataset$predictors$mean[b]
}
}
}
}
ci = confidence.interval(dataset, x_lower, x_upper, points, cinterval)
return(data.frame(x = x + dataset$predictors$x.off[dataset$x[1]], y_pred = y_pred, y_lower = y_pred - ci, y_upper = y_pred + ci))
}
# Include themes----
theme.plain <- theme(axis.line.x = element_line(colour = "black"), # Add in x axis
axis.line.y = element_line(colour = "black"), # Add in y axis
text = element_text(family="sans"), # Set the font (options are: "mono", "serif", "sans"
panel.grid.major = element_blank(), # Delete major grid
panel.grid.minor = element_blank(), # Delete minor grid
panel.border = element_blank(), # Delete border
panel.background = element_blank(), # Make background blank
legend.key = element_blank(),
plot.margin = margin(5, 5, 5, 5, "pt"),
strip.background = element_blank(), # Make the facet heading blank
strip.placement = "outside", # For faceting, it will put the facet label outside of the Y-axis
panel.spacing = unit(1, "lines")) # Move facets further apart
# Create the GGPlot----
regression_plot <- function(plot.data, point.data = NA) {
plot1 <- ggplot()
if (exists("point.data")) {
plot1 = plot1 + geom_point(data = point.data, aes(x = x, y = y))
}
plot1 = plot1 +
geom_ribbon(data = plot.data, aes(x = x, ymin = y_lower, ymax = y_upper), fill = "grey95", alpha = 0.5) +
geom_smooth(data = plot.data, aes(x = x, y = y_lower), size = 0.5, color = "black", linetype = "dashed", se = FALSE, method = "loess", formula = "y ~ x") +
geom_smooth(data = plot.data, aes(x = x, y = y_upper), size = 0.5, color = "black", linetype = "dashed", se = FALSE, method = "loess", formula = "y ~ x")
plot1 = plot1 +
geom_smooth(data = plot.data, aes(x = x, y = y_pred), size = 0.75, color = "black", se = FALSE, method = "loess", formula = "y ~ x") +
xlab(dataset$predictors$name[dataset$x[1]]) +
ylab(dataset$y.name) +
theme.plain
if (!is.na(ylim_lower) | !is.na(ylim_upper)) {plot1 = plot1 + ylim(ylim_lower, ylim_upper)}
if (!is.na(xlim_lower) | !is.na(xlim_upper)) {plot1 = plot1 + xlim(xlim_lower, xlim_upper)}
print(plot1)
}
# Generate the prediction line and the confidence/prediction intervals
dataset_predict = predict.data(dataset, x_lower, x_upper, points, 1-((1-cinterval)/2))
# Generate the plot
image <- regression_plot(dataset_predict, point_data)
# Save the image
ggsave(filename = file_name,
plot = image,
units = "in",
width = image_width,
height = image_height)