-
Notifications
You must be signed in to change notification settings - Fork 13
/
Copy pathsupervised_classification.R
executable file
·217 lines (158 loc) · 6.91 KB
/
supervised_classification.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
### ##########################################################################################
### Script for classification of satellite data
###
### based on a script by: Dr. Ned Norning, Dr. Martin Wegmann
### first version approx. in 2011
###
### www.remote-sensing-biodiversity.org
##############################################################################################
##############################################################################################
### DOCUMENTATION
#####################################################################################
# The script reads a Shapefile holding the samples (defined by: attName).
# Raster image that contains environmental variables (continuous, categorical) (defined by: satImage).
# For each sample the data values for that pixel are determied and these data are used to run
# the model.
#####################################################################################
###########################
### ###
### Set variables ###
### ###
###########################
#-----------------------------------------------------------------------------------------------
# within the CIP pool install.packages() need to be done prior the library() function
# the following command is just a new defined command by you which checks if the package is installed and if not, it installs the package automatically
loadandinstall <- function(mypkg) {if (!is.element(mypkg, installed.packages()[,1])){install.packages(mypkg)}; library(mypkg, character.only=TRUE) }
loadandinstall("maptools")
loadandinstall("sp")
loadandinstall("randomForest")
loadandinstall("raster")
loadandinstall("rgdal")
loadandinstall("mgcv")
#-----------------------------------------------------------------------------------------------
####### Start Classification ########
###########################
### ###
### Set variables and settings ###
### ###
###########################
# install QGIS, load the satellite data and define a cropping area and training data within this region
# think about a validation set
# set main folder
#_____________________________________________________________________________
# set your working directory
# assign path to variable "workingdir"
workingdir <- ("/...../script/") # optional
# set working directory using the variable name
setwd(workingdir) # optional
#### path to the shape training data using the relative path
path_shapefile <- 'vector_data/path/to/file'
shapefile <- 'shapefile_without_suffix'
# Read the Shapefile
#using the link defined above
vec <- readOGR("path_shapefile, shapefile
# import data - several choices:
# import the whole stack of bands
satImage <- brick("name_of_satellite_image")
# in case you have just one file with various bands:
# raster1 <- raster("raster_data/crop_p224p63_b1.tif")
# raster2 <- raster("raster_data/crop_p224p63_b2.tif")
# raster3 <- raster("raster_data/crop_p224p63_b3.tif")
# raster4 <- raster("raster_data/crop_p224p63_b4.tif")
# or if you just want to import some bands in a big stack of bands:
# raster1 <- raster("raster_data/crop_p224p63.tif",band=1)
# raster2 <- raster("raster_data/crop_p224p63.tif",band=2)
# raster3 <- raster("raster_data/crop_p224p63.tif",band=3)
# raster4 <- raster("raster_data/crop_p224p63.tif",band=4)
# otherwise skip the "band=.." part and just add the correct file name for each band.
# stack the bands together (layer stacking)
# if more than 4 raster are imported, add another file name
# satImage <- stack(raster1,raster2, raster3,raster4)
#################
#### import finished ###
#################
### Number of samples per land cover class - number up to you and depending on study area size
numsamps <- 100
### Name of the attribute that holds the integer land cover type identifyer (very important to consider when doing the training data sets)
attName <- 'id'
### Name and path of the output GeoTiff image - just a variable for later usage
outImage <- 'results/classif_result.tif'
#################
#### settings set - finished ###
#################
#-----------------------------------------------------------------------------------------------
###################################
### ###
### Start processing ###
### ###
###################################
# Create vector of unique land cover attribute values
allAtt <- slot(vec, "data")
tabAtt <-table(allAtt[[attName]])
uniqueAtt <-as.numeric(names(tabAtt))
### Create input data from a Shapefile with training data and the image ("generation points for Regions of Interest")
for (x in 1:length(uniqueAtt)) {
# Create a mask for cover type 1 (an integer attribute)
class_data <- vec[vec[[attName]]==uniqueAtt[x],]
# Get random points for the class
classpts <- spsample(class_data, type="random", n=numsamps)
#Append data for all but the first run
if (x == 1) {
xy <- classpts
} else {
xy <- rbind(xy, classpts)
}
}
# # # for testing if you want
# summary(xy)
# plot(xy)
# image(satImage)
# plot(xy, add=TRUE)
### plot the random generated points on one of the rasters - for visual checking only
pdf("results/randompoints.pdf")
plot(satImage,2)
points(xy)
dev.off()
# !!! resulting image NOT in the plot window but in a pdf outside your R session - see settings on top about location of results
##############
#
# Get class number for each sample point
temp <- over(x=xy,y=vec)
summary(temp)
#############
#
# Create the responce input for randomForest
response <- factor(temp[[attName]])
#############
trainvals <- cbind(response, extract(satImage, xy)) #what is behind certain values (1-water and the value for each band)
# check for consistency
head(trainvals)
#############
#
# Run Random Forest
# think about different methods (SVM, BRT, GLM, ...)
# compare the results
print("Starting to calculate random forest object")
randfor <- randomForest(as.factor(response) ~. , #glm, car, gam, any statistics you want..
data=trainvals, #a tree gives all breaking point (how to split table) ##library(tree), cmd: #tree()
na.action=na.omit, # if pixels with no values exist
confusion=T) # first info how good your classification performed
randfor #to look at results (confusion matrix: 100 good, all class A points are within class A)
# ### further settings which can be defined - see: randomForest manual
# model_1 <-randomForest(x=input,
# ntree=Treesize,
# nodesize=Nodesize,
# importance=TRUE,
# proximity=FALSE,
# mtry=Feature_OOB,
# confusion=TRUE,
# do.trace=TRUE,
# norm.votes=TRUE)
#
# ### other method could be applied and compared like glm(), gam(), SVM, tree etc.
#############
#
# Start predictions
print("Starting predictions")
predict(satImage, randfor, filename=outImage, progress='text', format='GTiff', datatype='INT1U', type='response', overwrite=TRUE)
####### End of Classification ########