-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy path03-ann.r
88 lines (65 loc) · 1.96 KB
/
03-ann.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
install.packages("yaImpute")
library(yaImpute)
## Make a couple of bivariate normal classes
## Draw `n` samples from a multivariate normal distribution.
##
## Arguments:
##   n  - number of samples (rows of the result).
##   mu - mean vector; its length fixes the dimension p.
##   V  - p x p covariance matrix; it is factored with chol().
##
## Returns an n x p matrix with one sample per row. Stops with
## "Dimension problem!" when either dimension of V differs from p.
rmvn <- function(n, mu = 0, V = matrix(1)) {
  p <- length(mu)
  if (!all(dim(V) %in% p)) {
    stop("Dimension problem!")
  }
  ## chol() returns the upper-triangular factor R with t(R) %*% R == V, so
  ## right-multiplying rows of standard normals by R gives covariance V.
  chol.factor <- chol(V)
  z <- matrix(rnorm(n * p), ncol = p)
  ## Repeat each mean n times so that column j is shifted by mu[j].
  shift <- rep(mu, times = rep(n, p))
  z %*% chol.factor + shift
}
## Number of observations drawn for each class.
m <- 10000

## Class 1: draw m points and append the class label 1 as a third column.
mu.1 <- c(20, 40)
V.1 <- tcrossprod(matrix(c(-5, 1, 0, 5), nrow = 2, ncol = 2))
c.1 <- cbind(rmvn(m, mu.1, V.1), rep(1, times = m))
head(c.1)

## Class 2: same construction with label 2.
mu.2 <- c(30, 60)
V.2 <- tcrossprod(matrix(c(4, 2, 0, 2), nrow = 2, ncol = 2))
c.2 <- cbind(rmvn(m, mu.2, V.2), rep(2, times = m))
head(c.2)

## Class 3: same construction with label 3.
mu.3 <- c(15, 60)
V.3 <- tcrossprod(matrix(c(5, 5, 0, 5), nrow = 2, ncol = 2))
c.3 <- cbind(rmvn(m, mu.3, V.3), rep(3, times = m))
head(c.3)

## Stack the three classes row-wise into a single matrix.
c.all <- rbind(c.1, c.2, c.3)
dim(c.all)

## Extremes of the first two columns, used as shared axis limits.
max.x <- max(c.all[, 1])
min.x <- min(c.all[, 1])
max.y <- max(c.all[, 2])
min.y <- min(c.all[, 2])
## Scatter plot of the three classes on shared axes:
## class 1 in blue, class 2 in green, class 3 in red.
plot(
  x = c.1[, 1], y = c.1[, 2],
  xlim = c(min.x, max.x), ylim = c(min.y, max.y),
  xlab = "Variable 1", ylab = "Variable 2",
  col = "blue", pch = 19, cex = 0.5
)
points(x = c.2[, 1], y = c.2[, 2], col = "green", pch = 19, cex = 0.5)
points(x = c.3[, 1], y = c.3[, 2], col = "red", pch = 19, cex = 0.5)
## Take a reference sample: n rows of c.all drawn without replacement.
n <- 2000
## sample.int() draws the row indices directly instead of building
## 1:nrow(c.all), which would be c(1, 0) for an empty matrix.
## drop = FALSE keeps ref a matrix even if n is 1.
ref <- c.all[sample.int(nrow(c.all), n), , drop = FALSE]
head(ref, 4)
head(c.all[, 1:2], 3)
## Compare search times of three ann() configurations. Each call uses the
## first two columns of ref as the reference set and the first two columns
## of c.all as the targets.

## Number of neighbours requested from every search.
k <- 10

## Search with tree.type = "brute".
brute <- ann(
  ref = ref[, 1:2], target = c.all[, 1:2],
  tree.type = "brute", k = k, verbose = FALSE
)
print(brute$searchTime)
class(brute)
names(brute)

## Search with tree.type = "kd" and no eps argument (exact search).
kd.exact <- ann(
  ref = ref[, 1:2], target = c.all[, 1:2],
  tree.type = "kd", k = k, verbose = FALSE
)
print(kd.exact$searchTime)

## Search with tree.type = "kd" and eps = 100 (approximate search).
## NOTE(review): see the yaImpute ann() documentation for the exact
## meaning of eps.
kd.approx <- ann(
  ref = ref[, 1:2], target = c.all[, 1:2],
  tree.type = "kd", k = k, eps = 100, verbose = FALSE
)
print(kd.approx$searchTime)
## Takes too long to calculate for this many targets.
## Compare overall accuracy of the exact vs. approximate search
## Most frequent value among the rows of `ref` picked out by `knn.indx`.
##
## Arguments:
##   knn.indx - row indices into `ref`.
##   ref      - two-dimensional object, indexed as ref[knn.indx, ].
##
## Every value in the selected rows is tabulated together; the name of the
## highest count is converted to a number and returned.
## NOTE(review): presumably `ref` should hold only class labels (e.g. a
## one-column matrix) so that the result is the majority class; confirm
## against the caller.
knn.mode <- function(knn.indx, ref) {
  neighbours <- ref[knn.indx, ]
  ## as.matrix() turns the table into a matrix whose row names are the
  ## tabulated values; column 1 is then a named vector of counts.
  counts <- as.matrix(table(neighbours))[, 1]
  ranked <- sort(counts, decreasing = TRUE)
  as.numeric(names(ranked)[1])
}