Skip to content

Commit b7549b0

Browse files
committed
added solr access
1 parent c45197b commit b7549b0

20 files changed

+221
-49
lines changed

Diff for: .classpath

+15-5
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,26 @@
11
<?xml version="1.0" encoding="UTF-8"?>
22
<classpath>
33
<classpathentry excluding="file/reporting/CfEvaluator.java|processing/CfCalculator.java|processing/CfTagCalculator.java|processing/LdaCalculator.java|file/filtering/ResourceFiltering.java|file/filtering/TagFiltering.java|processing/MahoutCalculator.java" kind="src" path="src"/>
4-
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
4+
<classpathentry exported="true" kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/JavaSE-1.8"/>
55
<classpathentry exported="true" kind="lib" path="lib/opencsv-2.3.jar"/>
66
<classpathentry exported="true" kind="lib" path="lib/commons-lang3-3.1.jar"/>
77
<classpathentry exported="true" kind="lib" path="lib/guava-14.0-rc1.jar"/>
88
<classpathentry exported="true" kind="lib" path="lib/mallet-deps.jar"/>
99
<classpathentry exported="true" kind="lib" path="lib/mallet.jar"/>
1010
<classpathentry exported="true" kind="lib" path="lib/javaml-0.1.5.jar"/>
11-
<classpathentry kind="lib" path="lib/Jama-1.0.3.jar"/>
12-
<classpathentry kind="lib" path="lib/vecmath-1.5.1.jar"/>
13-
<classpathentry kind="lib" path="lib/json-simple-1.1.1.jar"/>
14-
<classpathentry kind="lib" path="lib/mysql-connector-java-5.1.36-bin.jar"/>
11+
<classpathentry exported="true" kind="lib" path="lib/Jama-1.0.3.jar"/>
12+
<classpathentry exported="true" kind="lib" path="lib/vecmath-1.5.1.jar"/>
13+
<classpathentry exported="true" kind="lib" path="lib/json-simple-1.1.1.jar"/>
14+
<classpathentry exported="true" kind="lib" path="lib/mysql-connector-java-5.1.36-bin.jar"/>
15+
<classpathentry exported="true" kind="lib" path="lib/solr-solrj-4.10.1.jar"/>
16+
<classpathentry exported="true" kind="lib" path="lib/commons-io-2.3.jar"/>
17+
<classpathentry exported="true" kind="lib" path="lib/httpclient-4.3.1.jar"/>
18+
<classpathentry exported="true" kind="lib" path="lib/slf4j-api-1.7.6.jar"/>
19+
<classpathentry exported="true" kind="lib" path="lib/httpcore-4.3.jar"/>
20+
<classpathentry exported="true" kind="lib" path="lib/httpmime-4.3.1.jar"/>
21+
<classpathentry exported="true" kind="lib" path="lib/noggit-0.5.jar"/>
22+
<classpathentry exported="true" kind="lib" path="lib/wstx-asl-3.2.7.jar"/>
23+
<classpathentry exported="true" kind="lib" path="lib/zookeeper-3.4.6.jar"/>
24+
<classpathentry exported="true" kind="lib" path="lib/commons-logging-1.2.jar"/>
1525
<classpathentry kind="output" path="bin"/>
1626
</classpath>

Diff for: lib/commons-io-2.3.jar

178 KB
Binary file not shown.

Diff for: lib/commons-logging-1.2.jar

60.4 KB
Binary file not shown.

Diff for: lib/httpclient-4.3.1.jar

572 KB
Binary file not shown.

Diff for: lib/httpcore-4.3.jar

276 KB
Binary file not shown.

Diff for: lib/httpmime-4.3.1.jar

36.2 KB
Binary file not shown.

Diff for: lib/noggit-0.5.jar

22 KB
Binary file not shown.

Diff for: lib/slf4j-api-1.7.6.jar

28 KB
Binary file not shown.

Diff for: lib/solr-solrj-4.10.1.jar

440 KB
Binary file not shown.

Diff for: lib/wstx-asl-3.2.7.jar

509 KB
Binary file not shown.

Diff for: lib/zookeeper-3.4.6.jar

774 KB
Binary file not shown.

Diff for: src/common/Bookmark.java

+8-5
Original file line numberDiff line numberDiff line change
@@ -45,11 +45,11 @@ public class Bookmark implements Comparable<Bookmark> {
4545
public Bookmark(int userID, int wikiID, String timestamp) {
4646
this.userID = userID;
4747
this.resID = wikiID;
48-
//if (timestamp.contains(".")) {
49-
// this.timestamp = timestamp.substring(0, timestamp.indexOf("."));
50-
//} else {
48+
if (timestamp.contains(".")) {
49+
this.timestamp = timestamp.substring(0, timestamp.indexOf("."));
50+
} else {
5151
this.timestamp = timestamp;
52-
//}
52+
}
5353

5454
this.categories = new ArrayList<Integer>();
5555
this.tags = new ArrayList<Integer>();
@@ -286,7 +286,10 @@ public static Map<Integer, Double> getResourcesFromUserWithRec(List<Bookmark> tr
286286
for (Bookmark data : trainData) {
287287
if (data.userID == userID) {
288288
if (dValue != null) {
289-
long timestamp = Long.parseLong(data.getTimestamp());
289+
long timestamp = 0;
290+
if (!data.getTimestamp().isEmpty()) {
291+
timestamp = Long.parseLong(data.getTimestamp());
292+
}
290293
Double rec = Math.pow(refTimestamp - timestamp + 1.0, dValue.doubleValue() * (-1.0));
291294
if (!rec.isInfinite() && !rec.isNaN()) {
292295
//resourceMap.put(data.resID, Math.log(rec.doubleValue() + 1.0));

Diff for: src/common/CooccurenceMatrix.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ public class CooccurenceMatrix {
3737
private List<Integer> tagCounts;
3838

3939
public CooccurenceMatrix(List<Bookmark> bookmarks, List<Integer> tagCounts, boolean normalize) {
40-
System.out.println("Building matrix ...");
40+
//System.out.println("Building matrix ...");
4141
this.coocurenceMatrix = new SparseMatrix();
4242
this.tagCounts = tagCounts;
4343
this.initMatrix(bookmarks);

Diff for: src/common/SolrConnector.java

+80
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
package common;
2+
3+
import java.util.ArrayList;
4+
import java.util.HashSet;
5+
import java.util.LinkedHashMap;
6+
import java.util.List;
7+
import java.util.Map;
8+
import java.util.Set;
9+
10+
import org.apache.solr.client.solrj.SolrQuery;
11+
import org.apache.solr.client.solrj.SolrServer;
12+
import org.apache.solr.client.solrj.SolrServerException;
13+
import org.apache.solr.client.solrj.impl.HttpSolrServer;
14+
import org.apache.solr.client.solrj.response.QueryResponse;
15+
import org.apache.solr.common.SolrDocument;
16+
import org.apache.solr.common.SolrDocumentList;
17+
18+
public class SolrConnector {
19+
20+
private SolrServer server;
21+
22+
public SolrConnector(String core) {
23+
this.server = new HttpSolrServer("http://localhost:8983/solr/" + core);
24+
}
25+
26+
@SuppressWarnings("unchecked")
27+
public Map<String, Set<String>> getTweets() {
28+
Map<String, Set<String>> tweets = new LinkedHashMap<String, Set<String>>();
29+
30+
SolrQuery solrParams = new SolrQuery();
31+
solrParams.set("q", "*:*");
32+
solrParams.set("fl", "text,hashtags");
33+
solrParams.set("rows", Integer.MAX_VALUE);
34+
QueryResponse r = null;
35+
try {
36+
r = this.server.query(solrParams);
37+
SolrDocumentList docs = r.getResults();
38+
for (SolrDocument d : docs) {
39+
tweets.put((String) d.get("text"), new HashSet<String>((List<String>) d.get("hashtags")));
40+
}
41+
} catch (SolrServerException e) {
42+
e.printStackTrace();
43+
}
44+
45+
return tweets;
46+
}
47+
48+
@SuppressWarnings("unchecked")
49+
public Map<String, Double> getTopHashtagsForTweetText(String tweetText, int limit) {
50+
Map<String, Double> hashtagMap = new LinkedHashMap<String, Double>();
51+
52+
SolrQuery solrParams = new SolrQuery();
53+
solrParams.set("q", "text:" + tweetText);
54+
solrParams.set("fl", "hashtags,score");
55+
solrParams.set("rows", 20);
56+
QueryResponse r = null;
57+
try {
58+
r = this.server.query(solrParams);
59+
SolrDocumentList docs = r.getResults();
60+
for (SolrDocument d : docs) {
61+
double score = (float) d.get("score");
62+
Set<String> hashtags = new HashSet<String>((List<String>) d.get("hashtags"));
63+
for (String h : hashtags) {
64+
if (hashtagMap.size() < limit) {
65+
if (!hashtagMap.containsKey(h)) {
66+
hashtagMap.put(h, score);
67+
}
68+
}
69+
}
70+
if (hashtagMap.size() >= limit) {
71+
break;
72+
}
73+
}
74+
} catch (SolrServerException e) {
75+
e.printStackTrace();
76+
}
77+
78+
return hashtagMap;
79+
}
80+
}

Diff for: src/engine/BaseLevelLearningEngine.java

+1-1
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ public void loadFile(String filename) throws Exception {
8383
}
8484
Map<Integer, Double> topTags = EngineUtils.calcTopEntities(reader, EntityType.TAG);
8585

86-
// calculate associative component
86+
System.out.println("calculate associative component for BLLac");
8787
CooccurenceMatrix matrix = new CooccurenceMatrix(reader.getBookmarks(), reader.getTagCounts(), true);
8888

8989
resetStructures(userMaps, resMaps, reader, topTags, matrix, userCounts, resCounts);

Diff for: src/engine/TagRecommenderEvalEngine.java

+24-20
Original file line numberDiff line numberDiff line change
@@ -31,16 +31,16 @@ License, or (at your option) any later version.
3131

3232
public class TagRecommenderEvalEngine implements EngineInterface {
3333

34-
private EngineInterface mpEngine;
34+
//private EngineInterface mpEngine;
3535
private EngineInterface bllEngine;
36-
private EngineInterface threelEngine;
36+
//private EngineInterface threelEngine;
3737
//private Random random;
3838
private BufferedWriter bw;
3939

4040
public TagRecommenderEvalEngine() {
41-
this.mpEngine = null;
41+
//this.mpEngine = null;
4242
this.bllEngine = null;
43-
this.threelEngine = null;
43+
//this.threelEngine = null;
4444
//this.random = new Random();
4545
this.bw = null;
4646

@@ -58,20 +58,20 @@ public void loadFile(String filename) throws Exception {
5858
//this.bllEngine = null;
5959
//this.threelEngine = null;
6060

61-
/* old
6261
BookmarkReader reader = new BookmarkReader(0, false);
6362
reader.readFile(filename);
64-
if (reader.getCategories().size() > 0) {
65-
this.threelEngine = new ThreeLayersEngine();
66-
this.threelEngine.loadFile(filename);
67-
}
68-
if (reader.hasTimestamp()) {
63+
//if (reader.getCategories().size() > 0) {
64+
// this.threelEngine = new ThreeLayersEngine();
65+
// this.threelEngine.loadFile(filename);
66+
//}
67+
//if (reader.hasTimestamp()) {
6968
this.bllEngine = new BaseLevelLearningEngine();
7069
this.bllEngine.loadFile(filename);
71-
}
72-
this.lmEngine = new LanguageModelEngine();
73-
this.lmEngine.loadFile(filename);
74-
*/
70+
//}
71+
//this.lmEngine = new LanguageModelEngine();
72+
//this.lmEngine.loadFile(filename);
73+
74+
/* KnowBrain
7575
//if (filename.contains("group1") || filename.contains("group3")) {
7676
this.threelEngine = new ThreeLayersCollectiveEngine();
7777
this.threelEngine.loadFile(filename);
@@ -81,14 +81,15 @@ public void loadFile(String filename) throws Exception {
8181
//}
8282
this.mpEngine = new MostPopularCollectiveEngine();
8383
this.mpEngine.loadFile(filename);
84+
*/
8485
}
8586

8687
@Override
8788
public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, String resource, List<String> topics, Integer count, Boolean filterOwnEntities, Algorithm algorithm, EntityType type) {
8889
Map<String, Double> returnMap = null;
8990
String algorithmString = null;
9091

91-
/* old
92+
9293
if (this.bllEngine != null) {
9394
if (algorithm == null || algorithm == Algorithm.BLLacMPr) {
9495
algorithmString = "BLLacMPr";
@@ -101,11 +102,12 @@ public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, S
101102
returnMap = this.bllEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
102103
}
103104
}
104-
if (algorithmString == null) {
105-
algorithmString = "MPur";
106-
returnMap = this.lmEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
107-
}
108-
*/
105+
//if (algorithmString == null) {
106+
// algorithmString = "MPur";
107+
// returnMap = this.lmEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
108+
//}
109+
110+
/* KnowBrain study
109111
if (algorithm == null || algorithm == Algorithm.THREELcoll || algorithm == Algorithm.THREEL) {
110112
if (this.threelEngine != null) {
111113
returnMap = this.threelEngine.getEntitiesWithLikelihood(user, resource, topics, count, filterOwnEntities, algorithm, type);
@@ -130,6 +132,8 @@ public synchronized Map<String, Double> getEntitiesWithLikelihood(String user, S
130132
algorithmString = "MP";
131133
}
132134
}
135+
*/
136+
133137
if (this.bw != null) {
134138
try {
135139
this.bw.write(user + "|" + resource + "|" + topics + "|" + count + "|" + filterOwnEntities + "|" + System.currentTimeMillis() + "|" + algorithmString + "|" + returnMap.keySet() + "\n");

Diff for: src/file/PredictionFileWriter.java

+31
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ License, or (at your option) any later version.
2626
import java.io.IOException;
2727
import java.util.List;
2828
import java.util.Map;
29+
import java.util.Set;
2930

3031
import common.Bookmark;
3132

@@ -146,4 +147,34 @@ public boolean writeResourcePredictionsToFile(String filename, int trainSize, in
146147

147148
return false;
148149
}
150+
151+
// Statics
152+
public static void writeSimplePredictions(List<Set<String>> predictionValues, List<Set<String>> realValues, String filename) {
153+
try {
154+
FileWriter writer = new FileWriter(new File("./data/results/" + filename + ".txt"));
155+
BufferedWriter bw = new BufferedWriter(writer);
156+
157+
for (int i = 0; i < predictionValues.size(); i++) {
158+
String resultString = i + "|";
159+
Set<String> predictions = predictionValues.get(i);
160+
Set<String> values = realValues.get(i);
161+
162+
for (String p : predictions) {
163+
resultString += (p + ", ");
164+
}
165+
resultString += "|";
166+
for (String v : values) {
167+
resultString += (v + ", ");
168+
}
169+
resultString += "\n";
170+
171+
bw.write(resultString);
172+
}
173+
174+
bw.flush();
175+
bw.close();
176+
} catch (IOException e) {
177+
e.printStackTrace();
178+
}
179+
}
149180
}

Diff for: src/itemrecommendations/CFResourceCalculator.java

+3-3
Original file line numberDiff line numberDiff line change
@@ -176,14 +176,14 @@ public Map<Integer, Double> getRankedResourcesList(int userID, int resID, boolea
176176
return Utilities.getSimUsersForResource(resID, this.allUsers, this.userMaps, this.resMaps, resourceUsers, this.sim, sorting);
177177
} else {
178178
if (userID != -1) {
179-
Map<Integer, Double> candidateSet = new LinkedHashMap<Integer, Double>();
179+
/*Map<Integer, Double> candidateSet = new LinkedHashMap<Integer, Double>();
180180
for (Map.Entry<Integer, Double> entry : this.rankedResourceCalculator.getRankedResourcesList(userID, -1, true, false, false, true, false).entrySet()) {
181181
if (candidateSet.size() < 100) {
182182
candidateSet.put(entry.getKey(), entry.getValue());
183183
}
184-
}
184+
}*/
185185

186-
sortedResources = Utilities.getSimResourcesForUser(userID, /*candidateSet*/this.allResources, this.userMaps, this.resMaps,
186+
sortedResources = Utilities.getSimResourcesForUser(userID, this.allResources, this.userMaps, this.resMaps,
187187
filterOwnEntities ? userResources : new ArrayList<Integer>(), this.sim, sorting);
188188
} else if (resID != -1) {
189189
sortedResources = Utilities.getSimResources(-1, resID, null, this.allResources, this.resMaps, this.trainList, this.sim, sorting);

Diff for: src/processing/SolrHashtagCalculator.java

+35
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
package processing;
2+
3+
import java.util.ArrayList;
4+
import java.util.List;
5+
import java.util.Map;
6+
import java.util.Set;
7+
8+
import com.google.common.primitives.Ints;
9+
10+
import common.Bookmark;
11+
import common.SolrConnector;
12+
import file.BookmarkReader;
13+
import file.PredictionFileWriter;
14+
15+
public class SolrHashtagCalculator {
16+
17+
private final static int LIMIT = 10;
18+
19+
// Statics ----------------------------------------------------------------------------------------------------------------------
20+
public static void predictSample(String filename) {
21+
List<Set<String>> predictionValues = new ArrayList<Set<String>>();
22+
List<Set<String>> realValues = new ArrayList<Set<String>>();
23+
SolrConnector trainConnector = new SolrConnector(filename/* + "train"*/);
24+
SolrConnector testConnector = new SolrConnector(filename/* + "test"*/);
25+
26+
Map<String, Set<String>> tweets = testConnector.getTweets();
27+
for (Map.Entry<String, Set<String>> tweet : tweets.entrySet()) {
28+
Map<String, Double> map = trainConnector.getTopHashtagsForTweetText(tweet.getKey(), LIMIT);
29+
predictionValues.add(map.keySet());
30+
realValues.add(tweet.getValue());
31+
}
32+
33+
PredictionFileWriter.writeSimplePredictions(predictionValues, realValues, filename + "_solrht");
34+
}
35+
}

0 commit comments

Comments
 (0)