Skip to content

Commit f65a550

Browse files
author
Bilung Lee
committed
CDH-4645 Mahout example job code needs to be updated
1 parent 9064081 commit f65a550

File tree

7 files changed

+47
-10
lines changed

7 files changed

+47
-10
lines changed

core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
* limitations under the License.
1616
*/
1717

18-
package org.apache.mahout.fpm.pfpgrowth2;
18+
package org.apache.mahout.fpm.pfpgrowth;
1919

2020
import java.io.File;
2121
import java.io.Writer;

core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -15,7 +15,7 @@
1515
* limitations under the License.
1616
*/
1717

18-
package org.apache.mahout.fpm.pfpgrowth2;
18+
package org.apache.mahout.fpm.pfpgrowth;
1919

2020
import java.io.File;
2121
import java.io.Writer;

examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -87,7 +87,7 @@ private static void run(Path input, Path output, DistanceMeasure measure,
8787
output, measure, t1, t2, true, false);
8888
// run ClusterDumper
8989
ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
90-
"clusters-0"), new Path(output, "clusteredPoints"));
90+
"clusters-0-final"), new Path(output, "clusteredPoints"));
9191
clusterDumper.printClusters(null);
9292
}
9393

examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java

+20-2
Original file line number | Diff line number | Diff line change
@@ -17,11 +17,13 @@
1717

1818
package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
1919

20+
import java.io.IOException;
2021
import java.util.Map;
2122

2223
import org.apache.commons.cli2.builder.ArgumentBuilder;
2324
import org.apache.commons.cli2.builder.DefaultOptionBuilder;
2425
import org.apache.hadoop.conf.Configuration;
26+
import org.apache.hadoop.fs.FileSystem;
2527
import org.apache.hadoop.fs.Path;
2628
import org.apache.hadoop.util.ToolRunner;
2729
import org.apache.mahout.clustering.conversion.InputDriver;
@@ -112,6 +114,21 @@ public int run(String[] args) throws Exception{
112114
return 0;
113115
}
114116

117+
/**
118+
* Return the path to the final iteration's clusters
119+
*/
120+
private static Path finalClusterPath(Configuration conf, Path output,
121+
int maxIterations) throws IOException {
122+
FileSystem fs = FileSystem.get(conf);
123+
for (int i = maxIterations; i >= 0; i--) {
124+
Path clusters = new Path(output, "clusters-" + i + "-final");
125+
if (fs.exists(clusters)) {
126+
return clusters;
127+
}
128+
}
129+
return null;
130+
}
131+
115132
/**
116133
* Run the job using supplied arguments, deleting the output directory if it exists beforehand
117134
*
@@ -149,8 +166,9 @@ public static void run(Path input,
149166
threshold,
150167
false);
151168
// run ClusterDumper
152-
ClusterDumper clusterDumper =
153-
new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output, "clusteredPoints"));
169+
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(
170+
new Configuration(), output, maxIterations),
171+
new Path(output, "clusteredPoints"));
154172
clusterDumper.printClusters(null);
155173
}
156174

examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java

+3-2
Original file line number | Diff line number | Diff line change
@@ -120,7 +120,7 @@ private static Path finalClusterPath(Configuration conf, Path output,
120120
int maxIterations) throws IOException {
121121
FileSystem fs = FileSystem.get(conf);
122122
for (int i = maxIterations; i >= 0; i--) {
123-
Path clusters = new Path(output, "clusters-" + i);
123+
Path clusters = new Path(output, "clusters-" + i + "-final");
124124
if (fs.exists(clusters)) {
125125
return clusters;
126126
}
@@ -174,7 +174,8 @@ public static void run(Configuration conf,
174174
output, measure, t1, t2, false, false);
175175
log.info("Running FuzzyKMeans");
176176
FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(output,
177-
Cluster.INITIAL_CLUSTERS_DIR), output, measure, convergenceDelta,
177+
Cluster.INITIAL_CLUSTERS_DIR + Cluster.FINAL_ITERATION_SUFFIX),
178+
output, measure, convergenceDelta,
178179
maxIterations, fuzziness, true, true, 0.0, false);
179180
// run ClusterDumper
180181
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,

examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java

+1-1
Original file line number | Diff line number | Diff line change
@@ -208,7 +208,7 @@ private static Path finalClusterPath(Configuration conf, Path output,
208208
int maxIterations) throws IOException {
209209
FileSystem fs = FileSystem.get(conf);
210210
for (int i = maxIterations; i >= 0; i--) {
211-
Path clusters = new Path(output, "clusters-" + i);
211+
Path clusters = new Path(output, "clusters-" + i + "-final");
212212
if (fs.exists(clusters)) {
213213
return clusters;
214214
}

examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java

+20-2
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,11 @@
1717

1818
package org.apache.mahout.clustering.syntheticcontrol.meanshift;
1919

20+
import java.io.IOException;
2021
import java.util.Map;
2122

2223
import org.apache.hadoop.conf.Configuration;
24+
import org.apache.hadoop.fs.FileSystem;
2325
import org.apache.hadoop.fs.Path;
2426
import org.apache.hadoop.util.ToolRunner;
2527
import org.apache.mahout.clustering.conversion.meanshift.InputDriver;
@@ -95,6 +97,21 @@ public int run(String[] args) throws Exception{
9597
return 0;
9698
}
9799

100+
/**
101+
* Return the path to the final iteration's clusters
102+
*/
103+
private static Path finalClusterPath(Configuration conf, Path output,
104+
int maxIterations) throws IOException {
105+
FileSystem fs = FileSystem.get(conf);
106+
for (int i = maxIterations; i >= 0; i--) {
107+
Path clusters = new Path(output, "clusters-" + i + "-final");
108+
if (fs.exists(clusters)) {
109+
return clusters;
110+
}
111+
}
112+
return null;
113+
}
114+
98115
/**
99116
* Run the meanshift clustering job on an input dataset using the given
100117
* distance measure, t1, t2 and iteration parameters. All output data will be
@@ -140,8 +157,9 @@ public static void run(Configuration conf,
140157
measure, kernelProfile, t1, t2, convergenceDelta, maxIterations, true,
141158
true, false);
142159
// run ClusterDumper
143-
ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
144-
"clusters-" + maxIterations), new Path(output, "clusteredPoints"));
160+
ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(
161+
conf, output, maxIterations),
162+
new Path(output, "clusteredPoints"));
145163
clusterDumper.printClusters(null);
146164
}
147165

0 commit comments

Comments
 (0)