|
17 | 17 |
|
18 | 18 | package org.apache.mahout.clustering.syntheticcontrol.meanshift;
|
19 | 19 |
|
| 20 | +import java.io.IOException; |
20 | 21 | import java.util.Map;
|
21 | 22 |
|
22 | 23 | import org.apache.hadoop.conf.Configuration;
|
| 24 | +import org.apache.hadoop.fs.FileSystem; |
23 | 25 | import org.apache.hadoop.fs.Path;
|
24 | 26 | import org.apache.hadoop.util.ToolRunner;
|
25 | 27 | import org.apache.mahout.clustering.conversion.meanshift.InputDriver;
|
@@ -95,6 +97,21 @@ public int run(String[] args) throws Exception{
|
95 | 97 | return 0;
|
96 | 98 | }
|
97 | 99 |
|
| 100 | + /** |
| 101 | + * Return the path to the final iteration's clusters |
| 102 | + */ |
| 103 | + private static Path finalClusterPath(Configuration conf, Path output, |
| 104 | + int maxIterations) throws IOException { |
| 105 | + FileSystem fs = FileSystem.get(conf); |
| 106 | + for (int i = maxIterations; i >= 0; i--) { |
| 107 | + Path clusters = new Path(output, "clusters-" + i + "-final"); |
| 108 | + if (fs.exists(clusters)) { |
| 109 | + return clusters; |
| 110 | + } |
| 111 | + } |
| 112 | + return null; |
| 113 | + } |
| 114 | + |
98 | 115 | /**
|
99 | 116 | * Run the meanshift clustering job on an input dataset using the given
|
100 | 117 | * distance measure, t1, t2 and iteration parameters. All output data will be
|
@@ -140,8 +157,9 @@ public static void run(Configuration conf,
|
140 | 157 | measure, kernelProfile, t1, t2, convergenceDelta, maxIterations, true,
|
141 | 158 | true, false);
|
142 | 159 | // run ClusterDumper
|
143 |
| - ClusterDumper clusterDumper = new ClusterDumper(new Path(output, |
144 |
| - "clusters-" + maxIterations), new Path(output, "clusteredPoints")); |
| 160 | + ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath( |
| 161 | + conf, output, maxIterations), |
| 162 | + new Path(output, "clusteredPoints")); |
145 | 163 | clusterDumper.printClusters(null);
|
146 | 164 | }
|
147 | 165 |
|
|
0 commit comments