CDH-4645 Mahout example job code needs to be updated

Bilung Lee · Bilung Lee · commit f65a550c6805 · 2012-03-27T16:40:58.000-07:00
diff --git a/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java b/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthRetailDataTest2.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.fpm.pfpgrowth2;
+package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.File;
 import java.io.Writer;
diff --git a/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java b/core/src/test/java/org/apache/mahout/fpm/pfpgrowth/PFPGrowthSynthDataTest2.java
@@ -15,7 +15,7 @@
  * limitations under the License.
  */
 
-package org.apache.mahout.fpm.pfpgrowth2;
+package org.apache.mahout.fpm.pfpgrowth;
 
 import java.io.File;
 import java.io.Writer;
diff --git a/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java b/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/canopy/Job.java
@@ -87,7 +87,7 @@ private static void run(Path input, Path output, DistanceMeasure measure,
         output, measure, t1, t2, true, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
-        "clusters-0"), new Path(output, "clusteredPoints"));
+        "clusters-0-final"), new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 
diff --git a/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java b/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/dirichlet/Job.java
@@ -17,11 +17,13 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.dirichlet;
 
+import java.io.IOException;
 import java.util.Map;
 
 import org.apache.commons.cli2.builder.ArgumentBuilder;
 import org.apache.commons.cli2.builder.DefaultOptionBuilder;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.clustering.conversion.InputDriver;
@@ -112,6 +114,21 @@ public int run(String[] args) throws Exception{
     return 0;
   }
 
+  /**
+   * Return the path to the final iteration's clusters
+   */
+  private static Path finalClusterPath(Configuration conf, Path output,
+      int maxIterations) throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    for (int i = maxIterations; i >= 0; i--) {
+      Path clusters = new Path(output, "clusters-" + i + "-final");
+      if (fs.exists(clusters)) {
+        return clusters;
+      }
+    }
+    return null;
+  }
+
   /**
    * Run the job using supplied arguments, deleting the output directory if it exists beforehand
    * 
@@ -149,8 +166,9 @@ public static void run(Path input,
                         threshold,
                         false);
     // run ClusterDumper
-    ClusterDumper clusterDumper =
-        new ClusterDumper(new Path(output, "clusters-" + maxIterations), new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(
+      new Configuration(), output, maxIterations),
+      new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
 
diff --git a/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java b/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/fuzzykmeans/Job.java
@@ -120,7 +120,7 @@ private static Path finalClusterPath(Configuration conf, Path output,
       int maxIterations) throws IOException {
     FileSystem fs = FileSystem.get(conf);
     for (int i = maxIterations; i >= 0; i--) {
-      Path clusters = new Path(output, "clusters-" + i);
+      Path clusters = new Path(output, "clusters-" + i + "-final");
       if (fs.exists(clusters)) {
         return clusters;
       }
@@ -174,7 +174,8 @@ public static void run(Configuration conf,
         output, measure, t1, t2, false, false);
     log.info("Running FuzzyKMeans");
     FuzzyKMeansDriver.run(directoryContainingConvertedInput, new Path(output,
-        Cluster.INITIAL_CLUSTERS_DIR), output, measure, convergenceDelta,
+        Cluster.INITIAL_CLUSTERS_DIR + Cluster.FINAL_ITERATION_SUFFIX),
+        output, measure, convergenceDelta,
         maxIterations, fuzziness, true, true, 0.0, false);
     // run ClusterDumper
     ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(conf,
diff --git a/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java b/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/kmeans/Job.java
@@ -208,7 +208,7 @@ private static Path finalClusterPath(Configuration conf, Path output,
       int maxIterations) throws IOException {
     FileSystem fs = FileSystem.get(conf);
     for (int i = maxIterations; i >= 0; i--) {
-      Path clusters = new Path(output, "clusters-" + i);
+      Path clusters = new Path(output, "clusters-" + i + "-final");
       if (fs.exists(clusters)) {
         return clusters;
       }
diff --git a/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java b/examples/src/main/java/org/apache/mahout/clustering/syntheticcontrol/meanshift/Job.java
@@ -17,9 +17,11 @@
 
 package org.apache.mahout.clustering.syntheticcontrol.meanshift;
 
+import java.io.IOException;
 import java.util.Map;
 
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.util.ToolRunner;
 import org.apache.mahout.clustering.conversion.meanshift.InputDriver;
@@ -95,6 +97,21 @@ public int run(String[] args) throws Exception{
     return 0;
   }
 
+  /**
+   * Return the path to the final iteration's clusters
+   */
+  private static Path finalClusterPath(Configuration conf, Path output,
+      int maxIterations) throws IOException {
+    FileSystem fs = FileSystem.get(conf);
+    for (int i = maxIterations; i >= 0; i--) {
+      Path clusters = new Path(output, "clusters-" + i + "-final");
+      if (fs.exists(clusters)) {
+        return clusters;
+      }
+    }
+    return null;
+  }
+
   /**
    * Run the meanshift clustering job on an input dataset using the given
    * distance measure, t1, t2 and iteration parameters. All output data will be
@@ -140,8 +157,9 @@ public static void run(Configuration conf,
         measure, kernelProfile, t1, t2, convergenceDelta, maxIterations, true,
         true, false);
     // run ClusterDumper
-    ClusterDumper clusterDumper = new ClusterDumper(new Path(output,
-        "clusters-" + maxIterations), new Path(output, "clusteredPoints"));
+    ClusterDumper clusterDumper = new ClusterDumper(finalClusterPath(
+        conf, output, maxIterations),
+        new Path(output, "clusteredPoints"));
     clusterDumper.printClusters(null);
   }
   

Original file line number	Diff line number	Diff line change
`@@ -87,7 +87,7 @@ private static void run(Path input, Path output, DistanceMeasure measure,`
`87`	`87`	`output, measure, t1, t2, true, false);`
`88`	`88`	`// run ClusterDumper`
`89`	`89`	`ClusterDumper clusterDumper = new ClusterDumper(new Path(output,`
`90`		`- "clusters-0"), new Path(output, "clusteredPoints"));`
	`90`	`+ "clusters-0-final"), new Path(output, "clusteredPoints"));`
`91`	`91`	`clusterDumper.printClusters(null);`
`92`	`92`	`}`
`93`	`93`
Original file line number	Diff line number	Diff line change
`@@ -208,7 +208,7 @@ private static Path finalClusterPath(Configuration conf, Path output,`
`208`	`208`	`int maxIterations) throws IOException {`
`209`	`209`	`FileSystem fs = FileSystem.get(conf);`
`210`	`210`	`for (int i = maxIterations; i >= 0; i--) {`
`211`		`- Path clusters = new Path(output, "clusters-" + i);`
	`211`	`+ Path clusters = new Path(output, "clusters-" + i + "-final");`
`212`	`212`	`if (fs.exists(clusters)) {`
`213`	`213`	`return clusters;`
`214`	`214`	`}`