2018-06-27 13:14:29 UTC
diff --git a/examples/src/main/resources/cf-data-purchase.txt b/examples/src/main/resources/cf-data-purchase.txt
deleted file mode 100644
index d87c031..0000000
--- a/examples/src/main/resources/cf-data-purchase.txt
+++ /dev/null
@@ -1,7 +0,0 @@
diff --git a/examples/src/main/resources/cf-data-view.txt b/examples/src/main/resources/cf-data-view.txt
deleted file mode 100644
index 09ad9b6..0000000
--- a/examples/src/main/resources/cf-data-view.txt
+++ /dev/null
@@ -1,12 +0,0 @@
diff --git a/examples/src/main/resources/donut-test.csv b/examples/src/main/resources/donut-test.csv
deleted file mode 100644
index 46ea564..0000000
--- a/examples/src/main/resources/donut-test.csv
+++ /dev/null
@@ -1,41 +0,0 @@
diff --git a/examples/src/main/resources/donut.csv b/examples/src/main/resources/donut.csv
deleted file mode 100644
index 33ba3b7..0000000
--- a/examples/src/main/resources/donut.csv
+++ /dev/null
@@ -1,41 +0,0 @@
diff --git a/examples/src/main/resources/test-data.csv b/examples/src/main/resources/test-data.csv
deleted file mode 100644
index ab683cd..0000000
--- a/examples/src/main/resources/test-data.csv
+++ /dev/null
@@ -1,61 +0,0 @@
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java
deleted file mode 100644
index e849011..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java
+++ /dev/null
@@ -1,43 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-public class LogisticModelParametersTest extends MahoutTestCase {
- @Test
- public void serializationWithoutCsv() throws IOException {
- LogisticModelParameters params = new LogisticModelParameters();
- params.setTargetVariable("foo");
- params.setTypeMap(Collections.<String, String>emptyMap());
- params.setTargetCategories(Arrays.asList("foo", "bar"));
- params.setNumFeatures(1);
- params.createRegression();
- //MAHOUT-1196 should work without "csv" being set
- params.saveTo(new ByteArrayOutputStream());
- }
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java
deleted file mode 100644
index c8e4879..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java
+++ /dev/null
@@ -1,40 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import org.apache.mahout.examples.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.junit.Test;
-public class ModelDissectorTest extends MahoutTestCase {
- @Test
- public void testCategoryOrdering() {
- ModelDissector.Weight w = new ModelDissector.Weight("a", new DenseVector(new double[]{-2, -5, 5, 2, 4, 1, 0}), 4);
- assertEquals(1, w.getCategory(0), 0);
- assertEquals(-5, w.getWeight(0), 0);
- assertEquals(2, w.getCategory(1), 0);
- assertEquals(5, w.getWeight(1), 0);
- assertEquals(4, w.getCategory(2), 0);
- assertEquals(4, w.getWeight(2), 0);
- assertEquals(0, w.getCategory(3), 0);
- assertEquals(-2, w.getWeight(3), 0);
- }
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
deleted file mode 100644
index 4cde692..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
+++ /dev/null
@@ -1,167 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import com.google.common.base.Charsets;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Sets;
-import com.google.common.io.Resources;
-import org.apache.mahout.classifier.AbstractVectorClassifier;
-import org.apache.mahout.examples.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
-import org.junit.Test;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-public class TrainLogisticTest extends MahoutTestCase {
- @Test
- public void example131() throws Exception {
- String outputFile = getTestTempFile("model").getAbsolutePath();
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw, true);
- TrainLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--output", outputFile,
- "--target", "color", "--categories", "2",
- "--predictors", "x", "y",
- "--types", "numeric",
- "--features", "20",
- "--passes", "100",
- "--rate", "50"
- }, pw);
- String trainOut = sw.toString();
- assertTrue(trainOut.contains("x -0.7"));
- assertTrue(trainOut.contains("y -0.4"));
- LogisticModelParameters lmp = TrainLogistic.getParameters();
- assertEquals(1.0e-4, lmp.getLambda(), 1.0e-9);
- assertEquals(20, lmp.getNumFeatures());
- assertTrue(lmp.useBias());
- assertEquals("color", lmp.getTargetVariable());
- CsvRecordFactory csv = lmp.getCsvRecordFactory();
- assertEquals("[1, 2]", new TreeSet<>(csv.getTargetCategories()).toString());
- assertEquals("[Intercept Term, x, y]", Sets.newTreeSet(csv.getPredictors()).toString());
- // verify model by building dissector
- AbstractVectorClassifier model = TrainLogistic.getModel();
- List<String> data = Resources.readLines(Resources.getResource("donut.csv"), Charsets.UTF_8);
- Map<String, Double> expectedValues = ImmutableMap.of("x", -0.7, "y", -0.43, "Intercept Term", -0.15);
- verifyModel(lmp, csv, data, model, expectedValues);
- // test saved model
- try (InputStream in = new FileInputStream(new File(outputFile))){
- LogisticModelParameters lmpOut = LogisticModelParameters.loadFrom(in);
- CsvRecordFactory csvOut = lmpOut.getCsvRecordFactory();
- csvOut.firstLine(data.get(0));
- OnlineLogisticRegression lrOut = lmpOut.createRegression();
- verifyModel(lmpOut, csvOut, data, lrOut, expectedValues);
- }
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 0.57"));
- assertTrue(trainOut.contains("confusion: [[27.0, 13.0], [0.0, 0.0]]"));
- }
- @Test
- public void example132() throws Exception {
- String outputFile = getTestTempFile("model").getAbsolutePath();
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw, true);
- TrainLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--output", outputFile,
- "--target", "color",
- "--categories", "2",
- "--predictors", "x", "y", "a", "b", "c",
- "--types", "numeric",
- "--features", "20",
- "--passes", "100",
- "--rate", "50"
- }, pw);
- String trainOut = sw.toString();
- assertTrue(trainOut.contains("a 0."));
- assertTrue(trainOut.contains("b -1."));
- assertTrue(trainOut.contains("c -25."));
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 1.00"));
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut-test.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 0.9"));
- }
- private static void verifyModel(LogisticModelParameters lmp,
- RecordFactory csv,
- List<String> data,
- AbstractVectorClassifier model,
- Map<String, Double> expectedValues) {
- ModelDissector md = new ModelDissector();
- for (String line : data.subList(1, data.size())) {
- Vector v = new DenseVector(lmp.getNumFeatures());
- csv.getTraceDictionary().clear();
- csv.processLine(line, v);
- md.update(v, csv.getTraceDictionary(), model);
- }
- // check right variables are present
- List<ModelDissector.Weight> weights = md.summary(10);
- Set<String> expected = Sets.newHashSet(expectedValues.keySet());
- for (ModelDissector.Weight weight : weights) {
- assertTrue(expected.remove(weight.getFeature()));
- assertEquals(expectedValues.get(weight.getFeature()), weight.getWeight(), 0.1);
- }
- assertEquals(0, expected.size());
- }
diff --git a/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java b/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
deleted file mode 100644
index 6e43b97..0000000
--- a/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.clustering.display;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Before;
-import org.junit.Test;
-import java.io.IOException;
-public class ClustersFilterTest extends MahoutTestCase {
- private Configuration configuration;
- private Path output;
- @Override
- @Before
- public void setUp() throws Exception {
- super.setUp();
- configuration = getConfiguration();
- output = getTestTempDirPath();
- }
- @Test
- public void testAcceptNotFinal() throws Exception {
- Path path0 = new Path(output, "clusters-0");
- Path path1 = new Path(output, "clusters-1");
- path0.getFileSystem(configuration).createNewFile(path0);
- path1.getFileSystem(configuration).createNewFile(path1);
- PathFilter clustersFilter = new ClustersFilter();
- assertTrue(clustersFilter.accept(path0));
- assertTrue(clustersFilter.accept(path1));
- }
- @Test
- public void testAcceptFinalPath() throws IOException {
- Path path0 = new Path(output, "clusters-0");
- Path path1 = new Path(output, "clusters-1");
- Path path2 = new Path(output, "clusters-2");
- Path path3Final = new Path(output, "clusters-3-final");
- path0.getFileSystem(configuration).createNewFile(path0);
- path1.getFileSystem(configuration).createNewFile(path1);
- path2.getFileSystem(configuration).createNewFile(path2);
- path3Final.getFileSystem(configuration).createNewFile(path3Final);
- PathFilter clustersFilter = new ClustersFilter();
- assertTrue(clustersFilter.accept(path0));
- assertTrue(clustersFilter.accept(path1));
- assertTrue(clustersFilter.accept(path2));
- assertTrue(clustersFilter.accept(path3Final));
- }
diff --git a/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java b/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
deleted file mode 100644
index 4d81e3f..0000000
--- a/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
+++ /dev/null
@@ -1,30 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.examples;
- * This class should not exist. It's here to work around some bizarre problem in Maven
- * dependency management wherein it can see methods in {@link org.apache.mahout.common.MahoutTestCase}
- * but not constants. Duplicated here to make it jive.
- */
-public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
- /** "Close enough" value for floating-point comparisons. */
- public static final double EPSILON = 0.000001;
diff --git a/examples/src/test/resources/country.txt b/examples/src/test/resources/country.txt
deleted file mode 100644
index 6a22091..0000000
--- a/examples/src/test/resources/country.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-American Samoa
-Antigua and Barbuda
-Bosnia and Herzegovina
-Bouvet Island
-British Indian Ocean Territory
-Brunei Darussalam
-Burkina Faso
-Cape Verde
-Cayman Islands
-Central African Republic
-Christmas Island
-Cocos Islands
-Cook Islands
-Costa Rica
-Côte d'Ivoire
-Czech Republic
-Dominican Republic
-El Salvador
-Equatorial Guinea
-Falkland Islands
-Faroe Islands
-French Guiana
-French Polynesia
-French Southern Territories
-Hong Kong
-Isle of Man
-Marshall Islands
-Netherlands Antilles
-New Caledonia
-New Zealand
-Norfolk Island
-Northern Mariana Islands
-Palestinian Territory
-Papua New Guinea
-Puerto Rico
-Russian Federation
-Saint Barthélemy
-Saint Helena
-Saint Kitts and Nevis
-Saint Lucia
-Saint Martin
-Saint Pierre and Miquelon
-Saint Vincent and the Grenadines
-San Marino
-Sao Tome and Principe
-Saudi Arabia
-Sierra Leone
-Solomon Islands
-South Africa
-South Georgia and the South Sandwich Islands
-Sri Lanka
-Svalbard and Jan Mayen
-Syrian Arab Republic
-Trinidad and Tobago
-Turks and Caicos Islands
-United Arab Emirates
-United Kingdom
-United States
-United States Minor Outlying Islands
-Virgin Islands
-Wallis and Futuna
diff --git a/examples/src/test/resources/country10.txt b/examples/src/test/resources/country10.txt
deleted file mode 100644
index 97a63e1..0000000
--- a/examples/src/test/resources/country10.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-United Kingdom
diff --git a/examples/src/test/resources/country2.txt b/examples/src/test/resources/country2.txt
deleted file mode 100644
index f4b4f61..0000000
--- a/examples/src/test/resources/country2.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-United States
-United Kingdom
diff --git a/examples/src/test/resources/subjects.txt b/examples/src/test/resources/subjects.txt
deleted file mode 100644
index f52ae33..0000000
--- a/examples/src/test/resources/subjects.txt
+++ /dev/null
@@ -1,2 +0,0 @@
diff --git a/examples/src/test/resources/wdbc.infos b/examples/src/test/resources/wdbc.infos
deleted file mode 100644
index 94a63d6..0000000
--- a/examples/src/test/resources/wdbc.infos
+++ /dev/null
@@ -1,32 +0,0 @@
-NUMERICAL, 6.9, 28.2
-NUMERICAL, 9.7, 39.3
-NUMERICAL, 43.7, 188.5
-NUMERICAL, 143.5, 2501.0
-NUMERICAL, 0.0, 0.2
-NUMERICAL, 0.0, 0.4
-NUMERICAL, 0.0, 0.5
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.1, 0.4
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.1, 2.9
-NUMERICAL, 0.3, 4.9
-NUMERICAL, 0.7, 22.0
-NUMERICAL, 6.8, 542.3
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.2
-NUMERICAL, 0.0, 0.4
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 7.9, 36.1
-NUMERICAL, 12.0, 49.6
-NUMERICAL, 50.4, 251.2
-NUMERICAL, 185.2, 4254.0
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.0, 1.1
-NUMERICAL, 0.0, 1.3
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.1, 0.7
-NUMERICAL, 0.0, 0.3
diff --git a/examples/src/main/resources/cf-data-purchase.txt b/examples/src/main/resources/cf-data-purchase.txt
deleted file mode 100644
index d87c031..0000000
--- a/examples/src/main/resources/cf-data-purchase.txt
+++ /dev/null
@@ -1,7 +0,0 @@
diff --git a/examples/src/main/resources/cf-data-view.txt b/examples/src/main/resources/cf-data-view.txt
deleted file mode 100644
index 09ad9b6..0000000
--- a/examples/src/main/resources/cf-data-view.txt
+++ /dev/null
@@ -1,12 +0,0 @@
diff --git a/examples/src/main/resources/donut-test.csv b/examples/src/main/resources/donut-test.csv
deleted file mode 100644
index 46ea564..0000000
--- a/examples/src/main/resources/donut-test.csv
+++ /dev/null
@@ -1,41 +0,0 @@
diff --git a/examples/src/main/resources/donut.csv b/examples/src/main/resources/donut.csv
deleted file mode 100644
index 33ba3b7..0000000
--- a/examples/src/main/resources/donut.csv
+++ /dev/null
@@ -1,41 +0,0 @@
diff --git a/examples/src/main/resources/test-data.csv b/examples/src/main/resources/test-data.csv
deleted file mode 100644
index ab683cd..0000000
--- a/examples/src/main/resources/test-data.csv
+++ /dev/null
@@ -1,61 +0,0 @@
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java
deleted file mode 100644
index e849011..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/LogisticModelParametersTest.java
+++ /dev/null
@@ -1,43 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Test;
-import java.io.ByteArrayOutputStream;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Collections;
-public class LogisticModelParametersTest extends MahoutTestCase {
- @Test
- public void serializationWithoutCsv() throws IOException {
- LogisticModelParameters params = new LogisticModelParameters();
- params.setTargetVariable("foo");
- params.setTypeMap(Collections.<String, String>emptyMap());
- params.setTargetCategories(Arrays.asList("foo", "bar"));
- params.setNumFeatures(1);
- params.createRegression();
- //MAHOUT-1196 should work without "csv" being set
- params.saveTo(new ByteArrayOutputStream());
- }
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java
deleted file mode 100644
index c8e4879..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/ModelDissectorTest.java
+++ /dev/null
@@ -1,40 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import org.apache.mahout.examples.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.junit.Test;
-public class ModelDissectorTest extends MahoutTestCase {
- @Test
- public void testCategoryOrdering() {
- ModelDissector.Weight w = new ModelDissector.Weight("a", new DenseVector(new double[]{-2, -5, 5, 2, 4, 1, 0}), 4);
- assertEquals(1, w.getCategory(0), 0);
- assertEquals(-5, w.getWeight(0), 0);
- assertEquals(2, w.getCategory(1), 0);
- assertEquals(5, w.getWeight(1), 0);
- assertEquals(4, w.getCategory(2), 0);
- assertEquals(4, w.getWeight(2), 0);
- assertEquals(0, w.getCategory(3), 0);
- assertEquals(-2, w.getWeight(3), 0);
- }
diff --git a/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java b/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
deleted file mode 100644
index 4cde692..0000000
--- a/examples/src/test/java/org/apache/mahout/classifier/sgd/TrainLogisticTest.java
+++ /dev/null
@@ -1,167 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.classifier.sgd;
-import com.google.common.base.Charsets;
-import com.google.common.collect.ImmutableMap;
-import com.google.common.collect.Sets;
-import com.google.common.io.Resources;
-import org.apache.mahout.classifier.AbstractVectorClassifier;
-import org.apache.mahout.examples.MahoutTestCase;
-import org.apache.mahout.math.DenseVector;
-import org.apache.mahout.math.Vector;
-import org.junit.Test;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.InputStream;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.List;
-import java.util.Map;
-import java.util.Set;
-import java.util.TreeSet;
-public class TrainLogisticTest extends MahoutTestCase {
- @Test
- public void example131() throws Exception {
- String outputFile = getTestTempFile("model").getAbsolutePath();
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw, true);
- TrainLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--output", outputFile,
- "--target", "color", "--categories", "2",
- "--predictors", "x", "y",
- "--types", "numeric",
- "--features", "20",
- "--passes", "100",
- "--rate", "50"
- }, pw);
- String trainOut = sw.toString();
- assertTrue(trainOut.contains("x -0.7"));
- assertTrue(trainOut.contains("y -0.4"));
- LogisticModelParameters lmp = TrainLogistic.getParameters();
- assertEquals(1.0e-4, lmp.getLambda(), 1.0e-9);
- assertEquals(20, lmp.getNumFeatures());
- assertTrue(lmp.useBias());
- assertEquals("color", lmp.getTargetVariable());
- CsvRecordFactory csv = lmp.getCsvRecordFactory();
- assertEquals("[1, 2]", new TreeSet<>(csv.getTargetCategories()).toString());
- assertEquals("[Intercept Term, x, y]", Sets.newTreeSet(csv.getPredictors()).toString());
- // verify model by building dissector
- AbstractVectorClassifier model = TrainLogistic.getModel();
- List<String> data = Resources.readLines(Resources.getResource("donut.csv"), Charsets.UTF_8);
- Map<String, Double> expectedValues = ImmutableMap.of("x", -0.7, "y", -0.43, "Intercept Term", -0.15);
- verifyModel(lmp, csv, data, model, expectedValues);
- // test saved model
- try (InputStream in = new FileInputStream(new File(outputFile))){
- LogisticModelParameters lmpOut = LogisticModelParameters.loadFrom(in);
- CsvRecordFactory csvOut = lmpOut.getCsvRecordFactory();
- csvOut.firstLine(data.get(0));
- OnlineLogisticRegression lrOut = lmpOut.createRegression();
- verifyModel(lmpOut, csvOut, data, lrOut, expectedValues);
- }
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 0.57"));
- assertTrue(trainOut.contains("confusion: [[27.0, 13.0], [0.0, 0.0]]"));
- }
- @Test
- public void example132() throws Exception {
- String outputFile = getTestTempFile("model").getAbsolutePath();
- StringWriter sw = new StringWriter();
- PrintWriter pw = new PrintWriter(sw, true);
- TrainLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--output", outputFile,
- "--target", "color",
- "--categories", "2",
- "--predictors", "x", "y", "a", "b", "c",
- "--types", "numeric",
- "--features", "20",
- "--passes", "100",
- "--rate", "50"
- }, pw);
- String trainOut = sw.toString();
- assertTrue(trainOut.contains("a 0."));
- assertTrue(trainOut.contains("b -1."));
- assertTrue(trainOut.contains("c -25."));
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 1.00"));
- sw = new StringWriter();
- pw = new PrintWriter(sw, true);
- RunLogistic.mainToOutput(new String[]{
- "--input", "donut-test.csv",
- "--model", outputFile,
- "--auc",
- "--confusion"
- }, pw);
- trainOut = sw.toString();
- assertTrue(trainOut.contains("AUC = 0.9"));
- }
- private static void verifyModel(LogisticModelParameters lmp,
- RecordFactory csv,
- List<String> data,
- AbstractVectorClassifier model,
- Map<String, Double> expectedValues) {
- ModelDissector md = new ModelDissector();
- for (String line : data.subList(1, data.size())) {
- Vector v = new DenseVector(lmp.getNumFeatures());
- csv.getTraceDictionary().clear();
- csv.processLine(line, v);
- md.update(v, csv.getTraceDictionary(), model);
- }
- // check right variables are present
- List<ModelDissector.Weight> weights = md.summary(10);
- Set<String> expected = Sets.newHashSet(expectedValues.keySet());
- for (ModelDissector.Weight weight : weights) {
- assertTrue(expected.remove(weight.getFeature()));
- assertEquals(expectedValues.get(weight.getFeature()), weight.getWeight(), 0.1);
- }
- assertEquals(0, expected.size());
- }
diff --git a/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java b/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
deleted file mode 100644
index 6e43b97..0000000
--- a/examples/src/test/java/org/apache/mahout/clustering/display/ClustersFilterTest.java
+++ /dev/null
@@ -1,75 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.clustering.display;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.PathFilter;
-import org.apache.mahout.common.MahoutTestCase;
-import org.junit.Before;
-import org.junit.Test;
-import java.io.IOException;
-public class ClustersFilterTest extends MahoutTestCase {
- private Configuration configuration;
- private Path output;
- @Override
- @Before
- public void setUp() throws Exception {
- super.setUp();
- configuration = getConfiguration();
- output = getTestTempDirPath();
- }
- @Test
- public void testAcceptNotFinal() throws Exception {
- Path path0 = new Path(output, "clusters-0");
- Path path1 = new Path(output, "clusters-1");
- path0.getFileSystem(configuration).createNewFile(path0);
- path1.getFileSystem(configuration).createNewFile(path1);
- PathFilter clustersFilter = new ClustersFilter();
- assertTrue(clustersFilter.accept(path0));
- assertTrue(clustersFilter.accept(path1));
- }
- @Test
- public void testAcceptFinalPath() throws IOException {
- Path path0 = new Path(output, "clusters-0");
- Path path1 = new Path(output, "clusters-1");
- Path path2 = new Path(output, "clusters-2");
- Path path3Final = new Path(output, "clusters-3-final");
- path0.getFileSystem(configuration).createNewFile(path0);
- path1.getFileSystem(configuration).createNewFile(path1);
- path2.getFileSystem(configuration).createNewFile(path2);
- path3Final.getFileSystem(configuration).createNewFile(path3Final);
- PathFilter clustersFilter = new ClustersFilter();
- assertTrue(clustersFilter.accept(path0));
- assertTrue(clustersFilter.accept(path1));
- assertTrue(clustersFilter.accept(path2));
- assertTrue(clustersFilter.accept(path3Final));
- }
diff --git a/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java b/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
deleted file mode 100644
index 4d81e3f..0000000
--- a/examples/src/test/java/org/apache/mahout/examples/MahoutTestCase.java
+++ /dev/null
@@ -1,30 +0,0 @@
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.mahout.examples;
- * This class should not exist. It's here to work around some bizarre problem in Maven
- * dependency management wherein it can see methods in {@link org.apache.mahout.common.MahoutTestCase}
- * but not constants. Duplicated here to make it jive.
- */
-public abstract class MahoutTestCase extends org.apache.mahout.common.MahoutTestCase {
- /** "Close enough" value for floating-point comparisons. */
- public static final double EPSILON = 0.000001;
diff --git a/examples/src/test/resources/country.txt b/examples/src/test/resources/country.txt
deleted file mode 100644
index 6a22091..0000000
--- a/examples/src/test/resources/country.txt
+++ /dev/null
@@ -1,229 +0,0 @@
-American Samoa
-Antigua and Barbuda
-Bosnia and Herzegovina
-Bouvet Island
-British Indian Ocean Territory
-Brunei Darussalam
-Burkina Faso
-Cape Verde
-Cayman Islands
-Central African Republic
-Christmas Island
-Cocos Islands
-Cook Islands
-Costa Rica
-Côte d'Ivoire
-Czech Republic
-Dominican Republic
-El Salvador
-Equatorial Guinea
-Falkland Islands
-Faroe Islands
-French Guiana
-French Polynesia
-French Southern Territories
-Hong Kong
-Isle of Man
-Marshall Islands
-Netherlands Antilles
-New Caledonia
-New Zealand
-Norfolk Island
-Northern Mariana Islands
-Palestinian Territory
-Papua New Guinea
-Puerto Rico
-Russian Federation
-Saint Barthélemy
-Saint Helena
-Saint Kitts and Nevis
-Saint Lucia
-Saint Martin
-Saint Pierre and Miquelon
-Saint Vincent and the Grenadines
-San Marino
-Sao Tome and Principe
-Saudi Arabia
-Sierra Leone
-Solomon Islands
-South Africa
-South Georgia and the South Sandwich Islands
-Sri Lanka
-Svalbard and Jan Mayen
-Syrian Arab Republic
-Trinidad and Tobago
-Turks and Caicos Islands
-United Arab Emirates
-United Kingdom
-United States
-United States Minor Outlying Islands
-Virgin Islands
-Wallis and Futuna
diff --git a/examples/src/test/resources/country10.txt b/examples/src/test/resources/country10.txt
deleted file mode 100644
index 97a63e1..0000000
--- a/examples/src/test/resources/country10.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-United Kingdom
diff --git a/examples/src/test/resources/country2.txt b/examples/src/test/resources/country2.txt
deleted file mode 100644
index f4b4f61..0000000
--- a/examples/src/test/resources/country2.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-United States
-United Kingdom
diff --git a/examples/src/test/resources/subjects.txt b/examples/src/test/resources/subjects.txt
deleted file mode 100644
index f52ae33..0000000
--- a/examples/src/test/resources/subjects.txt
+++ /dev/null
@@ -1,2 +0,0 @@
diff --git a/examples/src/test/resources/wdbc.infos b/examples/src/test/resources/wdbc.infos
deleted file mode 100644
index 94a63d6..0000000
--- a/examples/src/test/resources/wdbc.infos
+++ /dev/null
@@ -1,32 +0,0 @@
-NUMERICAL, 6.9, 28.2
-NUMERICAL, 9.7, 39.3
-NUMERICAL, 43.7, 188.5
-NUMERICAL, 143.5, 2501.0
-NUMERICAL, 0.0, 0.2
-NUMERICAL, 0.0, 0.4
-NUMERICAL, 0.0, 0.5
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.1, 0.4
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.1, 2.9
-NUMERICAL, 0.3, 4.9
-NUMERICAL, 0.7, 22.0
-NUMERICAL, 6.8, 542.3
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.2
-NUMERICAL, 0.0, 0.4
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 0.0, 0.1
-NUMERICAL, 7.9, 36.1
-NUMERICAL, 12.0, 49.6
-NUMERICAL, 50.4, 251.2
-NUMERICAL, 185.2, 4254.0
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.0, 1.1
-NUMERICAL, 0.0, 1.3
-NUMERICAL, 0.0, 0.3
-NUMERICAL, 0.1, 0.7
-NUMERICAL, 0.0, 0.3