List of usage examples for org.apache.mahout.classifier.sgd.CsvRecordFactory#getTargetCategories
@Override
public List<String> getTargetCategories()
From source file: edu.isi.karma.cleaning.features.RecordClassifier2.java
License: Apache License
/**
 * Trains an online logistic-regression model on the supplied training data.
 *
 * <p>The data is first written to a CSV file through {@code Data2Features.Traindata2CSV};
 * that file is then replayed for a fixed number of passes and every data row is fed to
 * {@code OnlineLogisticRegression.train}. As side effects, the {@code lmp} field is replaced
 * by the parameters built for this run, and the {@code labels} field is set to the target
 * categories reported by the CSV record factory once training has finished.
 *
 * @param traindata training examples handed to {@code Data2Features.Traindata2CSV}
 *                  (presumably keyed by class label -- confirm against that helper)
 * @return the regression model after all passes over the CSV file
 * @throws IllegalStateException if the generated CSV file has no header line
 * @throws Exception anything propagated from CSV generation, file I/O, or the Mahout calls
 */
@SuppressWarnings({ "deprecation" })
public OnlineLogisticRegression train(HashMap<String, Vector<String>> traindata) throws Exception {
    String csvTrainFile = "./target/tmp/csvtrain.csv";
    String targetVariable = "label";
    Data2Features.Traindata2CSV(traindata, csvTrainFile, rf);

    lmp = new LogisticModelParameters();
    lmp.setTargetVariable(targetVariable);
    lmp.setMaxTargetCategories(rf.labels.size());
    lmp.setNumFeatures(rf.getFeatureNames().size());

    // Only one type is supplied; presumably Mahout applies the last listed type to every
    // remaining predictor -- confirm against LogisticModelParameters.setTypeMap.
    List<String> typeList = Lists.newArrayList();
    typeList.add("numeric");

    List<String> predictorList = Lists.newArrayList();
    for (String attr : rf.getFeatureNames()) {
        // The target column must never be used as a predictor. The historical check only
        // matched the misspelling "lable", so that spelling stays excluded as well to keep
        // the predictor list unchanged for existing feature sets.
        if (!attr.equals(targetVariable) && !attr.equals("lable")) {
            predictorList.add(attr);
        }
    }
    lmp.setTypeMap(predictorList, typeList);
    lmp.setLambda(1e-4);
    lmp.setLearningRate(50);

    int passes = 100;
    CsvRecordFactory csv = lmp.getCsvRecordFactory();
    OnlineLogisticRegression lr = lmp.createRegression();
    File trainFile = new File(csvTrainFile);

    for (int pass = 0; pass < passes; pass++) {
        // The file is reopened on every pass so that each pass starts at the header again.
        BufferedReader in = new BufferedReader(new FileReader(trainFile));
        try {
            // The first line carries the variable names.
            String header = in.readLine();
            if (header == null) {
                throw new IllegalStateException("Training CSV has no header line: " + csvTrainFile);
            }
            csv.firstLine(header);

            String line = in.readLine();
            while (line != null) {
                // Encode the predictors into a fresh vector and obtain the target value.
                RandomAccessSparseVector input = new RandomAccessSparseVector(lmp.getNumFeatures());
                int targetValue = csv.processLine(line, input);
                // Update the model with this example.
                lr.train(targetValue, input);
                line = in.readLine();
            }
        } finally {
            Closeables.closeQuietly(in);
        }
    }

    labels = csv.getTargetCategories();
    return lr;
}