List of usage examples for org.apache.mahout.classifier.sgd CsvRecordFactory getTraceDictionary
@Override
public Map<String, Set<Integer>> getTraceDictionary()
From source file:com.ml.ira.algos.TrainLogistic.java
License:Apache License
static void mainToOutput(String[] args, PrintWriter output) throws Exception { if (parseArgs(args)) { double logPEstimate = 0; int samples = 0; System.out.println("fieldNames: " + fieldNames); long ts = System.currentTimeMillis(); CsvRecordFactory csv = lmp.getCsvRecordFactory(); OnlineLogisticRegression lr = lmp.createRegression(); for (int pass = 0; pass < passes; pass++) { System.out.println("at Round: " + pass); BufferedReader in = open(inputFile); try { // read variable names String line;/*w ww . j av a2 s.co m*/ if (fieldNames != null && fieldNames.length() > 0) { csv.firstLine(fieldNames); } else { csv.firstLine(in.readLine()); } line = in.readLine(); while (line != null) { // for each new line, get target and predictors Vector input = new RandomAccessSparseVector(lmp.getNumFeatures()); int targetValue = csv.processLine(line, input); // check performance while this is still news double logP = lr.logLikelihood(targetValue, input); if (!Double.isInfinite(logP)) { if (samples < 20) { logPEstimate = (samples * logPEstimate + logP) / (samples + 1); } else { logPEstimate = 0.95 * logPEstimate + 0.05 * logP; } samples++; } double p = lr.classifyScalar(input); if (scores) { output.printf(Locale.ENGLISH, "%10d %2d %10.2f %2.4f %10.4f %10.4f%n", samples, targetValue, lr.currentLearningRate(), p, logP, logPEstimate); } // now update model lr.train(targetValue, input); line = in.readLine(); } } finally { Closeables.close(in, true); } output.println("duration: " + (System.currentTimeMillis() - ts)); } if (outputFile.startsWith("hdfs://")) { lmp.saveTo(new Path(outputFile)); } else { OutputStream modelOutput = new FileOutputStream(outputFile); try { lmp.saveTo(modelOutput); } finally { Closeables.close(modelOutput, false); } } output.println("duration: " + (System.currentTimeMillis() - ts)); output.println(lmp.getNumFeatures()); output.println(lmp.getTargetVariable() + " ~ "); String sep = ""; for (String v : csv.getTraceDictionary().keySet()) { double weight = predictorWeight(lr, 0, csv, v); if (weight != 0) { output.printf(Locale.ENGLISH, "%s%.3f*%s", sep, weight, v); sep = " + "; } } output.printf("%n"); model = lr; for (int row = 0; row < lr.getBeta().numRows(); row++) { for (String key : csv.getTraceDictionary().keySet()) { double weight = predictorWeight(lr, row, csv, key); if (weight != 0) { output.printf(Locale.ENGLISH, "%20s %.5f%n", key, weight); } } for (int column = 0; column < lr.getBeta().numCols(); column++) { output.printf(Locale.ENGLISH, "%15.9f ", lr.getBeta().get(row, column)); } output.println(); } } }
From source file:haflow.component.mahout.logistic.TrainLogistic.java
License:Apache License
static void mainToOutput(String[] args) throws Exception { if (parseArgs(args)) { double logPEstimate = 0; int samples = 0; OutputStream o = HdfsUtil.writeHdfs(inforFile); PrintWriter output = new PrintWriter(o, true); CsvRecordFactory csv = lmp.getCsvRecordFactory(); OnlineLogisticRegression lr = lmp.createRegression(); for (int pass = 0; pass < passes; pass++) { BufferedReader in = new BufferedReader(new InputStreamReader(HdfsUtil.open(inputFile))); try { // read variable names csv.firstLine(in.readLine()); String line = in.readLine(); while (line != null) { // for each new line, get target and predictors Vector input = new RandomAccessSparseVector(lmp.getNumFeatures()); int targetValue = csv.processLine(line, input); // check performance while this is still news double logP = lr.logLikelihood(targetValue, input); if (!Double.isInfinite(logP)) { if (samples < 20) { logPEstimate = (samples * logPEstimate + logP) / (samples + 1); } else { logPEstimate = 0.95 * logPEstimate + 0.05 * logP; }/* ww w. ja va2s . c o m*/ samples++; } double p = lr.classifyScalar(input); if (scores) { output.printf(Locale.ENGLISH, "%10d %2d %10.2f %2.4f %10.4f %10.4f%n", samples, targetValue, lr.currentLearningRate(), p, logP, logPEstimate); } // now update model lr.train(targetValue, input); line = in.readLine(); } } finally { Closeables.close(in, true); } } //OutputStream modelOutput = new FileOutputStream(outputFile); OutputStream modelOutput = HdfsUtil.writeHdfs(outputFile); try { lmp.saveTo(modelOutput); } finally { Closeables.close(modelOutput, false); } output.println(lmp.getNumFeatures()); output.println(lmp.getTargetVariable() + " ~ "); String sep = ""; for (String v : csv.getTraceDictionary().keySet()) { double weight = predictorWeight(lr, 0, csv, v); if (weight != 0) { output.printf(Locale.ENGLISH, "%s%.3f*%s", sep, weight, v); sep = " + "; } } output.printf("%n"); model = lr; for (int row = 0; row < lr.getBeta().numRows(); row++) { for (String key : csv.getTraceDictionary().keySet()) { double weight = predictorWeight(lr, row, csv, key); if (weight != 0) { output.printf(Locale.ENGLISH, "%20s %.5f%n", key, weight); } } for (int column = 0; column < lr.getBeta().numCols(); column++) { output.printf(Locale.ENGLISH, "%15.9f ", lr.getBeta().get(row, column)); } output.println(); } output.close(); } }