Example usage for opennlp.tools.util TrainingParameters getSettings

List of usage examples for opennlp.tools.util TrainingParameters getSettings

Introduction

On this page you can find example usages for opennlp.tools.util TrainingParameters getSettings.

Prototype

public Map<String, String> getSettings() 

Source Link

Document

Retrieves all parameters without a namespace.
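Before the full examples below, here is a minimal, self-contained sketch of the typical pattern, assuming a recent OpenNLP 1.x release: parameters are stored on a TrainingParameters instance and later read back as plain strings through getSettings(). The parameter values chosen here are illustrative only.

import java.util.Map;

import opennlp.tools.util.TrainingParameters;

public class GetSettingsSketch {

    public static void main(String[] args) {
        TrainingParameters trainParams = new TrainingParameters();
        trainParams.put(TrainingParameters.ALGORITHM_PARAM, "MAXENT");
        trainParams.put(TrainingParameters.ITERATIONS_PARAM, "100");
        trainParams.put(TrainingParameters.CUTOFF_PARAM, "5");

        // getSettings() exposes the un-namespaced parameters as a plain
        // map of string keys to string values.
        Map<String, String> settings = trainParams.getSettings();

        int iterations = Integer.parseInt(settings.get(TrainingParameters.ITERATIONS_PARAM));
        int cutoff = Integer.parseInt(settings.get(TrainingParameters.CUTOFF_PARAM));
        System.out.println("iterations=" + iterations + ", cutoff=" + cutoff);
    }
}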

Usage

From source file:edu.usc.irds.agepredictor.spark.authorage.AgePredictSGDTrainer.java

public static AgePredictModel createModel(String languageCode, SparkSession spark, String eventDir,
        AgeClassifyContextGeneratorWrapper wrapper, TrainingParameters trainParams) throws IOException {

    // getSettings() exposes the training parameters as a plain String-to-String
    // map, from which the cutoff and iteration counts are read below.
    Map<String, String> params = trainParams.getSettings();

    int cutoff = getCutoff(params);
    int iterations = getIterations(params);

    JavaRDD<String> data = spark.sparkContext().textFile(eventDir, 24).toJavaRDD().cache();

    JavaRDD<Row> samples = data.map(new Function<String, Row>() {
        public Row call(String s) {
            if (s == null) {
                return null;
            }
            String[] parts = s.split(",");
            if (parts.length != 3) {
                return null;
            }
            try {
                if (!"-1".equals(parts[0])) {
                    Integer value = Integer.parseInt(parts[0]);

                    String[] text = parts[2].split(" ");
                    //add in the category as another feature
                    List<String> tokens = new ArrayList<String>(Arrays.asList(text));

                    for (int i = 0; i < text.length / 18; i++) {
                        tokens.add("cat=" + parts[1]);
                    }

                    //System.out.println("Event:" + value + "," + Arrays.toString(tokens.toArray()));
                    return RowFactory.create(value, tokens.toArray());
                } else {
                    return null;
                }
            } catch (Exception e) {
                return null;
            }

        }
    }).cache();

    JavaRDD<Row> validSamples = samples.filter(new Function<Row, Boolean>() {
        @Override
        public Boolean call(Row s) {
            return s != null;
        }
    }).cache();

    samples.unpersist();

    StructType schema = new StructType(new StructField[] {
            new StructField("value", DataTypes.IntegerType, false, Metadata.empty()),
            new StructField("context", new ArrayType(DataTypes.StringType, true), false, Metadata.empty()) });

    Dataset<Row> df = spark.createDataFrame(validSamples, schema).cache();

    CountVectorizerModel cvm = new CountVectorizer().setInputCol("context").setOutputCol("feature")
            .setMinDF(cutoff).fit(df);

    Normalizer normalizer = new Normalizer().setInputCol("feature").setOutputCol("normFeature").setP(1.0);

    Dataset<Row> eventDF = cvm.transform(df).select("value", "feature");
    //System.out.println("Vocab: " + cvm.vocabulary().length + "," + Arrays.toString(cvm.vocabulary()));
    Dataset<Row> normDF = normalizer.transform(eventDF).select("value", "normFeature");

    JavaRDD<Row> events = normDF.javaRDD().cache();

    eventDF.unpersist();
    normDF.unpersist();

    JavaRDD<LabeledPoint> parsedData = events.map(new Function<Row, LabeledPoint>() {
        public LabeledPoint call(Row r) {
            Integer val = r.getInt(0);
            SparseVector vec = (SparseVector) r.get(1);

            Vector features = Vectors.sparse(vec.size(), vec.indices(), vec.values());
            return new LabeledPoint(val, features);
        }
    }).cache();

    double stepSize = getStepSize(params);
    double regParam = getReg(params);

    LassoWithSGD algorithm = (LassoWithSGD) new LassoWithSGD().setIntercept(true);

    algorithm.optimizer().setNumIterations(iterations).setStepSize(stepSize).setRegParam(regParam);

    final LassoModel model = algorithm.run(JavaRDD.toRDD(parsedData));

    System.out.println("Coefficients: " + Arrays.toString(model.weights().toArray()));
    System.out.println("Intercept: " + model.intercept());

    // Evaluate model on training examples and compute training error
    JavaRDD<Tuple2<Double, Double>> valuesAndPreds = parsedData
            .map(new Function<LabeledPoint, Tuple2<Double, Double>>() {
                public Tuple2<Double, Double> call(LabeledPoint point) {
                    double prediction = model.predict(point.features());
                    System.out.println(prediction + "," + point.label());
                    return new Tuple2<>(prediction, point.label());
                }
            }).cache();

    double MAE = new JavaDoubleRDD(valuesAndPreds.map(new Function<Tuple2<Double, Double>, Object>() {
        public Object call(Tuple2<Double, Double> pair) {
            return Math.abs(pair._1() - pair._2());
        }
    }).rdd()).mean();

    JavaRDD<Vector> vectors = valuesAndPreds.map(new Function<Tuple2<Double, Double>, Vector>() {
        public Vector call(Tuple2<Double, Double> pair) {
            return Vectors.dense(pair._1(), pair._2());
        }
    });
    Matrix correlMatrix = Statistics.corr(vectors.rdd(), "pearson");

    System.out.println("Training Mean Absolute Error: " + MAE);
    System.out.println("Correlation:\n" + correlMatrix.toString());

    Map<String, String> manifestInfoEntries = new HashMap<String, String>();
    return new AgePredictModel(languageCode, model, cvm.vocabulary(), wrapper);
}
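The helper methods getCutoff, getIterations, getStepSize and getReg are not shown in this file. A plausible sketch of what they might do with the map returned by getSettings() follows; the keys "StepSize" and "RegParam" and all default values are assumptions, not taken from the project above.

private static int getCutoff(Map<String, String> params) {
    // Hypothetical helper: fall back to a default when the key is absent.
    String value = params.get(TrainingParameters.CUTOFF_PARAM);
    return value != null ? Integer.parseInt(value) : 5;
}

private static int getIterations(Map<String, String> params) {
    String value = params.get(TrainingParameters.ITERATIONS_PARAM);
    return value != null ? Integer.parseInt(value) : 100;
}

private static double getStepSize(Map<String, String> params) {
    // "StepSize" is an assumed key name.
    String value = params.get("StepSize");
    return value != null ? Double.parseDouble(value) : 1.0;
}

private static double getReg(Map<String, String> params) {
    // "RegParam" is an assumed key name.
    String value = params.get("RegParam");
    return value != null ? Double.parseDouble(value) : 0.01;
}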

From source file:es.ehu.si.ixa.pipe.nerc.train.InputOutputUtils.java

private static TrainingParameters loadTrainingParameters(String paramFile, boolean supportSequenceTraining) {

    TrainingParameters params = null;

    if (paramFile != null) {

        checkInputFile("Training Parameter", new File(paramFile));

        InputStream paramsIn = null;
        try {
            paramsIn = new FileInputStream(new File(paramFile));

            params = new opennlp.tools.util.TrainingParameters(paramsIn);
        } catch (IOException e) {
            throw new TerminateToolException(-1, "Error during parameters loading: " + e.getMessage(), e);
        } finally {
            try {
                if (paramsIn != null)
                    paramsIn.close();
            } catch (IOException e) {
                // sorry that this can fail
            }
        }

        // TrainUtil validates the raw parameter map exposed by getSettings().
        if (!TrainUtil.isValid(params.getSettings())) {
            throw new TerminateToolException(1, "Training parameters file '" + paramFile + "' is invalid!");
        }

        if (!supportSequenceTraining && TrainUtil.isSequenceTraining(params.getSettings())) {
            throw new TerminateToolException(1, "Sequence training is not supported!");
        }
    }

    return params;
}
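The TrainingParameters(InputStream) constructor used above reads a standard Java properties file. As a minimal sketch of the round trip (the keys and values are illustrative, not taken from either project), the same constructor can be fed an in-memory stream and the result inspected through getSettings():

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import opennlp.tools.util.TrainingParameters;

public class LoadParamsSketch {

    public static void main(String[] args) throws IOException {
        // Contents of a typical training parameter file; illustrative values.
        String props = "Algorithm=MAXENT\nIterations=100\nCutoff=5\n";

        TrainingParameters params = new TrainingParameters(
                new ByteArrayInputStream(props.getBytes(StandardCharsets.UTF_8)));

        // The three keys come back as plain strings, without any namespace.
        System.out.println(params.getSettings());
    }
}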