Example usage for opennlp.tools.languagemodel NGramLanguageModel add

List of usage examples for opennlp.tools.languagemodel NGramLanguageModel add

Introduction

On this page you can find example usages of the add method of opennlp.tools.languagemodel NGramLanguageModel.

Prototype

public void add(StringList ngram, int minLength, int maxLength) 

Source Link

Document

Adds NGrams up to the specified length to the current instance.

Usage

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Feeds a randomly generated vocabulary into a default-constructed model
 * (n-gram lengths 2 to 3) and checks that the probability computed for a
 * randomly generated sentence lies in the closed interval [0, 1].
 */
@Test
public void testRandomVocabularyAndSentence() throws Exception {
    NGramLanguageModel languageModel = new NGramLanguageModel();
    for (StringList entry : LanguageModelTestUtils.generateRandomVocabulary(10)) {
        languageModel.add(entry, 2, 3);
    }

    double p = languageModel.calculateProbability(LanguageModelTestUtils.generateRandomSentence());
    boolean withinUnitInterval = p >= 0 && p <= 1;
    Assert.assertTrue("a probability measure should be between 0 and 1 [was " + p + "]", withinUnitInterval);
}

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Trains a model constructed with argument 4 on three short sentences, then
 * checks that a sentence probability lies in [0, 1] and that the tokens
 * predicted after "I saw" are "the fox".
 */
@Test
public void testNgramModel() throws Exception {
    NGramLanguageModel lm = new NGramLanguageModel(4);
    lm.add(new StringList("I", "saw", "the", "fox"), 1, 4);
    lm.add(new StringList("the", "red", "house"), 1, 4);
    // The third sentence is only added with n-gram lengths 1 to 2.
    lm.add(new StringList("I", "saw", "something", "nice"), 1, 2);

    StringList sentence = new StringList("I", "saw", "the", "red", "house");
    double p = lm.calculateProbability(sentence);
    boolean withinUnitInterval = p >= 0 && p <= 1;
    Assert.assertTrue("a probability measure should be between 0 and 1 [was " + p + "]", withinUnitInterval);

    StringList prefix = new StringList("I", "saw");
    StringList predicted = lm.predictNextTokens(prefix);
    Assert.assertNotNull(predicted);
    StringList expected = new StringList("the", "fox");
    Assert.assertEquals(expected, predicted);
}

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Checks exact probabilities on the three-sentence "Sam I am" corpus, with
 * unigrams and bigrams added, to within a tolerance of 0.001.
 *
 * NOTE(review): the second constructor argument (0) presumably disables
 * smoothing, as the test name suggests - confirm against NGramLanguageModel.
 */
@Test
public void testBigramProbabilityNoSmoothing() throws Exception {
    final double tolerance = 0.001;

    NGramLanguageModel lm = new NGramLanguageModel(2, 0);
    lm.add(new StringList("<s>", "I", "am", "Sam", "</s>"), 1, 2);
    lm.add(new StringList("<s>", "Sam", "I", "am", "</s>"), 1, 2);
    lm.add(new StringList("<s>", "I", "do", "not", "like", "green", "eggs", "and", "ham", "</s>"), 1, 2);

    // Each expected value is checked in the same order as the corpus queries below.
    Assert.assertEquals(0.666d, lm.calculateProbability(new StringList("<s>", "I")), tolerance);
    Assert.assertEquals(0.5d, lm.calculateProbability(new StringList("Sam", "</s>")), tolerance);
    Assert.assertEquals(0.333d, lm.calculateProbability(new StringList("<s>", "Sam")), tolerance);
    Assert.assertEquals(0.5d, lm.calculateProbability(new StringList("am", "Sam")), tolerance);
    Assert.assertEquals(0.666d, lm.calculateProbability(new StringList("I", "am")), tolerance);
    Assert.assertEquals(0.333d, lm.calculateProbability(new StringList("I", "do")), tolerance);
    // Three-token query against a model constructed with n = 2.
    Assert.assertEquals(0.333d, lm.calculateProbability(new StringList("I", "am", "Sam")), tolerance);
}

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Trains a model constructed with argument 3 on three sentences (n-gram
 * lengths 2 to 3), then checks that a sentence probability lies in [0, 1]
 * and that the tokens predicted after "I saw" are "something nice".
 */
@Test
public void testTrigram() throws Exception {
    NGramLanguageModel lm = new NGramLanguageModel(3);
    // Only the third sentence contains "I saw"; the first uses "I see".
    lm.add(new StringList("I", "see", "the", "fox"), 2, 3);
    lm.add(new StringList("the", "red", "house"), 2, 3);
    lm.add(new StringList("I", "saw", "something", "nice"), 2, 3);

    StringList sentence = new StringList("I", "saw", "the", "red", "house");
    double p = lm.calculateProbability(sentence);
    boolean withinUnitInterval = p >= 0 && p <= 1;
    Assert.assertTrue("a probability measure should be between 0 and 1 [was " + p + "]", withinUnitInterval);

    StringList prefix = new StringList("I", "saw");
    StringList predicted = lm.predictNextTokens(prefix);
    Assert.assertNotNull(predicted);
    StringList expected = new StringList("something", "nice");
    Assert.assertEquals(expected, predicted);
}

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Trains a model constructed with argument 2 on three sentences (n-gram
 * lengths 1 to 2), then checks that a sentence probability lies in [0, 1]
 * and that the single token predicted after "I saw" is "something".
 */
@Test
public void testBigram() throws Exception {
    NGramLanguageModel lm = new NGramLanguageModel(2);
    lm.add(new StringList("I", "see", "the", "fox"), 1, 2);
    lm.add(new StringList("the", "red", "house"), 1, 2);
    lm.add(new StringList("I", "saw", "something", "nice"), 1, 2);

    StringList sentence = new StringList("I", "saw", "the", "red", "house");
    double p = lm.calculateProbability(sentence);
    boolean withinUnitInterval = p >= 0 && p <= 1;
    Assert.assertTrue("a probability measure should be between 0 and 1 [was " + p + "]", withinUnitInterval);

    StringList prefix = new StringList("I", "saw");
    StringList predicted = lm.predictNextTokens(prefix);
    Assert.assertNotNull(predicted);
    StringList expected = new StringList("something");
    Assert.assertEquals(expected, predicted);
}

From source file:opennlp.tools.languagemodel.NgramLanguageModelTest.java

/**
 * Builds a model with n-gram size 3 from the classpath resource
 * {@code /opennlp/tools/languagemodel/sentences.txt}: each line is split on
 * single spaces, n-grams are generated with {@code NGramGenerator}, and each
 * generated n-gram is added to the model. It then checks that "models" is
 * predicted after "neural network language" and that the sentence ending in
 * "models" is more probable than the one ending in "model".
 *
 * @throws Exception if the resource cannot be read
 */
@Test
public void testTrigramLanguageModelCreationFromText() throws Exception {
    int ngramSize = 3;
    NGramLanguageModel languageModel = new NGramLanguageModel(ngramSize);
    // try-with-resources: the stream is closed even if reading or adding n-grams throws.
    try (InputStream stream = getClass().getResourceAsStream("/opennlp/tools/languagemodel/sentences.txt")) {
        // getResourceAsStream returns null for a missing resource; fail with a clear message
        // instead of an opaque NullPointerException inside readLines.
        Assert.assertNotNull("test resource /opennlp/tools/languagemodel/sentences.txt not found", stream);
        for (String line : IOUtils.readLines(stream)) {
            String[] array = line.split(" ");
            List<String> split = Arrays.asList(array);
            List<String> generatedStrings = NGramGenerator.generate(split, ngramSize, " ");
            for (String generatedString : generatedStrings) {
                String[] tokens = generatedString.split(" ");
                // split(" ") yields an empty array for a string made only of separators; skip those.
                if (tokens.length > 0) {
                    languageModel.add(new StringList(tokens), 1, ngramSize);
                }
            }
        }
    }
    StringList tokens = languageModel.predictNextTokens(new StringList("neural", "network", "language"));
    Assert.assertNotNull(tokens);
    Assert.assertEquals(new StringList("models"), tokens);
    double p1 = languageModel.calculateProbability(new StringList("neural", "network", "language", "models"));
    double p2 = languageModel.calculateProbability(new StringList("neural", "network", "language", "model"));
    Assert.assertTrue(p1 > p2);
}