Example usage for opennlp.tools.ngram NGramModel add

List of usage examples for opennlp.tools.ngram NGramModel add

Introduction

This page collects usage examples for the add method of opennlp.tools.ngram NGramModel.

Prototype

public void add(CharSequence chars, int minLength, int maxLength) 

Source Link

Document

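Adds character NGrams to the current instance. Note that the examples below call the overload of add that takes a StringList of tokens, not the CharSequence variant shown in the prototype above.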

Usage

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds the token list ("the", "bro", "wn") with the arguments 2 and 3 (presumably the
 * minimum and maximum n-gram length) and checks that the full three-token n-gram has a
 * count of 1 and that the model size is 3.
 */
@Test
public void testAdd2() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "bro", "wn"), 2, 3);

    // The lookup uses a fresh, equal StringList rather than the instance that was added.
    Assert.assertEquals(1, model.getCount(new StringList("the", "bro", "wn")));
    Assert.assertEquals(3, model.size());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds the token list ("the", "brown", "fox") with the arguments 2 and 3 (presumably the
 * minimum and maximum n-gram length) and checks that the trigram and both bigrams each
 * have a count of 1, and that the model size is 3.
 */
@Test
public void testAdd3() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox"), 2, 3);

    // Each expected n-gram is looked up with a freshly constructed StringList.
    Assert.assertEquals(1, model.getCount(new StringList("the", "brown", "fox")));
    Assert.assertEquals(1, model.getCount(new StringList("the", "brown")));
    Assert.assertEquals(1, model.getCount(new StringList("brown", "fox")));

    Assert.assertEquals(3, model.size());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds the token list ("the", "bro", "wn") with the arguments 1 and 3 and checks that the
 * model then contains the single-token n-gram ("the").
 */
@Test
public void testContains2() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "bro", "wn"), 1, 3);

    final boolean hasSingleToken = model.contains(new StringList("the"));
    Assert.assertTrue(hasSingleToken);
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds the token list ("the", "bro", "wn") with the arguments 1 and 3 and checks that
 * {@code numberOfGrams()} reports 6.
 */
@Test
public void testNumberOfGrams() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "bro", "wn"), 1, 3);

    final int gramTotal = model.numberOfGrams();
    Assert.assertEquals(6, gramTotal);
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds a four-token list with the arguments 1 and 3, applies {@code cutoff(2, 4)}, and
 * checks that the model is empty afterwards.
 */
@Test
public void testCutoff1() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);

    // NOTE(review): presumably every n-gram has count 1 here and falls below the lower
    // cutoff bound of 2 -- confirm against NGramModel.cutoff.
    model.cutoff(2, 4);

    Assert.assertEquals(0, model.size());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds a four-token list with the arguments 1 and 3, applies {@code cutoff(1, 3)}, and
 * checks that the model still reports a size of 9.
 */
@Test
public void testCutoff2() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);

    // NOTE(review): presumably counts of 1 lie inside the (1, 3) cutoff bounds, so
    // nothing is removed -- confirm against NGramModel.cutoff.
    model.cutoff(1, 3);

    Assert.assertEquals(9, model.size());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds two four-token lists that differ only in the case of "fox"/"Fox", converts the
 * model with the no-argument {@code toDictionary()}, and checks the resulting dictionary:
 * non-null, 9 entries, minimum token count 1, maximum token count 3.
 */
@Test
public void testToDictionary() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);
    model.add(new StringList("the", "brown", "Fox", "jumped"), 1, 3);

    // NOTE(review): the expected size of 9 suggests the case variants are merged by the
    // default conversion -- confirm against NGramModel.toDictionary().
    final Dictionary dict = model.toDictionary();

    Assert.assertNotNull(dict);
    Assert.assertEquals(9, dict.size());
    Assert.assertEquals(1, dict.getMinTokenCount());
    Assert.assertEquals(3, dict.getMaxTokenCount());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Adds two four-token lists that differ only in the case of "fox"/"Fox", converts the
 * model with {@code toDictionary(true)}, and checks the resulting dictionary: non-null,
 * 14 entries, minimum token count 1, maximum token count 3.
 */
@Test
public void testToDictionary1() throws Exception {
    final NGramModel model = new NGramModel();
    model.add(new StringList("the", "brown", "fox", "jumped"), 1, 3);
    model.add(new StringList("the", "brown", "Fox", "jumped"), 1, 3);

    // NOTE(review): the expected size of 14 suggests the boolean argument keeps the case
    // variants distinct -- confirm against NGramModel.toDictionary(boolean).
    final Dictionary dict = model.toDictionary(true);

    Assert.assertNotNull(dict);
    Assert.assertEquals(14, dict.size());
    Assert.assertEquals(1, dict.getMinTokenCount());
    Assert.assertEquals(3, dict.getMaxTokenCount());
}

From source file:opennlp.tools.ngram.NGramModelTest.java

/**
 * Serializes a model built from two four-token lists and compares the result with the
 * classpath resource {@code /opennlp/tools/ngram/ngram-model.xml}.
 *
 * <p>Before comparing, the first XML comment in the reference file is removed, and
 * newlines, carriage returns, tabs and spaces are stripped from both strings. The test
 * is currently disabled via {@code @Ignore}.
 *
 * @throws Exception if serialization or reading the reference resource fails
 */
@Ignore
@Test
public void testSerialize() throws Exception {
    NGramModel ngramModel = new NGramModel();
    StringList tokens = new StringList("the", "brown", "fox", "jumped");
    ngramModel.add(tokens, 1, 3);
    tokens = new StringList("the", "brown", "Fox", "jumped");
    ngramModel.add(tokens, 1, 3);

    ByteArrayOutputStream out = new ByteArrayOutputStream();
    ngramModel.serialize(out);

    String modelString;
    // Close the resource stream deterministically instead of leaking it.
    try (InputStream nGramModelStream =
            getClass().getResourceAsStream("/opennlp/tools/ngram/ngram-model.xml")) {
        // Fail with a clear message rather than an NPE when the resource is missing.
        Assert.assertNotNull("reference resource /opennlp/tools/ngram/ngram-model.xml not found",
                nGramModelStream);
        // Decode explicitly as UTF-8 (the charset used for the serialized output below)
        // instead of relying on the platform default.
        // NOTE(review): assumes the reference file is UTF-8 encoded -- confirm.
        modelString = IOUtils.toString(nGramModelStream, "UTF-8");
    }

    // Remove the first XML comment (the "AL header" in the original note), if present.
    // Guarding the indices avoids a StringIndexOutOfBoundsException when there is none.
    int start = modelString.indexOf("<!--");
    int end = start < 0 ? -1 : modelString.indexOf("-->", start);
    if (start >= 0 && end >= 0) {
        String asfHeaderString = modelString.substring(start, end + 3);
        modelString = modelString.replace(asfHeaderString, "");
    }

    String outputString = out.toString(Charset.forName("UTF-8").name());

    // Compare while ignoring newlines, carriage returns, tabs and spaces.
    String whitespace = "[\\n\\r\\t ]";
    Assert.assertEquals(
            modelString.replaceAll(whitespace, ""),
            outputString.replaceAll(whitespace, ""));
}