Example usage for org.apache.lucene.analysis TokenStream close

List of usage examples for org.apache.lucene.analysis TokenStream close

Introduction

On this page you can find example usages of the close() method of org.apache.lucene.analysis.TokenStream.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Releases resources associated with this stream.

Usage

From source file:nicta.com.au.failureanalysis.optimalquery.OptPatentQuery.java

/**
 * Consumes {@code ts}, counts how often each term occurs and renders the
 * sufficiently frequent, non-numeric terms as a boosted query string of the
 * form {@code term^count term^count ...}.
 *
 * <p>Side effects: when {@code field} is non-null the full term-count map is
 * stored in {@code vocabulary}; the total number of tokens is always stored in
 * {@code fieldsSize} under {@code field}.
 *
 * @param ts       the token stream to consume; it is always closed by this method
 * @param treshold minimum number of occurrences a term needs to be emitted
 * @param field    name under which the statistics are recorded
 * @return the boosted terms, each followed by a single space (empty if none qualify)
 * @throws IOException if the token stream fails while being read
 */
private String transformation(TokenStream ts, int treshold, String field) throws IOException {
    Map<String, Integer> m = new HashMap<>();
    int s = 0;
    try {
        CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // Escape ':' inside the term before it ends up in the query string.
            String term = charTermAttribute.toString().replace(":", "\\:");
            Integer seen = m.get(term);
            m.put(term, seen == null ? 1 : seen + 1);
            s++;
        }
        // Finish the consumer workflow (reset -> incrementToken -> end -> close).
        ts.end();
    } finally {
        // Release the stream even when reading fails half-way.
        ts.close();
    }

    StringBuilder q = new StringBuilder();
    for (Map.Entry<String, Integer> entry : m.entrySet()) {
        if (entry.getValue() >= treshold && !Functions.isNumeric(entry.getKey())) {
            q.append(entry.getKey()).append('^').append(entry.getValue()).append(' ');
        }
    }
    if (field != null) {
        vocabulary.put(field, m);
    }
    fieldsSize.put(field, s);
    return q.toString();
}

From source file:nicta.com.au.failureanalysis.query.QueryGneration.java

/**
 * Consumes {@code ts}, counts how often each term occurs and returns the
 * sufficiently frequent, non-numeric terms together with their counts.
 *
 * <p>Side effects: when {@code field} is non-null the full term-count map is
 * stored in {@code vocabulary}; the total number of tokens is always stored in
 * {@code fieldsSize} under {@code field}.
 *
 * @param ts       the token stream to consume; it is always closed by this method
 * @param treshold minimum number of occurrences a term needs to be returned
 * @param field    name under which the statistics are recorded
 * @return a map from each qualifying term to its number of occurrences
 * @throws IOException if the token stream fails while being read
 */
private Map<String, Integer> getTerms(TokenStream ts, int treshold, String field) throws IOException {
    Map<String, Integer> m = new HashMap<>();
    int s = 0;
    try {
        CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // Escape ':' inside the term before it is used as a query term.
            String term = charTermAttribute.toString().replace(":", "\\:");
            Integer seen = m.get(term);
            m.put(term, seen == null ? 1 : seen + 1);
            s++;
        }
        // Finish the consumer workflow (reset -> incrementToken -> end -> close).
        ts.end();
    } finally {
        // Release the stream even when reading fails half-way.
        ts.close();
    }

    Map<String, Integer> qterm_freq = new HashMap<>();
    for (Map.Entry<String, Integer> entry : m.entrySet()) {
        if (entry.getValue() >= treshold && !Functions.isNumeric(entry.getKey())) {
            qterm_freq.put(entry.getKey(), entry.getValue());
        }
    }
    if (field != null) {
        vocabulary.put(field, m);
    }
    fieldsSize.put(field, s);
    return qterm_freq;
}

From source file:nicta.com.au.patent.pac.search.PatentQuery.java

/**
 * Consumes {@code ts}, counts how often each term occurs and renders the
 * sufficiently frequent, non-numeric terms as a query string in which every
 * term carries the fixed boost {@code 1} ({@code term^1 term^1 ...}).
 *
 * <p>Side effects: when {@code field} is non-null the full term-count map is
 * stored in {@code vocabulary}; the total number of tokens is always stored in
 * {@code fieldsSize} under {@code field}.
 *
 * @param ts       the token stream to consume; it is always closed by this method
 * @param treshold minimum number of occurrences a term needs to be emitted
 * @param field    name under which the statistics are recorded
 * @return the selected terms, each followed by {@code ^1} and a single space
 * @throws IOException if the token stream fails while being read
 */
private String transformation(TokenStream ts, int treshold, String field) throws IOException {
    Map<String, Integer> m = new HashMap<>();
    int s = 0;
    try {
        CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            // Escape ':' inside the term before it ends up in the query string.
            String term = charTermAttribute.toString().replace(":", "\\:");
            Integer seen = m.get(term);
            m.put(term, seen == null ? 1 : seen + 1);
            s++;
        }
        // Finish the consumer workflow (reset -> incrementToken -> end -> close).
        ts.end();
    } finally {
        // Release the stream even when reading fails half-way.
        ts.close();
    }

    StringBuilder q = new StringBuilder();
    for (Map.Entry<String, Integer> entry : m.entrySet()) {
        if (entry.getValue() >= treshold && !Functions.isNumeric(entry.getKey())) {
            // The frequency only selects terms; the boost itself is the constant 1.
            q.append(entry.getKey()).append("^1 ");
        }
    }
    if (field != null) {
        vocabulary.put(field, m);
    }
    fieldsSize.put(field, s);
    return q.toString();
}

From source file:nl.b3p.viewer.stripes.CatalogSearchActionBean.java

License:Open Source License

/**
 * Builds an OR filter for {@code propertyName}: one "is equal to" clause for
 * the whole (normalised) query string plus one "is like" clause per token the
 * analyzer produces from it.
 *
 * <p>If the normalised query string is {@code null} or equals the default
 * wildcard, the resulting OR filter has no clauses. If tokenizing fails with
 * an {@link IOException}, a single "is like" clause for the whole query string
 * is added as a fallback.
 *
 * @param queryString  the raw search text
 * @param propertyName the property the clauses are built for
 * @return the OR filter wrapping all generated clauses
 */
private static Or createOrFilter(String queryString, String propertyName) {
    // Raw list kept as in the original: the element type expected by
    // BinaryLogicOpType is not visible from here.
    List orList = new ArrayList();
    queryString = createQueryString(queryString, false);
    if (queryString != null && !queryString.trim().equals(defaultWildCard)) {

        propertyName = createPropertyName(propertyName);

        PropertyIsEqualTo propertyIsEqualTo = FilterCreator.createPropertyIsEqualTo(queryString, propertyName);
        orList.add(propertyIsEqualTo);

        StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_45,
                DutchAnalyzer.getDefaultStopSet());
        try {
            TokenStream tokenStream = standardAnalyzer.tokenStream("", queryString);
            try {
                CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class);

                tokenStream.reset();
                while (tokenStream.incrementToken()) {
                    String term = charTermAttribute.toString();
                    orList.add(FilterCreator.createPropertyIsLike(term, propertyName));
                }
                // Finish the consumer workflow before closing.
                tokenStream.end();
            } finally {
                // Close the stream even when tokenizing fails half-way.
                tokenStream.close();
            }
        } catch (IOException e) {
            // Tokenizing failed: fall back to matching the whole query string.
            orList.add(FilterCreator.createPropertyIsLike(queryString, propertyName));
        } finally {
            // The analyzer is created per call, so release it here as well.
            standardAnalyzer.close();
        }
    }

    return new Or(new BinaryLogicOpType(orList));
}

From source file:nl.inl.blacklab.analysis.TestBLDutchAnalyzer.java

License:Apache License

/**
 * Runs a sample sentence through {@code BLDutchAnalyzer} and checks the exact
 * sequence of terms it produces, including that nothing follows the last term.
 */
@Test
public void testBasics() throws IOException {
    // NOTE(review): the expected term "bellen" cannot come from "bl(len)"; the
    // accented vowel appears to have been lost in this copy of the test.
    // '\u00e9' is assumed here - confirm against the upstream test source.
    Reader r = new StringReader(
            "1781 \"hond, a.u.b.: b\u00e9l(len); \t [pre]cursor \t\nzo'n 'Hij zij' ex-man -");
    String[] expected = { "1781", "hond", "aub", "bellen", "precursor", "zo'n", "hij", "zij", "ex-man" };
    BLDutchAnalyzer analyzer = new BLDutchAnalyzer();
    try {
        TokenStream ts = analyzer.tokenStream("contents", r);
        try {
            CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
            // The TokenStream contract requires reset() before the first incrementToken().
            ts.reset();
            for (String term : expected) {
                Assert.assertTrue(ts.incrementToken());
                Assert.assertEquals(term, new String(ta.buffer(), 0, ta.length()));
            }
            Assert.assertFalse(ts.incrementToken());
        } finally {
            ts.close();
        }
    } finally {
        analyzer.close();
    }
}

From source file:nl.inl.blacklab.analysis.TestBLDutchTokenFilter.java

License:Apache License

@Test
public void testBasics() throws IOException {
    TokenStream ts = new StubTokenStream(new String[] { "hond", "a.u.b.", "bel(len)", "[pre]cursor", "zo'n",
            "'Hij", "zij'", "ex-man", "-" });
    try {/*from w ww  .j  av  a 2 s . c  o  m*/
        ts = new BLDutchTokenFilter(ts);
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("hond", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("aub", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("bellen", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("precursor", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("zo'n", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("Hij", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("zij", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("ex-man", new String(ta.buffer(), 0, ta.length()));
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}

From source file:nl.inl.blacklab.analysis.TestBLDutchTokenizer.java

License:Apache License

@Test
public void testBasics() throws IOException {
    Reader r = new StringReader("\"hond, a.u.b.: bl(len); \t [pre]cursor \t\nzo'n 'Hij zij' ex-man -");
    TokenStream ts = new BLDutchTokenizer(r);
    ts.reset();/*from w w w.java  2s  .com*/
    try {
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("hond", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("a.u.b.", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("bl(len)", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("[pre]cursor", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("zo'n", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("'Hij", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("zij'", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("ex-man", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("-", new String(ta.buffer(), 0, ta.length()));
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}

From source file:nl.inl.blacklab.filter.AbstractSynonymFilter.java

License:Apache License

/**
 * Small manual demo: splits "Dit is een test" on whitespace, wraps the result
 * in an anonymous {@code AbstractSynonymFilter} that offers "testje" for
 * "test" and "zijn" for "is", and prints every term the filter emits.
 *
 * @param args ignored
 * @throws IOException if reading from the token stream fails
 */
public static void main(String[] args) throws IOException {
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_42, new StringReader("Dit is een test"));
    try {
        ts = new AbstractSynonymFilter(ts) {
            @Override
            public String[] getSynonyms(String s) {
                if (s.equals("test")) {
                    return new String[] { "testje" };
                }
                if (s.equals("is")) {
                    return new String[] { "zijn" };
                }
                return null;
            }
        };

        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // The TokenStream contract requires reset() before the first incrementToken().
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(new String(term.buffer(), 0, term.length()));
        }
        // Finish the consumer workflow before closing.
        ts.end();
    } finally {
        ts.close();
    }
}

From source file:nl.inl.blacklab.filter.TestRemoveAllAccentsFilter.java

License:Apache License

@Test
public void testRetrieve() throws IOException {
    TokenStream ts = new StubTokenStream(new String[] { "H", "jij" });
    try {/*w ww .j  a  va  2 s.  c  om*/
        ts = new RemoveAllAccentsFilter(ts);
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("He", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("jij", new String(ta.buffer(), 0, ta.length()));
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}

From source file:nl.inl.blacklab.filter.TestTranscribeGermanAccentsFilter.java

License:Apache License

@Test
public void testRetrieve() throws IOException {
    TokenStream ts = new StubTokenStream(new String[] { "Kln", "Berlin" });
    try {/*from   w  w w. j  a  v  a 2s  .c o  m*/
        ts = new TranscribeGermanAccentsFilter(ts);
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("Koeln", new String(ta.buffer(), 0, ta.length()));
        Assert.assertTrue(ts.incrementToken());
        Assert.assertEquals("Berlin", new String(ta.buffer(), 0, ta.length()));
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}