List of usage examples for org.apache.lucene.analysis.TokenStream#close()
@Override public void close() throws IOException
From source file:nicta.com.au.failureanalysis.optimalquery.OptPatentQuery.java
private String transformation(TokenStream ts, int treshold, String field) throws IOException { Map<String, Integer> m = new HashMap<>(); String q = ""; CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class); ts.reset();//from ww w . j a v a 2 s . c o m int s = 0; while (ts.incrementToken()) { String term = charTermAttribute.toString().replace(":", "\\:"); q += term + " "; if (m.containsKey(term)) { m.put(term, m.get(term) + 1); } else { m.put(term, 1); } s++; } ts.close(); // return q; q = ""; for (String k : m.keySet()) { if (m.get(k) >= treshold) { if (!Functions.isNumeric(k)) { q += k + "^" + m.get(k) + " "; // System.out.println(k); } } } if (field != null) { vocabulary.put(field, m); } fieldsSize.put(field, s); return q; }
From source file:nicta.com.au.failureanalysis.query.QueryGneration.java
private Map<String, Integer> getTerms(TokenStream ts, int treshold, String field) throws IOException { Map<String, Integer> m = new HashMap<>(); Map<String, Integer> qterm_freq = new HashMap<>(); String q = ""; CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class); ts.reset();//from ww w.j a v a 2 s . c o m int s = 0; while (ts.incrementToken()) { String term = charTermAttribute.toString().replace(":", "\\:"); q += term + " "; if (m.containsKey(term)) { m.put(term, m.get(term) + 1); } else { m.put(term, 1); } s++; } ts.close(); // return q; q = ""; // int count = 0; for (String k : m.keySet()) { if (m.get(k) >= treshold) { if (!Functions.isNumeric(k)) { q += k + "^" + m.get(k) + " "; qterm_freq.put(k, m.get(k)); // count++; // System.out.println(count + " " + k + " " + m.get(k)); } } } // System.out.println("-------------------"); if (field != null) { vocabulary.put(field, m); } fieldsSize.put(field, s); // return q; return qterm_freq; }
From source file:nicta.com.au.patent.pac.search.PatentQuery.java
private String transformation(TokenStream ts, int treshold, String field) throws IOException { Map<String, Integer> m = new HashMap<>(); String q = ""; CharTermAttribute charTermAttribute = ts.addAttribute(CharTermAttribute.class); ts.reset();//from ww w .j a v a 2 s .c om int s = 0; while (ts.incrementToken()) { String term = charTermAttribute.toString().replace(":", "\\:"); q += term + " "; if (m.containsKey(term)) { m.put(term, m.get(term) + 1); } else { m.put(term, 1); } s++; } ts.close(); // return q; q = ""; for (String k : m.keySet()) { if (m.get(k) >= treshold) { if (!Functions.isNumeric(k)) { // q += k + "^" + m.get(k) + " "; q += k + "^" + 1/*m.get(k)*/ + " "; // System.out.println(k); } } } if (field != null) { vocabulary.put(field, m); } fieldsSize.put(field, s); return q; }
From source file:nl.b3p.viewer.stripes.CatalogSearchActionBean.java
License:Open Source License
private static Or createOrFilter(String queryString, String propertyName) { List orList = new ArrayList(); queryString = createQueryString(queryString, false); if (queryString != null && !queryString.trim().equals(defaultWildCard)) { propertyName = createPropertyName(propertyName); PropertyIsEqualTo propertyIsEqualTo = FilterCreator.createPropertyIsEqualTo(queryString, propertyName); StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_45, DutchAnalyzer.getDefaultStopSet()); orList.add(propertyIsEqualTo);//from w w w .j a va 2 s . c o m try { TokenStream tokenStream = standardAnalyzer.tokenStream("", queryString); OffsetAttribute offsetAttribute = tokenStream.addAttribute(OffsetAttribute.class); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); tokenStream.reset(); while (tokenStream.incrementToken()) { int startOffset = offsetAttribute.startOffset(); int endOffset = offsetAttribute.endOffset(); String term = charTermAttribute.toString(); PropertyIsLike propertyIsLike = FilterCreator.createPropertyIsLike(term, propertyName); orList.add(propertyIsLike); } tokenStream.close(); } catch (IOException e) { PropertyIsLike propertyIsLike = FilterCreator.createPropertyIsLike(queryString, propertyName); orList.add(propertyIsLike); } } Or or = new Or(new BinaryLogicOpType(orList)); return or; }
From source file:nl.inl.blacklab.analysis.TestBLDutchAnalyzer.java
License:Apache License
/**
 * Checks that {@code BLDutchAnalyzer} produces the expected sequence of terms for a sample
 * input and then reports the end of the stream.
 *
 * @throws IOException if the token stream fails
 */
@Test
public void testBasics() throws IOException {
    // NOTE(review): the listing showed "bl(len)" here, which cannot produce the expected
    // "bellen"; an accented 'e' was evidently dropped during extraction. U+00E9 is assumed --
    // confirm the exact character against the upstream test.
    Reader r = new StringReader(
            "1781 \"hond, a.u.b.: b\u00e9l(len); \t [pre]cursor \t\nzo'n 'Hij zij' ex-man -");
    String[] expectedTerms = { "1781", "hond", "aub", "bellen", "precursor", "zo'n", "hij", "zij",
            "ex-man" };

    BLDutchAnalyzer analyzer = new BLDutchAnalyzer();
    try {
        TokenStream ts = analyzer.tokenStream("contents", r);
        try {
            CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
            // The consumer must reset the stream before the first incrementToken().
            ts.reset();
            for (String expected : expectedTerms) {
                Assert.assertTrue(ts.incrementToken());
                Assert.assertEquals(expected, new String(ta.buffer(), 0, ta.length()));
            }
            Assert.assertFalse(ts.incrementToken());
        } finally {
            ts.close();
        }
    } finally {
        analyzer.close();
    }
}
From source file:nl.inl.blacklab.analysis.TestBLDutchTokenFilter.java
License:Apache License
/**
 * Feeds a fixed list of tokens through {@code BLDutchTokenFilter} and checks the filtered
 * terms one by one, then checks that the stream is exhausted.
 *
 * @throws IOException if the token stream fails
 */
@Test
public void testBasics() throws IOException {
    String[] input = { "hond", "a.u.b.", "bel(len)", "[pre]cursor", "zo'n", "'Hij", "zij'", "ex-man",
            "-" };
    String[] expectedTerms = { "hond", "aub", "bellen", "precursor", "zo'n", "Hij", "zij", "ex-man" };

    TokenStream ts = new StubTokenStream(input);
    try {
        ts = new BLDutchTokenFilter(ts);
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        for (String expected : expectedTerms) {
            Assert.assertTrue(ts.incrementToken());
            Assert.assertEquals(expected, new String(ta.buffer(), 0, ta.length()));
        }
        // The trailing "-" token yields no further term.
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}
From source file:nl.inl.blacklab.analysis.TestBLDutchTokenizer.java
License:Apache License
/**
 * Checks that {@code BLDutchTokenizer} splits a sample input into the expected raw tokens
 * (punctuation inside tokens is preserved) and then reports the end of the stream.
 *
 * @throws IOException if the token stream fails
 */
@Test
public void testBasics() throws IOException {
    Reader r = new StringReader("\"hond, a.u.b.: bl(len); \t [pre]cursor \t\nzo'n 'Hij zij' ex-man -");
    String[] expectedTerms = { "hond", "a.u.b.", "bl(len)", "[pre]cursor", "zo'n", "'Hij", "zij'",
            "ex-man", "-" };

    TokenStream ts = new BLDutchTokenizer(r);
    try {
        // reset() lives inside the try so the stream is closed even if resetting fails.
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        for (String expected : expectedTerms) {
            Assert.assertTrue(ts.incrementToken());
            Assert.assertEquals(expected, new String(ta.buffer(), 0, ta.length()));
        }
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}
From source file:nl.inl.blacklab.filter.AbstractSynonymFilter.java
License:Apache License
/**
 * Small demo: tokenises "Dit is een test" on whitespace, runs it through an anonymous
 * {@code AbstractSynonymFilter} that maps "test" to "testje" and "is" to "zijn", and prints
 * every resulting term on its own line.
 *
 * @param args ignored
 * @throws IOException if the token stream fails
 */
public static void main(String[] args) throws IOException {
    TokenStream ts = new WhitespaceTokenizer(Version.LUCENE_42, new StringReader("Dit is een test"));
    try {
        ts = new AbstractSynonymFilter(ts) {
            @Override
            public String[] getSynonyms(String s) {
                if (s.equals("test")) {
                    return new String[] { "testje" };
                }
                if (s.equals("is")) {
                    return new String[] { "zijn" };
                }
                return null;
            }
        };

        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // The consumer must reset the stream before the first incrementToken().
        ts.reset();
        while (ts.incrementToken()) {
            System.out.println(new String(term.buffer(), 0, term.length()));
        }
        // Complete the consumer workflow before closing.
        ts.end();
    } finally {
        ts.close();
    }
}
From source file:nl.inl.blacklab.filter.TestRemoveAllAccentsFilter.java
License:Apache License
/**
 * Checks that {@code RemoveAllAccentsFilter} strips the accent from an accented token and
 * leaves an unaccented token unchanged.
 *
 * @throws IOException if the token stream fails
 */
@Test
public void testRetrieve() throws IOException {
    // NOTE(review): the listing showed "H" here, which cannot produce the expected "He"; the
    // accented character was evidently dropped during extraction. U+00E9 is assumed --
    // confirm the exact character against the upstream test.
    String[] input = { "H\u00e9", "jij" };
    String[] expectedTerms = { "He", "jij" };

    TokenStream ts = new StubTokenStream(input);
    try {
        ts = new RemoveAllAccentsFilter(ts);
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        for (String expected : expectedTerms) {
            Assert.assertTrue(ts.incrementToken());
            Assert.assertEquals(expected, new String(ta.buffer(), 0, ta.length()));
        }
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}
From source file:nl.inl.blacklab.filter.TestTranscribeGermanAccentsFilter.java
License:Apache License
/**
 * Checks that {@code TranscribeGermanAccentsFilter} transcribes an umlaut ("Koeln" is the
 * expected output) and leaves a token without umlauts unchanged.
 *
 * @throws IOException if the token stream fails
 */
@Test
public void testRetrieve() throws IOException {
    // NOTE(review): the listing showed "Kln" here, which cannot produce the expected "Koeln";
    // the o-umlaut (U+00F6) was evidently dropped during extraction.
    String[] input = { "K\u00f6ln", "Berlin" };
    String[] expectedTerms = { "Koeln", "Berlin" };

    TokenStream ts = new StubTokenStream(input);
    try {
        ts = new TranscribeGermanAccentsFilter(ts);
        // The consumer must reset the stream before the first incrementToken(); this also
        // matches the sibling accent-filter test.
        ts.reset();
        CharTermAttribute ta = ts.addAttribute(CharTermAttribute.class);
        for (String expected : expectedTerms) {
            Assert.assertTrue(ts.incrementToken());
            Assert.assertEquals(expected, new String(ta.buffer(), 0, ta.length()));
        }
        Assert.assertFalse(ts.incrementToken());
    } finally {
        ts.close();
    }
}