List of usage examples for org.apache.lucene.analysis TokenStream close
@Override public void close() throws IOException
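TokenStream follows a strict consumer lifecycle: reset(), then incrementToken() until it returns false, then end(), and finally close(). Every example below implements some variant of that contract. As a minimal sketch of the pattern, assuming Lucene 4.x or later (where TokenStream implements Closeable and Analyzer offers a tokenStream(String, String) overload), try-with-resources can take care of the close() call:

import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

public class TokenStreamCloseSketch {
    public static void main(String[] args) throws IOException {
        Analyzer analyzer = new StandardAnalyzer();
        // try-with-resources calls ts.close() even if consuming the stream throws
        try (TokenStream ts = analyzer.tokenStream("body", "Hello token stream")) {
            CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
            ts.reset();                  // mandatory before the first incrementToken()
            while (ts.incrementToken()) {
                System.out.println(term.toString());
            }
            ts.end();                    // sets final state, e.g. the final offset
        }                                // close() releases the stream's resources
        analyzer.close();
    }
}

After close(), ask the analyzer for a fresh TokenStream rather than reusing the closed one.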
From source file:org.wltea.analyzer.ik_analyzer5.IKAnalzyerTest.java
License:Apache License
@Test
public void testIK() {
    String text = "???";
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzer(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield", new StringReader(text));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        analyzer.close();
    }
}
From source file:org.wltea.analyzer.sample.IKAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzerP(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        // ts = analyzer.tokenStream("myfield", new StringReader("WORLD ,.. html DATA</html>HELLO"));
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // ts = analyzer.tokenStream("myfield", new StringReader("???pinyin hanyu Contribute index to jpinyin development by creating an account on GitHub"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.wltea.analyzer.sample.ThulacAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the Thulac analyzer in smart segmentation mode
    Analyzer analyzer = new ThulacAnalyzer(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        long start = System.currentTimeMillis();
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
        System.out.println("wast:" + (System.currentTimeMillis() - start));
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.wltea.analyzer.test.IKAnalzyerDemo.java
License:Apache License
public static void main(String[] args) {
    // Build the IK analyzer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzer4PinYin(true);
    // Obtain the Lucene TokenStream
    TokenStream ts = null;
    try {
        ts = analyzer.tokenStream("myfield",
                new StringReader("?????IKAnalyer can analysis english text too"));
        // Get the token offset attribute
        OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
        // Get the token text attribute
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        // Get the token type attribute
        TypeAttribute type = ts.addAttribute(TypeAttribute.class);
        // Reset the TokenStream (resets the StringReader)
        ts.reset();
        // Iterate over the tokenization results
        while (ts.incrementToken()) {
            System.out.println(offset.startOffset() + " - " + offset.endOffset() + " : " + term.toString()
                    + " | " + type.type());
        }
        // Perform end-of-stream operations, e.g. set the final offset
        ts.end();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Release all resources held by the TokenStream
        if (ts != null) {
            try {
                ts.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:org.zenoss.zep.index.impl.lucene.LuceneQueryBuilder.java
License:Open Source License
/**
 * Tokenizes the given query using the same behavior as when the field is analyzed.
 *
 * @param fieldName The field name in the index.
 * @param analyzer  The analyzer to use to tokenize the query.
 * @param query     The query to tokenize.
 * @return The tokens from the query.
 * @throws ZepException If an exception occurs.
 */
private static List<String> getTokens(String fieldName, Analyzer analyzer, String query) throws ZepException {
    final List<String> tokens = new ArrayList<String>();
    try {
        TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(query));
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        try {
            ts.reset();
            while (ts.incrementToken()) {
                tokens.add(term.toString());
            }
            ts.end();
        } catch (IOException e) {
            throw new ZepException(e.getLocalizedMessage(), e);
        } finally {
            ts.close();
        }
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
    return tokens;
}
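The nested try/finally above exists only to guarantee that close() runs even when tokenization fails. On Lucene versions where TokenStream implements Closeable, a sketch of the same method using try-with-resources (keeping the original ZepException wrapping) could look like this:

private static List<String> getTokens(String fieldName, Analyzer analyzer, String query) throws ZepException {
    final List<String> tokens = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream(fieldName, new StringReader(query))) {
        CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(term.toString());
        }
        ts.end();  // close() is handled by try-with-resources
    } catch (IOException e) {
        throw new ZepException(e.getLocalizedMessage(), e);
    }
    return tokens;
}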
From source file:org.zilverline.lucene.BoostingParser.java
License:Open Source License
/**
 * Callback that returns a Query with boosted fields using BoostFactors.
 *
 * @param field     the field to query
 * @param analyzer  the analyzer to use
 * @param queryText the query
 * @return Query object
 * @throws ParseException if the Query can't be made
 */
protected Query getFieldQuery(String field, Analyzer analyzer, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count.
    // For fields that contain 'contents', add boost factors for other terms
    // specified in BoostFactor.
    if (factors != null && factors.getFactors() != null && !factors.getFactors().isEmpty()
            && defaultField.equals(field)) {
        TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
        Vector v = new Vector();
        org.apache.lucene.analysis.Token t;
        while (true) {
            try {
                t = source.next();
            } catch (IOException e) {
                t = null;
            }
            if (t == null) {
                break;
            }
            v.addElement(t.termText());
            log.debug(field + " , " + t.termText());
        }
        try {
            source.close();
        } catch (IOException e) {
            log.error("Unexpected Exception");
        }
        if (v.size() == 0) {
            return null;
        } else {
            // Create a new composed query
            BooleanQuery bq = new BooleanQuery();
            // For every boost factor create a new PhraseQuery
            Iterator iter = factors.getFactors().entrySet().iterator();
            while (iter.hasNext()) {
                Map.Entry element = (Map.Entry) iter.next();
                String thisField = ((String) element.getKey()).toLowerCase();
                Float boost = (Float) element.getValue();
                PhraseQuery q = new PhraseQuery();
                // Add all the terms of the query
                for (int i = 0; i < v.size(); i++) {
                    q.add(new Term(thisField, (String) v.elementAt(i)));
                }
                // Boost the query
                q.setBoost(boost.floatValue());
                // Add it to the composed query
                bq.add(q, false, false);
            }
            log.debug("Query: " + bq);
            return bq;
        }
    } else {
        log.debug("Treat like normal query: " + queryText);
        return super.getFieldQuery(field, analyzer, queryText);
    }
}
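This example targets the legacy pre-2.9 Lucene API: TokenStream.next() and Token.termText() have long since been replaced by the attribute-based API, and the code calls close() without a preceding end(). For comparison, here is a sketch of the token-collection loop in the modern API; collectTerms is a name introduced here for illustration, not part of the original class:

// Hypothetical helper: collects analyzed terms the way the loop above does,
// but with the attribute-based API that replaced TokenStream.next().
private static List<String> collectTerms(Analyzer analyzer, String field, String queryText)
        throws IOException {
    List<String> terms = new ArrayList<>();
    try (TokenStream source = analyzer.tokenStream(field, new StringReader(queryText))) {
        CharTermAttribute term = source.addAttribute(CharTermAttribute.class);
        source.reset();
        while (source.incrementToken()) {
            terms.add(term.toString());
        }
        source.end();
    }
    return terms;
}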
From source file:perf.TestAnalyzerPerf.java
License:Apache License
private static void testAnalyzer(String desc, File wikiLinesFile, Analyzer a, int warmupCount, int runCount)
        throws Exception {
    System.out.println("\nTEST: " + desc);
    // 64 KB buffer
    InputStream is = new FileInputStream(wikiLinesFile);
    BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"), 1 << 16);
    long startTime = System.currentTimeMillis();
    long sumTime = 0;
    long hash = 0;
    long tokenCount = 0;
    int totCount = warmupCount + runCount;
    for (int i = 0; i < totCount; i++) {
        boolean isWarmup = i < warmupCount;
        if (i % 10000 == 0) {
            System.out.println(String.format(Locale.ROOT, "%.1f sec: %d...",
                    (System.currentTimeMillis() - startTime) / 1000.0, i));
        }
        String s = reader.readLine();
        long t0 = System.nanoTime();
        TokenStream ts = a.tokenStream("field", new StringReader(s));
        ts.reset();
        CharTermAttribute termAtt = ts.getAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncAtt;
        if (ts.hasAttribute(PositionIncrementAttribute.class)) {
            posIncAtt = ts.getAttribute(PositionIncrementAttribute.class);
        } else {
            posIncAtt = null;
        }
        OffsetAttribute offsetAtt;
        if (ts.hasAttribute(OffsetAttribute.class)) {
            offsetAtt = ts.getAttribute(OffsetAttribute.class);
        } else {
            offsetAtt = null;
        }
        while (ts.incrementToken()) {
            hash += 31 * ArrayUtil.hashCode(termAtt.buffer(), 0, termAtt.length());
            if (posIncAtt != null) {
                hash += 31 * posIncAtt.getPositionIncrement();
            }
            if (offsetAtt != null) {
                hash += 31 * offsetAtt.startOffset();
                hash += 31 * offsetAtt.endOffset();
            }
            if (isWarmup == false) {
                tokenCount++;
            }
        }
        ts.end();
        ts.close();
        if (isWarmup == false) {
            sumTime += System.nanoTime() - t0;
        }
    }
    reader.close();
    System.out.println(String.format(Locale.ROOT, "%s time=%.2f msec hash=%d tokens=%d", desc,
            (sumTime / 1000000.0), hash, tokenCount));
}
From source file:practica2_1.Practica2_1.java
public static List<String> tokenizeString(Analyzer analyzer, String string) {
    List<String> result = new ArrayList<String>();
    try {
        TokenStream stream = analyzer.tokenStream(null, new StringReader(string));
        //OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);
        CharTermAttribute cAtt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            //result.add(stream.getAttribute(CharTermAttribute.class).toString());
            result.add(cAtt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        // not thrown b/c we're using a string reader...
        throw new RuntimeException(e);
    }
    return result;
}
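A quick usage sketch of the helper above (the analyzer choice and input string are illustrative):

// With StandardAnalyzer this prints [quick, brown, foxes]:
// terms are split on whitespace/punctuation and lowercased.
List<String> tokens = tokenizeString(new StandardAnalyzer(), "Quick Brown Foxes");
System.out.println(tokens);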
From source file:practica3b.Practica3b.java
public static TopDocs busquedaAuthor(IndexSearcher is, String tipo, String tipo_year, String authors,
        Integer num1, Integer num2, FacetsCollector fc) throws IOException {
    Analyzer analizador = new StandardAnalyzer();
    List<String> palabras = new ArrayList<String>();
    try {
        TokenStream stream = analizador.tokenStream(null, new StringReader(authors));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        // use palabras.get(i), not palabras.get(0), so every token is queried
        Query query = new TermQuery(new Term("Authors", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(query, BooleanClause.Occur.MUST));
    }
    BooleanQuery.Builder bqbuilder = new BooleanQuery.Builder();
    for (int i = 0; i < bc.size(); i++) {
        bqbuilder.add(bc.get(i));
    }
    if (num1 != null) {
        Query q;
        if (num2 == null) {
            q = IntPoint.newExactQuery("Year", num1);
            bqbuilder.add(q, BooleanClause.Occur.MUST);
        } else {
            if (tipo_year.equals("range")) {
                q = IntPoint.newRangeQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            } else {
                q = IntPoint.newSetQuery("Year", num1, num2);
                bqbuilder.add(q, BooleanClause.Occur.MUST);
            }
        }
    }
    BooleanQuery bq = bqbuilder.build();
    // note: reassigning fc here only affects the local variable, not the caller's reference
    fc = new FacetsCollector();
    TopDocs td = FacetsCollector.search(is, bq, 10, fc);
    for (ScoreDoc scoreDoc : td.scoreDocs) {
        Document doc = is.doc(scoreDoc.doc);
        System.out.println(scoreDoc.score + " - " + doc.get("Authors") + " - " + doc.get("Title")
                + " - Year: " + doc.get("Year"));
    }
    return td;
}
From source file:practica3b.Practica3b.java
public static ArrayList<BooleanClause> createClause(String busqueda, int tipo_busqueda, String tipo) {
    Analyzer analizador;
    List<String> palabras = new ArrayList<String>();
    if (tipo_busqueda == 1) {
        analizador = new EnglishAnalyzer();
    } else if (tipo_busqueda == 2) {
        analizador = new StandardAnalyzer();
    } else {
        analizador = new EnglishAnalyzer();
    }
    try {
        TokenStream stream = analizador.tokenStream(null, new StringReader(busqueda));
        CharTermAttribute catt = stream.addAttribute(CharTermAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            palabras.add(catt.toString());
        }
        // end() must be called before close(), not after it
        stream.end();
        stream.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    ArrayList<BooleanClause> bc = new ArrayList<BooleanClause>();
    for (int i = 0; i < palabras.size(); i++) {
        Query q;
        if (tipo_busqueda == 1)
            q = new TermQuery(new Term("Title", palabras.get(i)));
        else if (tipo_busqueda == 2)
            q = new TermQuery(new Term("Authors", palabras.get(i)));
        else
            q = new TermQuery(new Term("Abstract", palabras.get(i)));
        if (tipo.equals("should"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.SHOULD));
        else if (tipo.equals("must"))
            bc.add(new BooleanClause(q, BooleanClause.Occur.MUST));
    }
    return bc;
}
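A possible way to consume createClause, mirroring how busquedaAuthor assembles its BooleanQuery above (the query string and mode are illustrative):

// Build a BooleanQuery over the analyzed Title terms (tipo_busqueda == 1)
ArrayList<BooleanClause> clauses = createClause("information retrieval models", 1, "should");
BooleanQuery.Builder builder = new BooleanQuery.Builder();
for (BooleanClause clause : clauses) {
    builder.add(clause);
}
BooleanQuery query = builder.build();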