Usage examples for org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute#getBytesRef
public BytesRef getBytesRef();
From source file:biospectra.classify.Classifier.java
License:Apache License
/**
 * Adds one optional (SHOULD) TermQuery clause per token emitted by the stream.
 *
 * @param builder   receives the generated clauses
 * @param field     index field the terms are created against
 * @param stream    token source; assumed already reset by the caller
 * @param termAtt   term attribute bound to {@code stream}
 * @param offsetAtt unused here; kept for signature parity with the sibling builders
 * @throws IOException if token iteration fails
 */
private void createNaiveKmerQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream,
        TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException {
    while (stream.incrementToken()) {
        // Deep-copy: the attribute's BytesRef is a reusable buffer, only valid per token.
        Term term = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
        builder.add(new TermQuery(term), BooleanClause.Occur.SHOULD);
    }
}
From source file:biospectra.classify.Classifier.java
License:Apache License
private void createChainProximityQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException { Term termArr[] = new Term[2]; long offsetArr[] = new long[2]; for (int i = 0; i < 2; i++) { termArr[i] = null;/*from w w w.j a v a 2 s. c o m*/ offsetArr[i] = 0; } while (stream.incrementToken()) { Term t = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())); if (termArr[0] == null) { termArr[0] = t; offsetArr[0] = offsetAtt.startOffset(); } else if (termArr[1] == null) { termArr[1] = t; offsetArr[1] = offsetAtt.startOffset(); } else { // shift termArr[0] = termArr[1]; offsetArr[0] = offsetArr[1]; // fill termArr[1] = t; offsetArr[1] = offsetAtt.startOffset(); } if (termArr[0] != null && termArr[1] != null) { long offsetDiff = offsetArr[1] - offsetArr[0]; if (offsetDiff > 0) { PhraseQuery.Builder pq = new PhraseQuery.Builder(); pq.setSlop((int) (offsetDiff) + 1); pq.add(termArr[0]); pq.add(termArr[1]); builder.add(pq.build(), BooleanClause.Occur.SHOULD); } } } }
From source file:biospectra.classify.Classifier.java
License:Apache License
private void createPairedProximityQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException { Term termArr[] = new Term[2]; long offsetArr[] = new long[2]; for (int i = 0; i < 2; i++) { termArr[i] = null;//ww w.j a va 2 s.c o m offsetArr[i] = 0; } int count = 0; while (stream.incrementToken()) { Term t = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())); if (count % 2 == 0) { termArr[0] = t; offsetArr[0] = offsetAtt.startOffset(); } else { termArr[1] = t; offsetArr[1] = offsetAtt.startOffset(); long offsetDiff = offsetArr[1] - offsetArr[0]; if (offsetDiff > 0) { PhraseQuery.Builder pq = new PhraseQuery.Builder(); pq.setSlop((int) (offsetDiff) + 1); pq.add(termArr[0]); pq.add(termArr[1]); builder.add(pq.build(), BooleanClause.Occur.SHOULD); } termArr[0] = null; termArr[1] = null; } count++; } if (termArr[0] != null) { builder.add(new TermQuery(termArr[0]), BooleanClause.Occur.SHOULD); termArr[0] = null; } }
From source file:com.stratio.cassandra.index.query.Condition.java
License:Apache License
protected String analyze(String field, String value, ColumnMapper<?> columnMapper) { TokenStream source = null;//from w ww.ja v a 2 s .c o m try { Analyzer analyzer = columnMapper.analyzer(); source = analyzer.tokenStream(field, value); source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) { return null; } termAtt.fillBytesRef(); if (source.incrementToken()) { throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value); } source.end(); return BytesRef.deepCopyOf(bytes).utf8ToString(); } catch (IOException e) { throw new RuntimeException("Error analyzing multiTerm term: " + value, e); } finally { IOUtils.closeWhileHandlingException(source); } }
From source file:com.tuplejump.stargate.lucene.query.Condition.java
License:Apache License
protected String analyze(String field, String value, Analyzer analyzer) { TokenStream source = null;//from w ww. j a v a2s . c om try { source = analyzer.tokenStream(field, value); source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) { return null; } termAtt.fillBytesRef(); if (source.incrementToken()) { throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + value); } source.end(); return BytesRef.deepCopyOf(bytes).utf8ToString(); } catch (IOException e) { throw new RuntimeException("Error analyzing multiTerm term: " + value, e); } finally { IOUtils.closeWhileHandlingException(source); } }
From source file:jp.scaleout.elasticsearch.plugins.queryparser.classic.QueryParserBase.java
License:Apache License
protected BytesRef analyzeMultitermTerm(String field, String part, Analyzer analyzerIn) { if (analyzerIn == null) analyzerIn = getAnalyzer();// w ww . ja va2 s .c o m try (TokenStream source = analyzerIn.tokenStream(field, part)) { source.reset(); TermToBytesRefAttribute termAtt = source.getAttribute(TermToBytesRefAttribute.class); BytesRef bytes = termAtt.getBytesRef(); if (!source.incrementToken()) throw new IllegalArgumentException("analyzer returned no terms for multiTerm term: " + part); //termAtt.fillBytesRef(); if (source.incrementToken()) throw new IllegalArgumentException("analyzer returned too many terms for multiTerm term: " + part); source.end(); return BytesRef.deepCopyOf(bytes); } catch (IOException e) { throw new RuntimeException("Error analyzing multiTerm term: " + part, e); } }
From source file:lia.chapter4.AnalyzerUtils.java
License:Apache License
public static void displayTokens(TokenStream stream) throws IOException { TermToBytesRefAttribute term = stream.addAttribute(TermToBytesRefAttribute.class); while (stream.incrementToken()) { System.out.print("[" + term.getBytesRef().utf8ToString() + "] "); //B }// w w w . ja v a2 s .com }
From source file:lia.chapter4.AnalyzerUtils.java
License:Apache License
/**
 * Returns the source's current term as a UTF-8 String.
 * Uses addAttribute so the attribute is created if not yet present.
 *
 * @param source attribute source to read the term from
 * @return the current term text
 */
public static String getTerm(AttributeSource source) {
    return source.addAttribute(TermToBytesRefAttribute.class).getBytesRef().utf8ToString();
}
From source file:lia.chapter4.AnalyzerUtils.java
License:Apache License
/**
 * Tokenizes {@code text} against the fixed "contents" field and prints each
 * token grouped by position: a new line headed "N: " starts whenever the
 * position increment is positive; zero-increment tokens (synonyms) stay on
 * the same line.
 *
 * @param analyzer analyzer used to tokenize the text
 * @param text     raw text to analyze and display
 * @throws IOException if token iteration fails
 */
public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
    TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
    TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
    PositionIncrementAttribute posIncrAttr = stream.addAttribute(PositionIncrementAttribute.class);

    int position = 0;
    while (stream.incrementToken()) {
        int increment = posIncrAttr.getPositionIncrement();
        if (increment > 0) {
            // New position: advance and start a fresh "N: " line.
            position += increment;
            System.out.println();
            System.out.print(position + ": ");
        }
        System.out.print("[" + termAttr.getBytesRef().utf8ToString() + "] ");
    }
    System.out.println();
}
From source file:lia.chapter4.AnalyzerUtils.java
License:Apache License
/**
 * Asserts that analyzing {@code input} (against the fixed "field" name)
 * yields exactly the terms in {@code output}, in order, with no extras.
 *
 * @param analyzer analyzer under test
 * @param input    raw text to tokenize
 * @param output   expected term texts, in token order
 * @throws Exception if token iteration fails or an assertion trips
 */
public static void assertAnalyzesTo(Analyzer analyzer, String input, String[] output) throws Exception {
    TokenStream stream = analyzer.tokenStream("field", new StringReader(input));
    TermToBytesRefAttribute termAttr = stream.addAttribute(TermToBytesRefAttribute.class);
    for (String expectedTerm : output) {
        Assert.assertTrue(stream.incrementToken());
        Assert.assertEquals(expectedTerm, termAttr.getBytesRef().utf8ToString());
    }
    // Stream must be exhausted once all expected terms are consumed.
    Assert.assertFalse(stream.incrementToken());
    stream.close();
}