List of usage examples for org.apache.lucene.analysis CachingTokenFilter incrementToken
@Override public final boolean incrementToken() throws IOException
From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.processors.SKOSQueryNodeProcessor.java
License:Apache License
/**
 * Analyzes the text of a {@link FieldQueryNode} with the configured analyzer and
 * rewrites the node into a term, boolean, multi-phrase or tokenized-phrase node.
 * Where the token stream carries a {@code SKOSTypeAttribute} and boosts are
 * configured, term children are wrapped in {@link BoostQueryNode}s.
 *
 * <p>Wildcard, fuzzy, regexp and range nodes are passed through untouched.
 *
 * @param node the query node to post-process
 * @return the (possibly replaced) query node
 * @throws QueryNodeException never thrown directly here; declared by the contract
 */
@Override protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node.getParent() instanceof RangeQueryNode)) {
        FieldQueryNode fieldNode = ((FieldQueryNode) node);
        String text = fieldNode.getTextAsString();
        String field = fieldNode.getFieldAsString();
        TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, text);
            source.reset();
        } catch (IOException e1) {
            throw new RuntimeException(e1);
        }
        // Buffer all tokens so the stream can be replayed after the counting pass.
        CachingTokenFilter buffer = new CachingTokenFilter(source);
        PositionIncrementAttribute posIncrAtt = null;
        int numTokens = 0;
        int positionCount = 0;
        boolean severalTokensAtSamePosition = false;
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
        // First pass: count tokens and detect stacked tokens (increment == 0).
        try {
            while (buffer.incrementToken()) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
            }
        } catch (IOException e) {
            // ignore
        }
        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (IOException e) {
            // ignore
        }
        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        } else if (numTokens == 1) {
            // Single token: reuse the original node with the analyzed text.
            String term = null;
            try {
                boolean hasNext;
                hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            fieldNode.setText(term);
            return fieldNode;
        } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) {
            if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) {
                // no phrase query:
                LinkedList<QueryNode> children = new LinkedList<QueryNode>();
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    // Apply a SKOS-type-derived boost when the attribute and boost table exist.
                    if (buffer.hasAttribute(SKOSTypeAttribute.class) && boosts != null) {
                        SKOSTypeAttribute skosAttr = buffer.getAttribute(SKOSTypeAttribute.class);
                        children.add(new BoostQueryNode(new FieldQueryNode(field, term, -1, -1),
                                getBoost(skosAttr.getSkosType())));
                    } else {
                        children.add(new FieldQueryNode(field, term, -1, -1));
                    }
                }
                return new GroupQueryNode(new StandardBooleanQueryNode(children, positionCount == 1));
            } else {
                // phrase query:
                MultiPhraseQueryNode mpq = new MultiPhraseQueryNode();
                List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>();
                int position = -1;
                int i = 0;
                int termGroupCount = 0;
                for (; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.toString();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    // A positive increment closes the current same-position group: flush it.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        for (FieldQueryNode termNode : multiTerms) {
                            if (this.positionIncrementsEnabled) {
                                termNode.setPositionIncrement(position);
                            } else {
                                termNode.setPositionIncrement(termGroupCount);
                            }
                            mpq.add(termNode);
                        }
                        // Only increment once for each "group" of
                        // terms that were in the same position:
                        termGroupCount++;
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new FieldQueryNode(field, term, -1, -1));
                }
                // Flush the trailing group left over after the loop.
                for (FieldQueryNode termNode : multiTerms) {
                    if (this.positionIncrementsEnabled) {
                        termNode.setPositionIncrement(position);
                    } else {
                        termNode.setPositionIncrement(termGroupCount);
                    }
                    mpq.add(termNode);
                }
                return mpq;
            }
        } else {
            // Quoted, no stacked tokens: plain tokenized phrase query.
            TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                if (this.positionIncrementsEnabled) {
                    position += positionIncrement;
                    newFieldNode.setPositionIncrement(position);
                } else {
                    newFieldNode.setPositionIncrement(i);
                }
                pq.add(newFieldNode);
            }
            return pq;
        }
    }
    return node;
}
From source file:biospectra.classify.Classifier.java
License:Apache License
/**
 * Appends one optional (SHOULD) {@link TermQuery} clause per token emitted by the
 * given stream — a bag-of-kmers style query with no positional constraints.
 *
 * @param builder   receives the generated clauses
 * @param field     index field the terms are built against
 * @param stream    token stream to consume (caller manages reset/close)
 * @param termAtt   term-bytes attribute bound to {@code stream}
 * @param offsetAtt unused here; kept for signature parity with the sibling builders
 * @throws IOException if the stream fails while advancing
 */
private void createNaiveKmerQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException {
    for (;;) {
        if (!stream.incrementToken()) {
            break;
        }
        // Deep-copy: the attribute's BytesRef is reused across incrementToken() calls.
        BytesRef termBytes = BytesRef.deepCopyOf(termAtt.getBytesRef());
        builder.add(new TermQuery(new Term(field, termBytes)), BooleanClause.Occur.SHOULD);
    }
}
From source file:biospectra.classify.Classifier.java
License:Apache License
private void createChainProximityQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException { Term termArr[] = new Term[2]; long offsetArr[] = new long[2]; for (int i = 0; i < 2; i++) { termArr[i] = null;//from w w w . j a v a 2 s. co m offsetArr[i] = 0; } while (stream.incrementToken()) { Term t = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef())); if (termArr[0] == null) { termArr[0] = t; offsetArr[0] = offsetAtt.startOffset(); } else if (termArr[1] == null) { termArr[1] = t; offsetArr[1] = offsetAtt.startOffset(); } else { // shift termArr[0] = termArr[1]; offsetArr[0] = offsetArr[1]; // fill termArr[1] = t; offsetArr[1] = offsetAtt.startOffset(); } if (termArr[0] != null && termArr[1] != null) { long offsetDiff = offsetArr[1] - offsetArr[0]; if (offsetDiff > 0) { PhraseQuery.Builder pq = new PhraseQuery.Builder(); pq.setSlop((int) (offsetDiff) + 1); pq.add(termArr[0]); pq.add(termArr[1]); builder.add(pq.build(), BooleanClause.Occur.SHOULD); } } } }
From source file:biospectra.classify.Classifier.java
License:Apache License
/**
 * Builds non-overlapping-pair proximity clauses: tokens are consumed in disjoint
 * pairs (t0,t1), (t2,t3), ...; each complete pair with a positive start-offset gap
 * becomes a sloppy {@link PhraseQuery} SHOULD clause. An unpaired trailing token
 * is added as a plain {@link TermQuery} SHOULD clause.
 *
 * @param builder   receives the generated clauses
 * @param field     index field the terms are built against
 * @param stream    token stream to consume (caller manages reset/close)
 * @param termAtt   term-bytes attribute bound to {@code stream}
 * @param offsetAtt offset attribute bound to {@code stream}
 * @throws IOException if the stream fails while advancing
 */
private void createPairedProximityQueryClauses(BooleanQuery.Builder builder, String field, CachingTokenFilter stream, TermToBytesRefAttribute termAtt, OffsetAttribute offsetAtt) throws IOException {
    Term pendingTerm = null;
    long pendingOffset = 0;
    while (stream.incrementToken()) {
        // Deep-copy: the attribute's BytesRef is reused across incrementToken() calls.
        Term currentTerm = new Term(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
        long currentOffset = offsetAtt.startOffset();
        if (pendingTerm == null) {
            // First token of the pair: hold it until its partner arrives.
            pendingTerm = currentTerm;
            pendingOffset = currentOffset;
        } else {
            // Second token: emit the pair (when the gap is positive) and reset.
            long offsetDiff = currentOffset - pendingOffset;
            if (offsetDiff > 0) {
                PhraseQuery.Builder pq = new PhraseQuery.Builder();
                pq.setSlop((int) (offsetDiff) + 1);
                pq.add(pendingTerm);
                pq.add(currentTerm);
                builder.add(pq.build(), BooleanClause.Occur.SHOULD);
            }
            pendingTerm = null;
        }
    }
    // Odd token count: the leftover term still contributes as a single-term clause.
    if (pendingTerm != null) {
        builder.add(new TermQuery(pendingTerm), BooleanClause.Occur.SHOULD);
    }
}
From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License:Open Source License
/**
 * Analyzes the text of a {@link FieldQueryNode}; when the analyzer emits more than
 * one token the node is replaced by a {@link TokenizedPhraseQueryNode} whose term
 * children carry sequential position increments and inherit the node's datatype tag.
 * Single-token and zero-token inputs fall through to the enclosing processor chain
 * via the final {@code return node}.
 *
 * <p>Wildcard, fuzzy, regexp, protected and range nodes are passed through untouched.
 *
 * @param node the query node to post-process
 * @return the (possibly replaced) query node
 * @throws QueryNodeException never thrown directly here; declared by the contract
 */
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException {
    if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode)
            && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode)
            && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode)) {
        final FieldQueryNode fieldNode = ((FieldQueryNode) node);
        final String text = fieldNode.getTextAsString();
        final String field = fieldNode.getFieldAsString();
        final TokenStream source;
        try {
            source = this.analyzer.tokenStream(field, new StringReader(text));
            source.reset();
        } catch (final IOException e1) {
            throw new RuntimeException(e1);
        }
        // Buffer all tokens so the stream can be replayed after the counting pass.
        final CachingTokenFilter buffer = new CachingTokenFilter(source);
        int numTokens = 0;
        try {
            while (buffer.incrementToken()) {
                numTokens++;
            }
        } catch (final IOException e) {
            // ignore
        }
        try {
            // rewind the buffer stream
            buffer.reset();
            // close original stream - all tokens buffered
            source.close();
        } catch (final IOException e) {
            // ignore
        }
        if (!buffer.hasAttribute(CharTermAttribute.class)) {
            return new NoTokenFoundQueryNode();
        }
        final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class);
        if (numTokens == 0) {
            return new NoTokenFoundQueryNode();
        }
        // phrase query
        else if (numTokens != 1) {
            String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node);
            final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode();
            // assign datatype
            pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
            // Second pass: replay the buffered tokens into phrase children.
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                try {
                    final boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                } catch (final IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1);
                // set position increment
                newFieldNode.setPositionIncrement(i);
                // assign datatype
                newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype);
                pq.add(newFieldNode);
            }
            return pq;
        }
    }
    return node;
}
From source file:org.allenai.blacklab.queryParser.lucene.QueryParserBase.java
License:Apache License
/**
 * Analyzes {@code queryText} and builds a term, boolean, multi-phrase or phrase
 * {@link TextPattern} depending on the token count, position increments and the
 * {@code quoted}/{@code autoGeneratePhraseQueries} settings. This mirrors Lucene's
 * classic {@code QueryParserBase#newFieldQuery}, with query types swapped for
 * BlackLab {@code TextPattern} equivalents (see the "BL:" comments).
 *
 * @param analyzer  analyzer used to tokenize the text
 * @param field     field the query targets
 * @param queryText raw query text to analyze
 * @param quoted    whether the text came from a quoted (phrase) expression
 * @return the built pattern, or {@code null} when analysis yields no tokens
 * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow
 */
protected TextPattern newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    TokenStream source;
    try {
        source = analyzer.tokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text");
        p.initCause(e);
        throw p;
    }
    // Buffer all tokens so the stream can be replayed after the counting pass.
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermToBytesRefAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    buffer.reset();
    if (buffer.hasAttribute(TermToBytesRefAttribute.class)) {
        termAtt = buffer.getAttribute(TermToBytesRefAttribute.class);
    }
    if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
        posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
    }
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;
    boolean hasMoreTokens = false;
    // First pass: count tokens and detect stacked tokens (increment == 0).
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        ParseException p = new ParseException("Cannot close TokenStream analyzing query text");
        p.initCause(e);
        throw p;
    }
    // NOTE: 'bytes' aliases the attribute's internal BytesRef, which fillBytesRef()
    // refreshes on each replayed token; hence the deepCopyOf at every use site.
    BytesRef bytes = termAtt == null ? null : termAtt.getBytesRef();
    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            termAtt.fillBytesRef();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
    } else {
        if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) {
            if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) {
                // no phrase query:
                TextPatternBoolean q = newBooleanQuery(positionCount == 1); // BL: BooleanQuery -> TextPatternBoolean
                BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? BooleanClause.Occur.MUST
                        : BooleanClause.Occur.SHOULD;
                for (int i = 0; i < numTokens; i++) {
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    TextPattern currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes)));
                    q.add(currentQuery, occur);
                }
                return q;
            } else {
                // phrase query:
                TPMultiPhrase mpq = newMultiPhraseQuery(); // BL: MultiPhraseQuery -> TPMultiPhrase
                mpq.setSlop(phraseSlop);
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        termAtt.fillBytesRef();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    // A positive increment closes the current same-position group: flush it.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
                // Flush the trailing group left over after the loop.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // Quoted or auto-phrase, no stacked tokens: plain phrase query.
            TPPhrase pq = newPhraseQuery(); // BL: PhraseQuery -> TPPhrase
            pq.setSlop(phraseSlop);
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    termAtt.fillBytesRef();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position);
                } else {
                    pq.add(new Term(field, BytesRef.deepCopyOf(bytes)));
                }
            }
            return pq;
        }
    }
}
From source file:org.apache.fuzzydb.queryParser.QueryParser.java
License:Open Source License
/**
 * Analyzes {@code queryText} and builds a term, boolean, multi-phrase or phrase
 * {@link Query} based on the token count and position increments. Follows the
 * classic Lucene {@code QueryParser#getFieldQuery} shape: one counting pass over a
 * {@link CachingTokenFilter}, then a replay pass that materializes the query.
 *
 * @param field     field the query targets
 * @param queryText raw query text to analyze
 * @return the built query, or {@code null} when analysis yields no tokens
 * @exception ParseException throw in overridden method to disallow
 */
protected Query getFieldQuery(String field, String queryText) throws ParseException {
    // Use the analyzer to get all the tokens, and then build a TermQuery,
    // PhraseQuery, or nothing based on the term count
    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new StringReader(queryText));
        source.reset();
    } catch (IOException e) {
        // Fall back to a fresh stream when the reusable one cannot be reset.
        source = analyzer.tokenStream(field, new StringReader(queryText));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    TermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(TermAttribute.class)) {
            termAtt = buffer.getAttribute(TermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;
    boolean hasMoreTokens = false;
    // First pass: count tokens and detect stacked tokens (increment == 0).
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }
    if (numTokens == 0)
        return null;
    else if (numTokens == 1) {
        String term = null;
        try {
            boolean hasNext = buffer.incrementToken();
            assert hasNext == true;
            term = termAtt.term();
        } catch (IOException e) {
            // safe to ignore, because we know the number of tokens
        }
        return newTermQuery(new Term(field, term));
    } else {
        if (severalTokensAtSamePosition) {
            if (positionCount == 1) {
                // no phrase query:
                BooleanQuery q = newBooleanQuery(true);
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    Query currentQuery = newTermQuery(new Term(field, term));
                    q.add(currentQuery, BooleanClause.Occur.SHOULD);
                }
                return q;
            } else {
                // phrase query:
                MultiPhraseQuery mpq = newMultiPhraseQuery();
                mpq.setSlop(phraseSlop);
                List<Term> multiTerms = new ArrayList<Term>();
                int position = -1;
                for (int i = 0; i < numTokens; i++) {
                    String term = null;
                    int positionIncrement = 1;
                    try {
                        boolean hasNext = buffer.incrementToken();
                        assert hasNext == true;
                        term = termAtt.term();
                        if (posIncrAtt != null) {
                            positionIncrement = posIncrAtt.getPositionIncrement();
                        }
                    } catch (IOException e) {
                        // safe to ignore, because we know the number of tokens
                    }
                    // A positive increment closes the current same-position group: flush it.
                    if (positionIncrement > 0 && multiTerms.size() > 0) {
                        if (enablePositionIncrements) {
                            mpq.add(multiTerms.toArray(new Term[0]), position);
                        } else {
                            mpq.add(multiTerms.toArray(new Term[0]));
                        }
                        multiTerms.clear();
                    }
                    position += positionIncrement;
                    multiTerms.add(new Term(field, term));
                }
                // Flush the trailing group left over after the loop.
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[0]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[0]));
                }
                return mpq;
            }
        } else {
            // No stacked tokens: plain phrase query.
            PhraseQuery pq = newPhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.term();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(new Term(field, term), position);
                } else {
                    pq.add(new Term(field, term));
                }
            }
            return pq;
        }
    }
}
From source file:org.elasticsearch.index.search.TextQueryParser.java
License:Apache License
/**
 * Builds a text query of the requested {@code type} (BOOLEAN, PHRASE or
 * PHRASE_PREFIX) for the configured field and text. Resolves the field mapper and
 * analyzer from the parse context, then follows the classic
 * {@code QueryParser#getFieldQuery} analysis shape: one counting pass over a
 * {@link CachingTokenFilter}, then a replay pass that materializes the query.
 * Results are wrapped via {@code wrapSmartNameQuery} for smart-name field handling.
 *
 * @param type the kind of query to produce
 * @return the built query; {@code MatchNoDocsQuery.INSTANCE} when analysis yields no tokens
 */
public Query parse(Type type) {
    FieldMapper mapper = null;
    String field = fieldName;
    // Resolve the concrete index field (and its mapper) behind the requested name.
    MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName);
    if (smartNameFieldMappers != null) {
        if (smartNameFieldMappers.hasMapper()) {
            mapper = smartNameFieldMappers.mapper();
            if (mapper != null) {
                field = mapper.names().indexName();
            }
        }
    }
    // Mapper-driven shortcut: let the field mapper build the query itself.
    if (mapper != null && mapper.useFieldQueryWithQueryString()) {
        return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext);
    }
    // Pick the search analyzer: explicit request > mapper's > service default.
    Analyzer analyzer = null;
    if (this.analyzer == null) {
        if (mapper != null) {
            analyzer = mapper.searchAnalyzer();
        }
        if (analyzer == null) {
            analyzer = parseContext.mapperService().searchAnalyzer();
        }
    } else {
        analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer);
        if (analyzer == null) {
            throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]");
        }
    }
    // Logic similar to QueryParser#getFieldQuery
    TokenStream source;
    try {
        source = analyzer.reusableTokenStream(field, new FastStringReader(text));
        source.reset();
    } catch (IOException e) {
        // Fall back to a fresh stream when the reusable one cannot be reset.
        source = analyzer.tokenStream(field, new FastStringReader(text));
    }
    CachingTokenFilter buffer = new CachingTokenFilter(source);
    CharTermAttribute termAtt = null;
    PositionIncrementAttribute posIncrAtt = null;
    int numTokens = 0;
    boolean success = false;
    try {
        buffer.reset();
        success = true;
    } catch (IOException e) {
        // success==false if we hit an exception
    }
    if (success) {
        if (buffer.hasAttribute(CharTermAttribute.class)) {
            termAtt = buffer.getAttribute(CharTermAttribute.class);
        }
        if (buffer.hasAttribute(PositionIncrementAttribute.class)) {
            posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class);
        }
    }
    int positionCount = 0;
    boolean severalTokensAtSamePosition = false;
    boolean hasMoreTokens = false;
    // First pass: count tokens and detect stacked tokens (increment == 0).
    if (termAtt != null) {
        try {
            hasMoreTokens = buffer.incrementToken();
            while (hasMoreTokens) {
                numTokens++;
                int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1;
                if (positionIncrement != 0) {
                    positionCount += positionIncrement;
                } else {
                    severalTokensAtSamePosition = true;
                }
                hasMoreTokens = buffer.incrementToken();
            }
        } catch (IOException e) {
            // ignore
        }
    }
    try {
        // rewind the buffer stream
        buffer.reset();
        // close original stream - all tokens buffered
        source.close();
    } catch (IOException e) {
        // ignore
    }
    Term termFactory = new Term(field);
    if (numTokens == 0) {
        return MatchNoDocsQuery.INSTANCE;
    } else if (type == Type.BOOLEAN) {
        if (numTokens == 1) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query q = newTermQuery(mapper, termFactory.createTerm(term));
            return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
        }
        BooleanQuery q = new BooleanQuery(positionCount == 1);
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term));
            q.add(currentQuery, occur);
        }
        return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext);
    } else if (type == Type.PHRASE) {
        if (severalTokensAtSamePosition) {
            MultiPhraseQuery mpq = new MultiPhraseQuery();
            mpq.setSlop(phraseSlop);
            List<Term> multiTerms = new ArrayList<Term>();
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                // A positive increment closes the current same-position group: flush it.
                if (positionIncrement > 0 && multiTerms.size() > 0) {
                    if (enablePositionIncrements) {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                    } else {
                        mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                    }
                    multiTerms.clear();
                }
                position += positionIncrement;
                multiTerms.add(termFactory.createTerm(term));
            }
            // Flush the trailing group left over after the loop.
            if (enablePositionIncrements) {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
            } else {
                mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
            }
            return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
        } else {
            // No stacked tokens: plain phrase query.
            PhraseQuery pq = new PhraseQuery();
            pq.setSlop(phraseSlop);
            int position = -1;
            for (int i = 0; i < numTokens; i++) {
                String term = null;
                int positionIncrement = 1;
                try {
                    boolean hasNext = buffer.incrementToken();
                    assert hasNext == true;
                    term = termAtt.toString();
                    if (posIncrAtt != null) {
                        positionIncrement = posIncrAtt.getPositionIncrement();
                    }
                } catch (IOException e) {
                    // safe to ignore, because we know the number of tokens
                }
                if (enablePositionIncrements) {
                    position += positionIncrement;
                    pq.add(termFactory.createTerm(term), position);
                } else {
                    pq.add(termFactory.createTerm(term));
                }
            }
            return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext);
        }
    } else if (type == Type.PHRASE_PREFIX) {
        MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery();
        mpq.setSlop(phraseSlop);
        mpq.setMaxExpansions(maxExpansions);
        List<Term> multiTerms = new ArrayList<Term>();
        int position = -1;
        for (int i = 0; i < numTokens; i++) {
            String term = null;
            int positionIncrement = 1;
            try {
                boolean hasNext = buffer.incrementToken();
                assert hasNext == true;
                term = termAtt.toString();
                if (posIncrAtt != null) {
                    positionIncrement = posIncrAtt.getPositionIncrement();
                }
            } catch (IOException e) {
                // safe to ignore, because we know the number of tokens
            }
            // A positive increment closes the current same-position group: flush it.
            if (positionIncrement > 0 && multiTerms.size() > 0) {
                if (enablePositionIncrements) {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
                } else {
                    mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
                }
                multiTerms.clear();
            }
            position += positionIncrement;
            multiTerms.add(termFactory.createTerm(term));
        }
        // Flush the trailing group left over after the loop.
        if (enablePositionIncrements) {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position);
        } else {
            mpq.add(multiTerms.toArray(new Term[multiTerms.size()]));
        }
        return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext);
    }
    throw new ElasticSearchIllegalStateException("No type found for [" + type + "]");
}
From source file:org.janusgraph.diskstorage.solr.SolrIndex.java
License:Apache License
/**
 * Tokenizes {@code value} with a reflectively-instantiated {@link Tokenizer} and
 * returns each term as a UTF-8 string.
 *
 * <p>Fix: the original never called {@code stream.end()}, skipping the mandatory
 * final step of the Lucene TokenStream consume workflow
 * (reset → incrementToken* → end → close); {@code end()} lets the chain record
 * end-of-stream state (e.g. final offsets) before close.
 *
 * @param tokenizerClass fully-qualified class name of the tokenizer; must have a
 *                       public no-arg constructor
 * @param value          text to tokenize
 * @return the extracted terms, in stream order
 * @throws IllegalArgumentException if the tokenizer cannot be instantiated or
 *                                  tokenization fails
 */
@SuppressWarnings("unchecked") private List<String> customTokenize(String tokenizerClass, String value) {
    CachingTokenFilter stream = null;
    try {
        final List<String> terms = new ArrayList<>();
        final Tokenizer tokenizer = ((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader()
                .loadClass(tokenizerClass).getConstructor()).newInstance();
        tokenizer.setReader(new StringReader(value));
        stream = new CachingTokenFilter(tokenizer);
        final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
        stream.reset();
        while (stream.incrementToken()) {
            // utf8ToString() copies, so no deepCopyOf is needed here.
            terms.add(termAtt.getBytesRef().utf8ToString());
        }
        stream.end(); // complete the reset/incrementToken/end/close contract
        return terms;
    } catch (ReflectiveOperationException | IOException e) {
        throw new IllegalArgumentException(e.getMessage(), e);
    } finally {
        // Closes the filter and, through it, the wrapped tokenizer.
        IOUtils.closeQuietly(stream);
    }
}
From source file:org.sindice.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { this.positionIncrementsEnabled = false; final Boolean positionIncrementsEnabled = this.getQueryConfigHandler() .get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS); if (positionIncrementsEnabled != null) { this.positionIncrementsEnabled = positionIncrementsEnabled; }//from w ww .j a v a 2s. c o m final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final String datatype = (String) fieldNode.getTag(DatatypeQueryNode.DATATYPE_TAGID); if (datatype == null) { return node; } final Analyzer analyzer = this.getQueryConfigHandler().get(KeywordConfigurationKeys.DATATYPES_ANALYZERS) .get(datatype); if (analyzer == null) { throw new QueryNodeException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "No analyzer associated with " + datatype)); } PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; int positionCount = 0; boolean severalTokensAtSamePosition = false; final TokenStream source; try { source = analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } try { while (buffer.incrementToken()) { numTokens++; final int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { if (nbTwigs != 0) { // Twig special case return new WildcardNodeQueryNode(); } return new NoTokenFoundQueryNode(); } else if (numTokens == 1) { String term = null; try { boolean hasNext; hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } fieldNode.setText(term); return fieldNode; } else { // no phrase query: final LinkedList<QueryNode> children = new LinkedList<QueryNode>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; final int positionIncrement = 1; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); if (this.positionIncrementsEnabled) { position += positionIncrement; newFieldNode.setPositionIncrement(position); } else { newFieldNode.setPositionIncrement(i); } children.add(new FieldQueryNode(field, term, -1, -1)); } if (node.getParent() instanceof TokenizedPhraseQueryNode) { throw new QueryNodeException(new MessageImpl("Cannot build a MultiPhraseQuery")); } // If multiple terms at one single position, this must be a query // expansion. Perform a OR between the terms. 
if (severalTokensAtSamePosition && positionCount == 1) { return new GroupQueryNode(new OrQueryNode(children)); } // if several tokens at same position && position count > 1, then // results can be unexpected else { final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); for (int i = 0; i < children.size(); i++) { pq.add(children.get(i)); } return pq; } } } else if (node instanceof TwigQueryNode) { nbTwigs--; assert nbTwigs >= 0; } return node; }