List of usage examples for org.apache.lucene.analysis CachingTokenFilter reset
@Override public void reset() throws IOException
From source file:at.ac.univie.mminf.luceneSKOS.queryparser.flexible.standard.processors.SKOSQueryNodeProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { FieldQueryNode fieldNode = ((FieldQueryNode) node); String text = fieldNode.getTextAsString(); String field = fieldNode.getFieldAsString(); TokenStream source;//from www . j a v a 2s. co m try { source = this.analyzer.tokenStream(field, text); source.reset(); } catch (IOException e1) { throw new RuntimeException(e1); } CachingTokenFilter buffer = new CachingTokenFilter(source); PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; int positionCount = 0; boolean severalTokensAtSamePosition = false; if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } try { while (buffer.incrementToken()) { numTokens++; int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } } } catch (IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { return new NoTokenFoundQueryNode(); } else if (numTokens == 1) { String term = null; try { boolean hasNext; hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } fieldNode.setText(term); return fieldNode; } else if (severalTokensAtSamePosition || !(node instanceof QuotedFieldQueryNode)) { if (positionCount == 1 || !(node instanceof QuotedFieldQueryNode)) { // no phrase query: LinkedList<QueryNode> children = new LinkedList<QueryNode>(); for (int i = 0; i < numTokens; i++) { String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (buffer.hasAttribute(SKOSTypeAttribute.class) && boosts != null) { SKOSTypeAttribute skosAttr = buffer.getAttribute(SKOSTypeAttribute.class); children.add(new BoostQueryNode(new FieldQueryNode(field, term, -1, -1), getBoost(skosAttr.getSkosType()))); } else { children.add(new FieldQueryNode(field, term, -1, -1)); } } return new GroupQueryNode(new StandardBooleanQueryNode(children, positionCount == 1)); } else { // phrase query: MultiPhraseQueryNode mpq = new MultiPhraseQueryNode(); List<FieldQueryNode> multiTerms = new ArrayList<FieldQueryNode>(); int position = -1; int i = 0; int termGroupCount = 0; for (; i < numTokens; i++) { String term = null; int 
positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.size() > 0) { for (FieldQueryNode termNode : multiTerms) { if (this.positionIncrementsEnabled) { termNode.setPositionIncrement(position); } else { termNode.setPositionIncrement(termGroupCount); } mpq.add(termNode); } // Only increment once for each "group" of // terms that were in the same position: termGroupCount++; multiTerms.clear(); } position += positionIncrement; multiTerms.add(new FieldQueryNode(field, term, -1, -1)); } for (FieldQueryNode termNode : multiTerms) { if (this.positionIncrementsEnabled) { termNode.setPositionIncrement(position); } else { termNode.setPositionIncrement(termGroupCount); } mpq.add(termNode); } return mpq; } } else { TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); if (this.positionIncrementsEnabled) { position += positionIncrement; newFieldNode.setPositionIncrement(position); } else { newFieldNode.setPositionIncrement(i); } pq.add(newFieldNode); } return pq; } } return node; }
From source file:com.bewsia.script.safe.lucene.SEntity.java
License:Open Source License
/**
 * Highlights the parts of {@code text} that match {@code query} using HTML markup.
 *
 * @param query the query whose matches are highlighted
 * @param text the text to highlight
 * @param field the field name handed to the analyzer when tokenizing {@code text}
 * @param fragmentSize size passed to the {@link SimpleFragmenter}
 * @param maxNumFragments maximum number of fragments requested from the highlighter
 * @param separator string placed between fragments
 * @return the highlighted fragments, or {@code text} unchanged when the highlighter returns an
 *         empty string
 * @throws Exception if analysis or highlighting fails
 */
public String highlight(Query query, String text, String field, int fragmentSize, int maxNumFragments,
        String separator) throws Exception {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    try {
        CachingTokenFilter tokenStream = new CachingTokenFilter(
                analyzer.tokenStream(field, new StringReader(text)));

        SimpleHTMLFormatter formatter = new SimpleHTMLFormatter();
        Scorer scorer = new org.apache.lucene.search.highlight.QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        highlighter.setTextFragmenter(new SimpleFragmenter(fragmentSize));

        tokenStream.reset();
        String rv = highlighter.getBestFragments(tokenStream, text, maxNumFragments, separator);
        return rv.length() == 0 ? text : rv;
    } finally {
        // A fresh analyzer is created on every call, so it must be released on every call too.
        analyzer.close();
    }
}
From source file:com.sindicetech.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License:Open Source License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node instanceof ProtectedQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final TokenStream source; try {//from w w w.ja v a2s. co m source = this.analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); int numTokens = 0; try { while (buffer.incrementToken()) { numTokens++; } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { return new NoTokenFoundQueryNode(); } // phrase query else if (numTokens != 1) { String datatype = (String) DatatypeProcessor.getDatatype(this.getQueryConfigHandler(), node); final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); // assign datatype pq.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype); for (int i = 0; i < numTokens; i++) { String term = null; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); // set position increment newFieldNode.setPositionIncrement(i); // assign datatype 
newFieldNode.setTag(DatatypeQueryNode.DATATYPE_TAGID, datatype); pq.add(newFieldNode); } return pq; } } return node; }
From source file:org.allenai.blacklab.queryParser.lucene.QueryParserBase.java
License:Apache License
/** * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow *//*from w ww .j av a2s. c o m*/ protected TextPattern newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count TokenStream source; try { source = analyzer.tokenStream(field, new StringReader(queryText)); source.reset(); } catch (IOException e) { ParseException p = new ParseException("Unable to initialize TokenStream to analyze query text"); p.initCause(e); throw p; } CachingTokenFilter buffer = new CachingTokenFilter(source); TermToBytesRefAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; buffer.reset(); if (buffer.hasAttribute(TermToBytesRefAttribute.class)) { termAtt = buffer.getAttribute(TermToBytesRefAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } int positionCount = 0; boolean severalTokensAtSamePosition = false; boolean hasMoreTokens = false; if (termAtt != null) { try { hasMoreTokens = buffer.incrementToken(); while (hasMoreTokens) { numTokens++; int positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer.incrementToken(); } } catch (IOException e) { // ignore } } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (IOException e) { ParseException p = new ParseException("Cannot close TokenStream analyzing query text"); p.initCause(e); throw p; } BytesRef bytes = termAtt == null ? 
null : termAtt.getBytesRef(); if (numTokens == 0) return null; else if (numTokens == 1) { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; termAtt.fillBytesRef(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } return newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))); } else { if (severalTokensAtSamePosition || (!quoted && !autoGeneratePhraseQueries)) { if (positionCount == 1 || (!quoted && !autoGeneratePhraseQueries)) { // no phrase query: TextPatternBoolean q = newBooleanQuery(positionCount == 1); // BL: BooleanQuery -> TextPatternBoolean BooleanClause.Occur occur = positionCount > 1 && operator == AND_OPERATOR ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; for (int i = 0; i < numTokens; i++) { try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; termAtt.fillBytesRef(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } TextPattern currentQuery = newTermQuery(new Term(field, BytesRef.deepCopyOf(bytes))); q.add(currentQuery, occur); } return q; } else { // phrase query: TPMultiPhrase mpq = newMultiPhraseQuery(); // BL: MultiPhraseQuery -> TPMultiPhrase mpq.setSlop(phraseSlop); List<Term> multiTerms = new ArrayList<Term>(); int position = -1; for (int i = 0; i < numTokens; i++) { int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; termAtt.fillBytesRef(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.size() > 0) { if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } multiTerms.clear(); } position += positionIncrement; multiTerms.add(new Term(field, BytesRef.deepCopyOf(bytes))); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new 
Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } return mpq; } } else { TPPhrase pq = newPhraseQuery(); // BL: PhraseQuery -> TPPhrase pq.setSlop(phraseSlop); int position = -1; for (int i = 0; i < numTokens; i++) { int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; termAtt.fillBytesRef(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq.add(new Term(field, BytesRef.deepCopyOf(bytes)), position); } else { pq.add(new Term(field, BytesRef.deepCopyOf(bytes))); } } return pq; } } }
From source file:org.apache.fuzzydb.queryParser.QueryParser.java
License:Open Source License
/** * @exception ParseException throw in overridden method to disallow *///from www. j ava 2s . c o m protected Query getFieldQuery(String field, String queryText) throws ParseException { // Use the analyzer to get all the tokens, and then build a TermQuery, // PhraseQuery, or nothing based on the term count TokenStream source; try { source = analyzer.reusableTokenStream(field, new StringReader(queryText)); source.reset(); } catch (IOException e) { source = analyzer.tokenStream(field, new StringReader(queryText)); } CachingTokenFilter buffer = new CachingTokenFilter(source); TermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; boolean success = false; try { buffer.reset(); success = true; } catch (IOException e) { // success==false if we hit an exception } if (success) { if (buffer.hasAttribute(TermAttribute.class)) { termAtt = buffer.getAttribute(TermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } } int positionCount = 0; boolean severalTokensAtSamePosition = false; boolean hasMoreTokens = false; if (termAtt != null) { try { hasMoreTokens = buffer.incrementToken(); while (hasMoreTokens) { numTokens++; int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer.incrementToken(); } } catch (IOException e) { // ignore } } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (IOException e) { // ignore } if (numTokens == 0) return null; else if (numTokens == 1) { String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } return newTermQuery(new Term(field, term)); } else { if (severalTokensAtSamePosition) { if (positionCount == 1) { // no phrase query: BooleanQuery q = newBooleanQuery(true); for (int i = 0; i < numTokens; i++) { String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } Query currentQuery = newTermQuery(new Term(field, term)); q.add(currentQuery, BooleanClause.Occur.SHOULD); } return q; } else { // phrase query: MultiPhraseQuery mpq = newMultiPhraseQuery(); mpq.setSlop(phraseSlop); List<Term> multiTerms = new ArrayList<Term>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.size() > 0) { if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } multiTerms.clear(); } position += positionIncrement; multiTerms.add(new Term(field, term)); } if 
(enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[0]), position); } else { mpq.add(multiTerms.toArray(new Term[0])); } return mpq; } } else { PhraseQuery pq = newPhraseQuery(); pq.setSlop(phraseSlop); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.term(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq.add(new Term(field, term), position); } else { pq.add(new Term(field, term)); } } return pq; } } }
From source file:org.apache.maven.index.DefaultIteratorResultSet.java
License:Apache License
/** * Returns a string that contains match fragment highlighted in style as user requested. * //w w w.j a v a 2s.c o m * @param context * @param hr * @param field * @param text * @return * @throws IOException */ protected List<String> highlightField(IndexingContext context, MatchHighlightRequest hr, IndexerField field, String text) throws IOException { // exception with classnames if (MAVEN.CLASSNAMES.equals(field.getOntology())) { text = text.replace('/', '.').replaceAll("^\\.", "").replaceAll("\n\\.", "\n"); } Analyzer analyzer = context.getAnalyzer(); TokenStream baseTokenStream = analyzer.tokenStream(field.getKey(), new StringReader(text)); CachingTokenFilter tokenStream = new CachingTokenFilter(baseTokenStream); Formatter formatter = null; if (MatchHighlightMode.HTML.equals(hr.getHighlightMode())) { formatter = new SimpleHTMLFormatter(); } else { tokenStream.reset(); tokenStream.end(); tokenStream.close(); throw new UnsupportedOperationException( "Hightlight more \"" + hr.getHighlightMode().toString() + "\" is not supported!"); } List<String> bestFragments = getBestFragments(hr.getQuery(), formatter, tokenStream, text, 3); return bestFragments; }
From source file:org.elasticsearch.index.search.TextQueryParser.java
License:Apache License
public Query parse(Type type) { FieldMapper mapper = null;//w ww .j a va 2 s . c om String field = fieldName; MapperService.SmartNameFieldMappers smartNameFieldMappers = parseContext.smartFieldMappers(fieldName); if (smartNameFieldMappers != null) { if (smartNameFieldMappers.hasMapper()) { mapper = smartNameFieldMappers.mapper(); if (mapper != null) { field = mapper.names().indexName(); } } } if (mapper != null && mapper.useFieldQueryWithQueryString()) { return wrapSmartNameQuery(mapper.fieldQuery(text, parseContext), smartNameFieldMappers, parseContext); } Analyzer analyzer = null; if (this.analyzer == null) { if (mapper != null) { analyzer = mapper.searchAnalyzer(); } if (analyzer == null) { analyzer = parseContext.mapperService().searchAnalyzer(); } } else { analyzer = parseContext.mapperService().analysisService().analyzer(this.analyzer); if (analyzer == null) { throw new ElasticSearchIllegalArgumentException("No analyzer found for [" + this.analyzer + "]"); } } // Logic similar to QueryParser#getFieldQuery TokenStream source; try { source = analyzer.reusableTokenStream(field, new FastStringReader(text)); source.reset(); } catch (IOException e) { source = analyzer.tokenStream(field, new FastStringReader(text)); } CachingTokenFilter buffer = new CachingTokenFilter(source); CharTermAttribute termAtt = null; PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; boolean success = false; try { buffer.reset(); success = true; } catch (IOException e) { // success==false if we hit an exception } if (success) { if (buffer.hasAttribute(CharTermAttribute.class)) { termAtt = buffer.getAttribute(CharTermAttribute.class); } if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } } int positionCount = 0; boolean severalTokensAtSamePosition = false; boolean hasMoreTokens = false; if (termAtt != null) { try { hasMoreTokens = buffer.incrementToken(); while (hasMoreTokens) { numTokens++; int 
positionIncrement = (posIncrAtt != null) ? posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } hasMoreTokens = buffer.incrementToken(); } } catch (IOException e) { // ignore } } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (IOException e) { // ignore } Term termFactory = new Term(field); if (numTokens == 0) { return MatchNoDocsQuery.INSTANCE; } else if (type == Type.BOOLEAN) { if (numTokens == 1) { String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } Query q = newTermQuery(mapper, termFactory.createTerm(term)); return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); } BooleanQuery q = new BooleanQuery(positionCount == 1); for (int i = 0; i < numTokens; i++) { String term = null; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (IOException e) { // safe to ignore, because we know the number of tokens } Query currentQuery = newTermQuery(mapper, termFactory.createTerm(term)); q.add(currentQuery, occur); } return wrapSmartNameQuery(q, smartNameFieldMappers, parseContext); } else if (type == Type.PHRASE) { if (severalTokensAtSamePosition) { MultiPhraseQuery mpq = new MultiPhraseQuery(); mpq.setSlop(phraseSlop); List<Term> multiTerms = new ArrayList<Term>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.size() > 0) { if 
(enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); } else { mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); } multiTerms.clear(); } position += positionIncrement; multiTerms.add(termFactory.createTerm(term)); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); } else { mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); } return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext); } else { PhraseQuery pq = new PhraseQuery(); pq.setSlop(phraseSlop); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (enablePositionIncrements) { position += positionIncrement; pq.add(termFactory.createTerm(term), position); } else { pq.add(termFactory.createTerm(term)); } } return wrapSmartNameQuery(pq, smartNameFieldMappers, parseContext); } } else if (type == Type.PHRASE_PREFIX) { MultiPhrasePrefixQuery mpq = new MultiPhrasePrefixQuery(); mpq.setSlop(phraseSlop); mpq.setMaxExpansions(maxExpansions); List<Term> multiTerms = new ArrayList<Term>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; int positionIncrement = 1; try { boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); if (posIncrAtt != null) { positionIncrement = posIncrAtt.getPositionIncrement(); } } catch (IOException e) { // safe to ignore, because we know the number of tokens } if (positionIncrement > 0 && multiTerms.size() > 0) { if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); } else { mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); } multiTerms.clear(); 
} position += positionIncrement; multiTerms.add(termFactory.createTerm(term)); } if (enablePositionIncrements) { mpq.add(multiTerms.toArray(new Term[multiTerms.size()]), position); } else { mpq.add(multiTerms.toArray(new Term[multiTerms.size()])); } return wrapSmartNameQuery(mpq, smartNameFieldMappers, parseContext); } throw new ElasticSearchIllegalStateException("No type found for [" + type + "]"); }
From source file:org.janusgraph.diskstorage.solr.SolrIndex.java
License:Apache License
@SuppressWarnings("unchecked") private List<String> customTokenize(String tokenizerClass, String value) { CachingTokenFilter stream = null; try {//from w ww . j a va2 s . c om final List<String> terms = new ArrayList<>(); final Tokenizer tokenizer = ((Constructor<Tokenizer>) ClassLoader.getSystemClassLoader() .loadClass(tokenizerClass).getConstructor()).newInstance(); tokenizer.setReader(new StringReader(value)); stream = new CachingTokenFilter(tokenizer); final TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class); stream.reset(); while (stream.incrementToken()) { terms.add(termAtt.getBytesRef().utf8ToString()); } return terms; } catch (ReflectiveOperationException | IOException e) { throw new IllegalArgumentException(e.getMessage(), e); } finally { IOUtils.closeQuietly(stream); } }
From source file:org.sindice.siren.qparser.keyword.processors.DatatypeAnalyzerProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { this.positionIncrementsEnabled = false; final Boolean positionIncrementsEnabled = this.getQueryConfigHandler() .get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS); if (positionIncrementsEnabled != null) { this.positionIncrementsEnabled = positionIncrementsEnabled; }/*w ww . j a v a2 s .c o m*/ final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final String datatype = (String) fieldNode.getTag(DatatypeQueryNode.DATATYPE_TAGID); if (datatype == null) { return node; } final Analyzer analyzer = this.getQueryConfigHandler().get(KeywordConfigurationKeys.DATATYPES_ANALYZERS) .get(datatype); if (analyzer == null) { throw new QueryNodeException(new MessageImpl(QueryParserMessages.INVALID_SYNTAX, "No analyzer associated with " + datatype)); } PositionIncrementAttribute posIncrAtt = null; int numTokens = 0; int positionCount = 0; boolean severalTokensAtSamePosition = false; final TokenStream source; try { source = analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); if (buffer.hasAttribute(PositionIncrementAttribute.class)) { posIncrAtt = buffer.getAttribute(PositionIncrementAttribute.class); } try { while (buffer.incrementToken()) { numTokens++; final int positionIncrement = (posIncrAtt != null) ? 
posIncrAtt.getPositionIncrement() : 1; if (positionIncrement != 0) { positionCount += positionIncrement; } else { severalTokensAtSamePosition = true; } } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { if (nbTwigs != 0) { // Twig special case return new WildcardNodeQueryNode(); } return new NoTokenFoundQueryNode(); } else if (numTokens == 1) { String term = null; try { boolean hasNext; hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } fieldNode.setText(term); return fieldNode; } else { // no phrase query: final LinkedList<QueryNode> children = new LinkedList<QueryNode>(); int position = -1; for (int i = 0; i < numTokens; i++) { String term = null; final int positionIncrement = 1; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); if (this.positionIncrementsEnabled) { position += positionIncrement; newFieldNode.setPositionIncrement(position); } else { newFieldNode.setPositionIncrement(i); } children.add(new FieldQueryNode(field, term, -1, -1)); } if (node.getParent() instanceof TokenizedPhraseQueryNode) { throw new QueryNodeException(new MessageImpl("Cannot build a MultiPhraseQuery")); } // If multiple terms at one single position, this must be a query // expansion. Perform a OR between the terms. 
if (severalTokensAtSamePosition && positionCount == 1) { return new GroupQueryNode(new OrQueryNode(children)); } // if several tokens at same position && position count > 1, then // results can be unexpected else { final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); for (int i = 0; i < children.size(); i++) { pq.add(children.get(i)); } return pq; } } } else if (node instanceof TwigQueryNode) { nbTwigs--; assert nbTwigs >= 0; } return node; }
From source file:org.sindice.siren.qparser.keyword.processors.PhraseQueryNodeProcessor.java
License:Apache License
@Override protected QueryNode postProcessNode(final QueryNode node) throws QueryNodeException { if (node instanceof TextableQueryNode && !(node instanceof WildcardQueryNode) && !(node instanceof FuzzyQueryNode) && !(node instanceof RegexpQueryNode) && !(node.getParent() instanceof RangeQueryNode)) { final FieldQueryNode fieldNode = ((FieldQueryNode) node); final String text = fieldNode.getTextAsString(); final String field = fieldNode.getFieldAsString(); final TokenStream source; try {//www.j a v a 2 s . c o m source = this.analyzer.tokenStream(field, new StringReader(text)); source.reset(); } catch (final IOException e1) { throw new RuntimeException(e1); } final CachingTokenFilter buffer = new CachingTokenFilter(source); int numTokens = 0; try { while (buffer.incrementToken()) { numTokens++; } } catch (final IOException e) { // ignore } try { // rewind the buffer stream buffer.reset(); // close original stream - all tokens buffered source.close(); } catch (final IOException e) { // ignore } if (!buffer.hasAttribute(CharTermAttribute.class)) { return new NoTokenFoundQueryNode(); } final CharTermAttribute termAtt = buffer.getAttribute(CharTermAttribute.class); if (numTokens == 0) { return new NoTokenFoundQueryNode(); } else if (numTokens != 1) { // phrase query final TokenizedPhraseQueryNode pq = new TokenizedPhraseQueryNode(); for (int i = 0; i < numTokens; i++) { String term = null; try { final boolean hasNext = buffer.incrementToken(); assert hasNext == true; term = termAtt.toString(); } catch (final IOException e) { // safe to ignore, because we know the number of tokens } final FieldQueryNode newFieldNode = new FieldQueryNode(field, term, -1, -1); newFieldNode.setPositionIncrement(i); pq.add(newFieldNode); } return pq; } } return node; }