List of usage examples for the org.apache.lucene.util.CharsRef constructor CharsRef()
public CharsRef()
From source file: at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java
License: Apache License
/** * Replaces the current term (attributes) with term (attributes) from the * stack//ww w. j av a 2 s. c o m * * @throws IOException */ protected void processTermOnStack() throws IOException { ExpandedTerm expandedTerm = termStack.pop(); String term = expandedTerm.getTerm(); SKOSType termType = expandedTerm.getTermType(); String sTerm = ""; try { sTerm = analyze(analyzer, term, new CharsRef()).toString(); } catch (IllegalArgumentException e) { // skip this term return; } /* * copies the values of all attribute implementations from this state into * the implementations of the target stream */ restoreState(current); /* * Adds the expanded term to the term buffer */ termAtt.setEmpty().append(sTerm); /* * set position increment to zero to put multiple terms into the same * position */ posIncrAtt.setPositionIncrement(0); /* * sets the type of the expanded term (pref, alt, broader, narrower, etc.) */ skosAtt.setSkosType(termType); /* * converts the SKOS Attribute to a payload, which is propagated to the * index */ byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal()); payloadAtt.setPayload(new BytesRef(bytes)); }
From source file: at.ac.univie.mminf.luceneSKOS.analysis.AbstractSKOSFilter.java
License: Apache License
/** * Replaces the current term (attributes) with term (attributes) from the * stack/*from w w w. j av a 2 s . c o m*/ * * @throws IOException */ protected void processTermOnStack() throws IOException { ExpandedTerm expandedTerm = termStack.pop(); String term = expandedTerm.getTerm(); SKOSType termType = expandedTerm.getTermType(); String sTerm = ""; try { sTerm = analyze(analyzer, term, new CharsRef()).toString(); } catch (IllegalArgumentException e) { // skip this term return; } /* * copies the values of all attribute implementations from this state into * the implementations of the target stream */ restoreState(current); /* * Adds the expanded term to the term buffer */ termAtt.setEmpty().append(sTerm); /* * set position increment to zero to put multiple terms into the same * position */ posIncrAtt.setPositionIncrement(0); /* * sets the type of the expanded term (pref, alt, broader, narrower, etc.) */ skosAtt.setSkosType(termType); /* * converts the SKOS Attribute to a payload, which is propagated to the * index */ payloadAtt.setPayload(new SKOSTypePayload(skosAtt)); }
From source file: at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java
License: Apache License
/** * Replaces the current term (attributes) with term (attributes) from the * stack/*from w w w . j ava 2 s. c om*/ * * @throws IOException */ protected void processTermOnStack() throws IOException { ExpandedTerm expandedTerm = termStack.pop(); String term = expandedTerm.getTerm(); SKOSType termType = expandedTerm.getTermType(); String sTerm = ""; try { sTerm = analyze(analyzer, term, new CharsRef()).toString(); } catch (IllegalArgumentException e) { // skip this term return; } /* * copies the values of all attribute implementations from this state * into the implementations of the target stream */ restoreState(current); /* * Adds the expanded term to the term buffer */ termAtt.setEmpty().append(sTerm); /* * set position increment to zero to put multiple terms into the same * position */ posIncrAtt.setPositionIncrement(0); /* * sets the type of the expanded term (pref, alt, broader, narrower, * etc.) */ skosAtt.setSkosType(termType); /* * converts the SKOS Attribute to a payload, which is propagated to the * index */ byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal()); payloadAtt.setPayload(new BytesRef(bytes)); }
From source file: com.o19s.solr.swan.highlight.SpanAwareFieldTermStack.java
License: Apache License
/**
 * Builds the term stack for a single document/field from its stored term
 * vector, keeping only terms that are part of the query (and, for terms not
 * in the always-highlight set, only occurrences at positions the query
 * actually matched).
 *
 * <p>Returns early (leaving {@code termList} empty, i.e. a null snippet)
 * when the field has no query terms, no term vectors, no positions, or no
 * offsets.
 *
 * @param reader     IndexReader of the index
 * @param docId      document id to be highlighted
 * @param fieldName  field of the document to be highlighted
 * @param fieldQuery FieldQuery object
 * @throws IOException If there is a low-level I/O error
 */
public SpanAwareFieldTermStack(IndexReader reader, int docId, String fieldName,
        final SpanAwareFieldQuery fieldQuery) throws IOException {
    this.fieldName = fieldName;
    Set<String> termSet = fieldQuery.getTermSet(fieldName);
    Set<String> alwaysHighlightTermSet = fieldQuery.getHighlightTermSet(fieldName);
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null)
        return;
    final Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        // no term vectors stored for this doc -- null snippet
        return;
    }
    final Terms vector = vectors.terms(fieldName);
    if (vector == null) {
        // field has no term vector -- null snippet
        return;
    }
    // scratch buffer reused for each UTF-8 -> UTF-16 term conversion
    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;
    int numDocs = reader.maxDoc();
    while ((text = termsEnum.next()) != null) {
        UnicodeUtil.UTF8toUTF16(text, spare);
        final String term = spare.toString();
        // skip terms that are not part of the query at all
        if (!termSet.contains(term)) {
            continue;
        }
        dpEnum = termsEnum.docsAndPositions(null, dpEnum);
        if (dpEnum == null) {
            // no positions available -- null snippet
            return;
        }
        dpEnum.nextDoc();
        // IDF-style weight, see: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        final float weight = (float) (Math
                .log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0);
        final int freq = dpEnum.freq();
        for (int i = 0; i < freq; i++) {
            int pos = dpEnum.nextPosition();
            if (dpEnum.startOffset() < 0) {
                return; // no offsets, null snippet
            }
            // keep the occurrence only if the term is always highlighted or
            // the query matched the document field at this start offset
            if (alwaysHighlightTermSet.contains(term)
                    || fieldQuery.doesDocFieldContainPosition(fieldName, docId, dpEnum.startOffset())) {
                termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight));
            }
        }
    }
    // sort by position
    Collections.sort(termList);
}
From source file: in.geocoder.component.GeocodingComponent.java
License: Apache License
private NamedList<Integer> getTerms(SolrIndexSearcher searcher, IndexSchema schema, String field) throws IOException { NamedList<Object> termsResult = new SimpleOrderedMap<Object>(); boolean sort = true; boolean raw = false; final AtomicReader indexReader = searcher.getAtomicReader(); Fields lfields = indexReader.fields(); NamedList<Integer> fieldTerms = new NamedList<Integer>(); termsResult.add(field, fieldTerms);/*from w w w.j av a 2s. co m*/ Terms terms = lfields == null ? null : lfields.terms(field); if (terms == null) { // no terms for this field return new NamedList<Integer>(); } FieldType ft = raw ? null : schema.getFieldTypeNoEx(field); if (ft == null) ft = new StrField(); TermsEnum termsEnum = terms.iterator(null); BytesRef term = null; term = termsEnum.next(); BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(Integer.MAX_VALUE) : null); CharsRef external = new CharsRef(); while (term != null) { boolean externalized = false; // did we fill in "external" yet for this term? // This is a good term in the range. Check if mincount/maxcount conditions are satisfied. int docFreq = termsEnum.docFreq(); // add the term to the list if (sort) { queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq)); } else { // TODO: handle raw somehow if (!externalized) { ft.indexedToReadable(term, external); } fieldTerms.add(external.toString(), docFreq); } term = termsEnum.next(); } if (sort) { for (CountPair<BytesRef, Integer> item : queue) { ft.indexedToReadable(item.key, external); fieldTerms.add(external.toString(), item.val); } } return fieldTerms; }
From source file: lux.index.analysis.QNameTokenFilter.java
License: Mozilla Public License
/**
 * Creates a QName token filter over the given stream.
 *
 * <p>Allocates the reusable term buffer, enables namespace awareness by
 * default, and stores the element-visibility configuration.
 *
 * @param input  the upstream token stream to filter
 * @param defVis default visibility applied to elements without an explicit entry
 * @param elVis  per-element visibility overrides, keyed by element name
 */
protected QNameTokenFilter(TokenStream input, ElementVisibility defVis,
        Map<String, ElementVisibility> elVis) {
    super(input);
    this.defVis = defVis;
    this.elVis = elVis;
    // reusable scratch buffer for building qualified-name terms
    term = new CharsRef();
    setNamespaceAware(true);
}
From source file: org.apache.solr.handler.admin.LukeRequestHandler.java
License: Apache License
/**
 * Builds a per-field information map for one stored document: field type,
 * schema/index flags, external and internal values, optional binary value,
 * boost, docFreq, and (when stored) the term vector with per-term frequencies.
 *
 * @param doc    the retrieved stored document
 * @param docId  the document's internal id (used to fetch its term vector)
 * @param reader reader used for docFreq and term-vector lookups
 * @param schema schema used to resolve each field's type
 * @return an ordered map keyed by field name
 * @throws IOException on a low-level index error
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    // scratch buffer reused for UTF-8 -> UTF-16 term conversion below
    final CharsRef spare = new CharsRef();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();
        // field may not exist in the schema; ftype stays null in that case
        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();
        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));
        // build the indexed form of the term for the docFreq lookup below
        Term t = new Term(field.name(),
                ftype != null ? ftype.storedToIndexed(field) : field.stringValue());
        f.add("value", (ftype == null) ? null : ftype.toExternal(field));
        // TODO: this really should be "stored"
        f.add("internal", field.stringValue());
        // may be a binary number
        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields
        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = v.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, spare);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                // term-vector rendering is best-effort; log and continue with the next field
                log.warn("error writing term vector", ex);
            }
        }
        finfo.add(field.name(), f);
    }
    return finfo;
}
From source file: org.apache.solr.handler.admin.LukeRequestHandler.java
License: Apache License
/**
 * Adds detailed statistics for one indexed field to {@code fieldMap}:
 * the number of distinct terms, the top-N terms by docFreq, and a
 * log2-bucketed docFreq histogram.
 *
 * @param req      the request, supplying NUMTERMS and the searcher
 * @param field    the field to analyze
 * @param fieldMap output map receiving "distinct", "topTerms" and "histogram"
 * @throws IOException on a low-level index error
 */
@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req, String field,
        SimpleOrderedMap<Object> fieldMap) throws IOException {
    SolrParams params = req.getParams();
    final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);
    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.
    // scratch buffer reused for UTF-8 -> UTF-16 term conversion
    final CharsRef spare = new CharsRef();
    Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());
    if (fields == null) { // No indexed fields
        return;
    }
    Terms terms = fields.terms(field);
    if (terms == null) { // No terms in the field.
        return;
    }
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text;
    int[] buckets = new int[HIST_ARRAY_SIZE];
    while ((text = termsEnum.next()) != null) {
        ++tiq.distinctTerms;
        int freq = termsEnum.docFreq();
        // log2 bucket index: freq 1 -> slot 0, 2 -> 1, 3-4 -> 2, ...
        // This calculation seems odd, but it gives the same results as it used to.
        int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
        buckets[slot] = buckets[slot] + 1;
        // only consider the term for the top-N queue if it beats the current floor
        if (numTerms > 0 && freq > tiq.minFreq) {
            UnicodeUtil.UTF8toUTF16(text, spare);
            String t = spare.toString();
            tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
                tiq.pop(); // remove lowest in tiq
                // raise the admission floor to the new weakest entry's docFreq
                tiq.minFreq = tiq.getTopTermInfo().docFreq;
            }
        }
    }
    tiq.histogram.add(buckets);
    fieldMap.add("distinct", tiq.distinctTerms);
    // Include top terms
    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));
    // Add a histogram
    fieldMap.add("histogram", tiq.histogram.toNamedList());
}
From source file: org.apache.solr.handler.component.ExpandAllComponent.java
License: Apache License
/**
 * Expands collapsed result groups: for every group ordinal present in the
 * current (collapsed) result set, runs the expand query restricted to that
 * group and attaches the per-group top documents to the response under
 * "expanded", keyed by the readable group value.
 *
 * <p>The expand field comes from EXPAND_FIELD or, failing that, from a
 * CollapsingPostFilter found among the request's filters. Expand query,
 * filters, sort and row limit can each be overridden via EXPAND_Q,
 * EXPAND_FQ, EXPAND_SORT and EXPAND_ROWS.
 *
 * @param rb the response builder carrying request, filters and collapsed results
 * @throws IOException when no expand field can be determined, a sub-query
 *         fails to parse, or a low-level index error occurs
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!doExpandAll(rb)) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);
    // on a shard, only run when explicit ids were requested
    if (ids == null && isShard) {
        return;
    }
    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        // fall back to the collapse field declared by a CollapsingPostFilter
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    // expand query: explicit EXPAND_Q or the main query
    Query query = null;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    // expand filters: explicit EXPAND_FQ, or the request filters minus the collapse filter
    List<Query> newFilters = new ArrayList();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    // mark every group ordinal present in the collapsed result set,
    // and remember the collapsed docs so they are excluded from expansion
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);
    DocIterator idit = docList.iterator();
    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }
    Collector collector = null;
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet,
            limit, sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    // chain through any post filter so it still sees the expand collector
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    searcher.search(query, pfilter.filter, collector);
    IntObjectOpenHashMap groups = groupExpandCollector.getGroups();
    Iterator<IntObjectCursor> it = groups.iterator();
    Map<String, DocSlice> outMap = new HashMap();
    // scratch refs reused while converting each group ordinal to readable text
    BytesRef bytesRef = new BytesRef();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();
    while (it.hasNext()) {
        IntObjectCursor cursor = it.next();
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            // key the slice by the group's readable field value
            values.lookupOrd(ord, bytesRef);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }
    rb.rsp.add("expanded", outMap);
}
From source file: org.apache.solr.handler.component.ExpandComponent.java
License: Apache License
/**
 * Expands collapsed result groups: for every group ordinal present in the
 * current (collapsed) result set, runs the expand query restricted to that
 * group and attaches the per-group top documents to the response under
 * "expanded", keyed by the readable group value.
 *
 * <p>The expand field comes from EXPAND_FIELD or, failing that, from a
 * CollapsingPostFilter found among the request's filters. Expand query,
 * filters, sort and row limit can each be overridden via EXPAND_Q,
 * EXPAND_FQ, EXPAND_SORT and EXPAND_ROWS.
 *
 * @param rb the response builder carrying request, filters and collapsed results
 * @throws IOException when no expand field can be determined, a sub-query
 *         fails to parse, or a low-level index error occurs
 */
@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doExpand) {
        return;
    }
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);
    // on a shard, only run when explicit ids were requested
    if (ids == null && isShard) {
        return;
    }
    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        // fall back to the collapse field declared by a CollapsingPostFilter
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }
    if (field == null) {
        throw new IOException("Expand field is null.");
    }
    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);
    Sort sort = null;
    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }
    // expand query: explicit EXPAND_Q or the main query
    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    // expand filters: explicit EXPAND_FQ, or the request filters minus the collapse filter
    List<Query> newFilters = new ArrayList<>();
    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }
    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    // mark every group ordinal present in the collapsed result set,
    // and remember the collapsed docs so they are excluded from expansion
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);
    DocIterator idit = docList.iterator();
    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }
    Collector collector;
    // a sort must be rewritten against the searcher before use
    if (sort != null)
        sort = sort.rewrite(searcher);
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet,
            limit, sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    // chain through any post filter so it still sees the expand collector
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }
    searcher.search(query, pfilter.filter, collector);
    IntObjectMap groups = groupExpandCollector.getGroups();
    Map<String, DocSlice> outMap = new HashMap();
    // scratch ref reused while converting each group ordinal to readable text
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();
    for (IntObjectCursor cursor : (Iterable<IntObjectCursor>) groups) {
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            // key the slice by the group's readable field value
            final BytesRef bytesRef = values.lookupOrd(ord);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }
    rb.rsp.add("expanded", outMap);
}