Example usage for org.apache.lucene.util CharsRef CharsRef

List of usage examples for org.apache.lucene.util CharsRef CharsRef

Introduction

On this page you can find example usages of the org.apache.lucene.util CharsRef no-argument constructor, CharsRef().

Prototype

public CharsRef() 

Document

Creates a new CharsRef initialized to an empty, zero-length character array.
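
A common pattern, visible throughout the examples below, is to allocate a single empty CharsRef and reuse it as a scratch buffer when converting indexed UTF-8 terms back to UTF-16 text. The sketch below is a minimal illustration of that pattern against the Lucene 4.x-era API used by these examples; it assumes an already-positioned TermsEnum named termsEnum:

CharsRef spare = new CharsRef(); // zero-length; the buffer grows as needed
BytesRef text;
while ((text = termsEnum.next()) != null) {
    // fill the reusable buffer with this term's characters
    UnicodeUtil.UTF8toUTF16(text, spare);
    String term = spare.toString();
    // ... use the readable term ...
}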

Usage

From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java

License:Apache License

/**
 * Replaces the current term (and its attributes) with the term (and
 * attributes) popped from the stack.
 * 
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    String sTerm = "";

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractSKOSFilter.java

License:Apache License

/**
 * Replaces the current term (and its attributes) with the term (and
 * attributes) popped from the stack.
 * 
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    String sTerm = "";

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state into
     * the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower, etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index
     */
    payloadAtt.setPayload(new SKOSTypePayload(skosAtt));
}

From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java

License:Apache License

/**
 * Replaces the current term (and its attributes) with the term (and
 * attributes) popped from the stack.
 * 
 * @throws IOException
 */
protected void processTermOnStack() throws IOException {
    ExpandedTerm expandedTerm = termStack.pop();

    String term = expandedTerm.getTerm();

    SKOSType termType = expandedTerm.getTermType();

    String sTerm = "";

    try {
        sTerm = analyze(analyzer, term, new CharsRef()).toString();
    } catch (IllegalArgumentException e) {
        // skip this term
        return;
    }

    /*
     * copies the values of all attribute implementations from this state
     * into the implementations of the target stream
     */
    restoreState(current);

    /*
     * Adds the expanded term to the term buffer
     */
    termAtt.setEmpty().append(sTerm);

    /*
     * set position increment to zero to put multiple terms into the same
     * position
     */
    posIncrAtt.setPositionIncrement(0);

    /*
     * sets the type of the expanded term (pref, alt, broader, narrower,
     * etc.)
     */
    skosAtt.setSkosType(termType);

    /*
     * converts the SKOS Attribute to a payload, which is propagated to the
     * index
     */
    byte[] bytes = PayloadHelper.encodeInt(skosAtt.getSkosType().ordinal());
    payloadAtt.setPayload(new BytesRef(bytes));
}

From source file:com.o19s.solr.swan.highlight.SpanAwareFieldTermStack.java

License:Apache License

/**
 * A constructor.
 * 
 * @param reader IndexReader of the index
 * @param docId document id to be highlighted
 * @param fieldName field of the document to be highlighted
 * @param fieldQuery FieldQuery object
 * @throws IOException If there is a low-level I/O error
 */
public SpanAwareFieldTermStack(IndexReader reader, int docId, String fieldName,
        final SpanAwareFieldQuery fieldQuery) throws IOException {
    this.fieldName = fieldName;

    Set<String> termSet = fieldQuery.getTermSet(fieldName);
    Set<String> alwaysHighlightTermSet = fieldQuery.getHighlightTermSet(fieldName);

    // return (producing a null snippet) if an unmatched fieldName is specified when fieldMatch == true
    if (termSet == null)
        return;

    final Fields vectors = reader.getTermVectors(docId);
    if (vectors == null) {
        // null snippet
        return;
    }

    final Terms vector = vectors.terms(fieldName);
    if (vector == null) {
        // null snippet
        return;
    }

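    // scratch buffer, reused for each term's UTF-8 to UTF-16 conversion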
    final CharsRef spare = new CharsRef();
    final TermsEnum termsEnum = vector.iterator(null);
    DocsAndPositionsEnum dpEnum = null;
    BytesRef text;

    int numDocs = reader.maxDoc();
    while ((text = termsEnum.next()) != null) {
        UnicodeUtil.UTF8toUTF16(text, spare);
        final String term = spare.toString();
        if (!termSet.contains(term)) {
            continue;
        }
        dpEnum = termsEnum.docsAndPositions(null, dpEnum);
        if (dpEnum == null) {
            // null snippet
            return;
        }

        dpEnum.nextDoc();

        // For weight look here: http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
        final float weight = (float) (Math
                .log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0);

        final int freq = dpEnum.freq();

        for (int i = 0; i < freq; i++) {
            int pos = dpEnum.nextPosition();
            if (dpEnum.startOffset() < 0) {
                return; // no offsets, null snippet
            }

            if (alwaysHighlightTermSet.contains(term)
                    || fieldQuery.doesDocFieldContainPosition(fieldName, docId, dpEnum.startOffset())) {
                termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight));
            }
        }

    }

    // sort by position
    Collections.sort(termList);
}

From source file:in.geocoder.component.GeocodingComponent.java

License:Apache License

private NamedList<Integer> getTerms(SolrIndexSearcher searcher, IndexSchema schema, String field)
        throws IOException {
    NamedList<Object> termsResult = new SimpleOrderedMap<Object>();

    boolean sort = true;

    boolean raw = false;

    final AtomicReader indexReader = searcher.getAtomicReader();
    Fields lfields = indexReader.fields();

    NamedList<Integer> fieldTerms = new NamedList<Integer>();
    termsResult.add(field, fieldTerms);

    Terms terms = lfields == null ? null : lfields.terms(field);
    if (terms == null) {
        // no terms for this field
        return new NamedList<Integer>();
    }

    FieldType ft = raw ? null : schema.getFieldTypeNoEx(field);
    if (ft == null)
        ft = new StrField();

    TermsEnum termsEnum = terms.iterator(null);
    BytesRef term = null;

    term = termsEnum.next();

    BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort
            ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(Integer.MAX_VALUE)
            : null);
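    // reusable scratch buffer for rendering each indexed term in readable form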
    CharsRef external = new CharsRef();
    while (term != null) {
        int docFreq = termsEnum.docFreq();
        // add the term to the list
        if (sort) {
            queue.add(new CountPair<BytesRef, Integer>(BytesRef.deepCopyOf(term), docFreq));
        } else {
            // TODO: handle raw somehow
            ft.indexedToReadable(term, external);
            fieldTerms.add(external.toString(), docFreq);
        }

        term = termsEnum.next();
    }

    if (sort) {
        for (CountPair<BytesRef, Integer> item : queue) {
            ft.indexedToReadable(item.key, external);
            fieldTerms.add(external.toString(), item.val);
        }
    }

    return fieldTerms;
}

From source file:lux.index.analysis.QNameTokenFilter.java

License:Mozilla Public License

protected QNameTokenFilter(TokenStream input, ElementVisibility defVis, Map<String, ElementVisibility> elVis) {
    super(input);
    term = new CharsRef();
    setNamespaceAware(true);
    this.defVis = defVis;
    this.elVis = elVis;
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    final CharsRef spare = new CharsRef();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<Object>();

        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();

        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));

        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());

        f.add("value", (ftype == null) ? null : ftype.toExternal(field));

        // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number

        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t)); // this can be 0 for non-indexed fields

        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
                    final TermsEnum termsEnum = v.iterator(null);
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        UnicodeUtil.UTF8toUTF16(text, spare);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                log.warn("error writing term vector", ex);
            }
        }

        finfo.add(field.name(), f);
    }
    return finfo;
}

From source file:org.apache.solr.handler.admin.LukeRequestHandler.java

License:Apache License

@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
        throws IOException {

    SolrParams params = req.getParams();
    final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.

    final CharsRef spare = new CharsRef();

    Fields fields = MultiFields.getFields(req.getSearcher().getIndexReader());

    if (fields == null) { // No indexed fields
        return;
    }

    Terms terms = fields.terms(field);
    if (terms == null) { // No terms in the field.
        return;
    }
    TermsEnum termsEnum = terms.iterator(null);
    BytesRef text;
    int[] buckets = new int[HIST_ARRAY_SIZE];
    while ((text = termsEnum.next()) != null) {
        ++tiq.distinctTerms;
        int freq = termsEnum.docFreq(); // This calculation seems odd, but it gives the same results as it used to.
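        // slot = ceil(log2(freq)), so the buckets cover power-of-two ranges of document frequency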
        int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
        buckets[slot] = buckets[slot] + 1;
        if (numTerms > 0 && freq > tiq.minFreq) {
            UnicodeUtil.UTF8toUTF16(text, spare);
            String t = spare.toString();

            tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
                tiq.pop(); // remove lowest in tiq
                tiq.minFreq = tiq.getTopTermInfo().docFreq;
            }
        }
    }
    tiq.histogram.add(buckets);
    fieldMap.add("distinct", tiq.distinctTerms);

    // Include top terms
    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

    // Add a histogram
    fieldMap.add("histogram", tiq.histogram.toNamedList());
}

From source file:org.apache.solr.handler.component.ExpandAllComponent.java

License:Apache License

@Override
public void process(ResponseBuilder rb) throws IOException {

    if (!doExpandAll(rb)) {
        return;
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query = null;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList<>();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

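    // mark each result document's group ordinal; collapsedSet remembers the documents already shown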
    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector = null;
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectOpenHashMap groups = groupExpandCollector.getGroups();
    Iterator<IntObjectCursor> it = groups.iterator();
    Map<String, DocSlice> outMap = new HashMap<>();
    BytesRef bytesRef = new BytesRef();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();

    while (it.hasNext()) {
        IntObjectCursor cursor = it.next();
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            values.lookupOrd(ord, bytesRef);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}

From source file:org.apache.solr.handler.component.ExpandComponent.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void process(ResponseBuilder rb) throws IOException {

    if (!rb.doExpand) {
        return;
    }

    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();

    boolean isShard = params.getBool(ShardParams.IS_SHARD, false);
    String ids = params.get(ShardParams.IDS);

    if (ids == null && isShard) {
        return;
    }

    String field = params.get(ExpandParams.EXPAND_FIELD);
    if (field == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (q instanceof CollapsingQParserPlugin.CollapsingPostFilter) {
                    CollapsingQParserPlugin.CollapsingPostFilter cp = (CollapsingQParserPlugin.CollapsingPostFilter) q;
                    field = cp.getField();
                }
            }
        }
    }

    if (field == null) {
        throw new IOException("Expand field is null.");
    }

    String sortParam = params.get(ExpandParams.EXPAND_SORT);
    String[] fqs = params.getParams(ExpandParams.EXPAND_FQ);
    String qs = params.get(ExpandParams.EXPAND_Q);
    int limit = params.getInt(ExpandParams.EXPAND_ROWS, 5);

    Sort sort = null;

    if (sortParam != null) {
        sort = QueryParsing.parseSortSpec(sortParam, rb.req).getSort();
    }

    Query query;
    if (qs == null) {
        query = rb.getQuery();
    } else {
        try {
            QParser parser = QParser.getParser(qs, null, req);
            query = parser.getQuery();
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    List<Query> newFilters = new ArrayList<>();

    if (fqs == null) {
        List<Query> filters = rb.getFilters();
        if (filters != null) {
            for (Query q : filters) {
                if (!(q instanceof CollapsingQParserPlugin.CollapsingPostFilter)) {
                    newFilters.add(q);
                }
            }
        }
    } else {
        try {
            for (String fq : fqs) {
                if (fq != null && fq.trim().length() != 0 && !fq.equals("*:*")) {
                    QParser fqp = QParser.getParser(fq, null, req);
                    newFilters.add(fqp.getQuery());
                }
            }
        } catch (Exception e) {
            throw new IOException(e);
        }
    }

    SolrIndexSearcher searcher = req.getSearcher();
    AtomicReader reader = searcher.getAtomicReader();
    SortedDocValues values = FieldCache.DEFAULT.getTermsIndex(reader, field);
    FixedBitSet groupBits = new FixedBitSet(values.getValueCount());
    DocList docList = rb.getResults().docList;
    IntOpenHashSet collapsedSet = new IntOpenHashSet(docList.size() * 2);

    DocIterator idit = docList.iterator();

    while (idit.hasNext()) {
        int doc = idit.nextDoc();
        int ord = values.getOrd(doc);
        if (ord > -1) {
            groupBits.set(ord);
            collapsedSet.add(doc);
        }
    }

    Collector collector;
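    // resolve the sort's SortFields against this searcher before collecting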
    if (sort != null)
        sort = sort.rewrite(searcher);
    GroupExpandCollector groupExpandCollector = new GroupExpandCollector(values, groupBits, collapsedSet, limit,
            sort);
    SolrIndexSearcher.ProcessedFilter pfilter = searcher.getProcessedFilter(null, newFilters);
    if (pfilter.postFilter != null) {
        pfilter.postFilter.setLastDelegate(groupExpandCollector);
        collector = pfilter.postFilter;
    } else {
        collector = groupExpandCollector;
    }

    searcher.search(query, pfilter.filter, collector);
    IntObjectMap groups = groupExpandCollector.getGroups();
    Map<String, DocSlice> outMap = new HashMap<>();
    CharsRef charsRef = new CharsRef();
    FieldType fieldType = searcher.getSchema().getField(field).getType();
    for (IntObjectCursor cursor : (Iterable<IntObjectCursor>) groups) {
        int ord = cursor.key;
        TopDocsCollector topDocsCollector = (TopDocsCollector) cursor.value;
        TopDocs topDocs = topDocsCollector.topDocs();
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if (scoreDocs.length > 0) {
            int[] docs = new int[scoreDocs.length];
            float[] scores = new float[scoreDocs.length];
            for (int i = 0; i < docs.length; i++) {
                ScoreDoc scoreDoc = scoreDocs[i];
                docs[i] = scoreDoc.doc;
                scores[i] = scoreDoc.score;
            }
            DocSlice slice = new DocSlice(0, docs.length, docs, scores, topDocs.totalHits,
                    topDocs.getMaxScore());
            final BytesRef bytesRef = values.lookupOrd(ord);
            fieldType.indexedToReadable(bytesRef, charsRef);
            String group = charsRef.toString();
            outMap.put(group, slice);
        }
    }

    rb.rsp.add("expanded", outMap);
}