Example usage for org.apache.lucene.util CharsRefBuilder CharsRefBuilder

List of usage examples for org.apache.lucene.util CharsRefBuilder CharsRefBuilder

Introduction

On this page you can find an example usage of the org.apache.lucene.util CharsRefBuilder constructor.

Prototype

public CharsRefBuilder() 

Source Link

Document

Sole constructor.

Usage

From source file:alix.lucene.MoreLikeThis.java

License:Apache License

/**
 * Accumulates the terms found in a term vector into {@code termFreqMap},
 * adding each term's total frequency and skipping noise words.
 *
 * @param termFreqMap map from term text to its accumulated frequency
 * @param vector terms (with frequencies) for one doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder charsBuf = new CharsRefBuilder();
    for (BytesRef bytes = it.next(); bytes != null; bytes = it.next()) {
        charsBuf.copyUTF8Bytes(bytes);
        final String word = charsBuf.toString();
        if (isNoiseWord(word)) {
            continue;
        }
        final int occurrences = (int) it.totalTermFreq();

        // accumulate into the shared counter object for this term
        Int counter = termFreqMap.get(word);
        if (counter == null) {
            counter = new Int();
            counter.x = occurrences;
            termFreqMap.put(word, counter);
        } else {
            counter.x += occurrences;
        }
    }
}

From source file:alix.lucene.MoreLikeThis.java

License:Apache License

/**
 * Print a term vector for debugging/*from w  w  w. j a va  2s  . c om*/
 * 
 * @param vector List of terms and their frequencies for a doc/field
 * @throws IOException 
 */
@SuppressWarnings("unused")
private void print(Terms vector) throws IOException {
    if (vector == null)
        return;
    final TermsEnum termsEnum = vector.iterator();
    final CharsRefBuilder spare = new CharsRefBuilder();
    BytesRef text;
    // termsEnum.docFreq() = 1, 
    // The returned Fields instance acts like a single-document inverted index
    HashMap<String, Long> map = new HashMap<String, Long>();
    while ((text = termsEnum.next()) != null) {
        spare.copyUTF8Bytes(text);
        map.put(spare.toString(), termsEnum.totalTermFreq());
    }
    @SuppressWarnings("unchecked")
    Map.Entry<String, Long>[] a = map.entrySet().toArray(new Map.Entry[0]);
    Arrays.sort(a, new Comparator<Map.Entry<String, Long>>() {
        public int compare(Map.Entry<String, Long> o1, Map.Entry<String, Long> o2) {
            return o2.getValue().compareTo(o1.getValue());
        }
    });
    for (Map.Entry<String, Long> e : a) {
        System.out.print(e.getKey() + ":" + e.getValue() + " ");
    }
    System.out.println();
}

From source file:com.nlp.mlt.SimQuery.java

License:Apache License

/**
 * Accumulates the terms found in a term vector into {@code termFreqMap},
 * adding each term's total frequency, skipping noise words, and printing
 * each accepted term with its frequency.
 *
 * @param termFreqMap map from term text to its accumulated frequency
 * @param vector terms (with frequencies) for one doc/field
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector) throws IOException {
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder charsBuf = new CharsRefBuilder();
    for (BytesRef bytes = it.next(); bytes != null; bytes = it.next()) {
        charsBuf.copyUTF8Bytes(bytes);
        final String word = charsBuf.toString();
        if (isNoiseWord(word)) {
            continue;
        }
        final int occurrences = (int) it.totalTermFreq();

        System.out.println(word + " = " + occurrences);

        // accumulate into the shared counter object for this term
        Int counter = termFreqMap.get(word);
        if (counter == null) {
            counter = new Int();
            counter.x = occurrences;
            termFreqMap.put(word, counter);
        } else {
            counter.x += occurrences;
        }
    }
}

From source file:com.shaie.fst.FstExample.java

License:Apache License

/**
 * Builds a small synonym FST mapping several color names to the single
 * output "color", then renders the FST as a Graphviz dot file.
 */
public static void main(String[] args) throws Exception {
    final CharsRef output = new CharsRef("color");
    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    // insertion order matters for FST construction, so keep it fixed
    final String[] inputs = { "blue", "green", "pale green", "pale blue", "dark sea green" };
    for (final String input : inputs) {
        builder.add(SynonymMap.Builder.join(input.split(" "), new CharsRefBuilder()), output, true);
    }
    final SynonymMap synMap = builder.build();
    try (PrintWriter pw = new PrintWriter("d:/tmp/syns.dot")) {
        Util.toDot(synMap.fst, pw, true, true);
    }
    System.out.println("Done!");
}

From source file:com.shaie.SynonymFilterExample.java

License:Apache License

/**
 * Registers a (possibly multi-word) synonym mapping from {@code input}
 * to {@code output}; words are separated by single spaces.
 */
private static void addSynonym(String input, String output, SynonymMap.Builder builder) {
    builder.add(SynonymMap.Builder.join(input.split(" "), new CharsRefBuilder()),
            SynonymMap.Builder.join(output.split(" "), new CharsRefBuilder()), true);
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

/**
 * Builds a per-field report for one stored document: field type, schema
 * and index flags, external/internal values, optional binary value,
 * boost, docFreq, and (when stored) the term vector with per-term
 * frequencies.
 *
 * @param doc the stored document to describe
 * @param docId the document's internal Lucene id, used for term-vector lookup
 * @param reader index reader used for docFreq and term vectors
 * @param schema schema used to resolve field types
 * @return map keyed by field name, each value a map of that field's details
 * @throws IOException on index access failure
 */
private static SimpleOrderedMap<Object> getDocumentFieldsInfo(Document doc, int docId, IndexReader reader,
        IndexSchema schema) throws IOException {
    // reusable scratch buffer for UTF-8 -> String conversion of term bytes
    final CharsRefBuilder spare = new CharsRefBuilder();
    SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<>();
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        SimpleOrderedMap<Object> f = new SimpleOrderedMap<>();

        // both may be null when the field is not declared in the schema
        SchemaField sfield = schema.getFieldOrNull(field.name());
        FieldType ftype = (sfield == null) ? null : sfield.getType();

        f.add("type", (ftype == null) ? null : ftype.getTypeName());
        f.add("schema", getFieldFlags(sfield));
        f.add("flags", getFieldFlags(field));

        // term in its indexed form, used below only for the docFreq lookup
        Term t = new Term(field.name(), ftype != null ? ftype.storedToIndexed(field) : field.stringValue());

        f.add("value", (ftype == null) ? null : ftype.toExternal(field));

        // TODO: this really should be "stored"
        f.add("internal", field.stringValue()); // may be a binary number

        BytesRef bytes = field.binaryValue();
        if (bytes != null) {
            f.add("binary", Base64.byteArrayToBase64(bytes.bytes, bytes.offset, bytes.length));
        }
        f.add("boost", field.boost());
        // docFreq can be 0 for non-indexed fields
        f.add("docFreq", t.text() == null ? 0 : reader.docFreq(t));

        // If we have a term vector, return that
        if (field.fieldType().storeTermVectors()) {
            try {
                Terms v = reader.getTermVector(docId, field.name());
                if (v != null) {
                    SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<>();
                    final TermsEnum termsEnum = v.iterator();
                    BytesRef text;
                    while ((text = termsEnum.next()) != null) {
                        final int freq = (int) termsEnum.totalTermFreq();
                        spare.copyUTF8Bytes(text);
                        tfv.add(spare.toString(), freq);
                    }
                    f.add("termVector", tfv);
                }
            } catch (Exception ex) {
                // term-vector extraction is best-effort: log and keep going
                log.warn("error writing term vector", ex);
            }
        }

        finfo.add(field.name(), f);
    }
    return finfo;
}

From source file:org.apache.solr.handler.component.AlfrescoLukeRequestHandler.java

License:Open Source License

/**
 * Collects statistics for one indexed field and adds them to
 * {@code fieldMap}: the number of distinct terms ("distinct"), the top-N
 * terms by document frequency ("topTerms"), and a log2 histogram of
 * docFreq values ("histogram").
 *
 * @param req current request; supplies the searcher and the NUMTERMS param
 * @param field name of the field to inspect
 * @param fieldMap output map receiving "distinct", "topTerms" and "histogram"
 * @throws IOException on index access failure
 */
@SuppressWarnings("unchecked")
private static void getDetailedFieldInfo(SolrQueryRequest req, String field, SimpleOrderedMap<Object> fieldMap)
        throws IOException {

    SolrParams params = req.getParams();
    final int numTerms = params.getInt(NUMTERMS, DEFAULT_COUNT);

    // +1 capacity so we can add first and then pop the lowest entry
    TopTermQueue tiq = new TopTermQueue(numTerms + 1); // Something to collect the top N terms in.

    // reusable scratch buffer for UTF-8 -> String conversion of term bytes
    final CharsRefBuilder spare = new CharsRefBuilder();

    Terms terms = MultiFields.getTerms(req.getSearcher().getIndexReader(), field);
    if (terms == null) { // field does not exist
        return;
    }
    TermsEnum termsEnum = terms.iterator();
    BytesRef text;
    int[] buckets = new int[HIST_ARRAY_SIZE];
    while ((text = termsEnum.next()) != null) {
        ++tiq.distinctTerms;
        int freq = termsEnum.docFreq(); // This calculation seems odd, but
        // it gives the same results as it used to.
        // histogram slot = ceil(log2(freq)); a freq of 1 lands in slot 0
        int slot = 32 - Integer.numberOfLeadingZeros(Math.max(0, freq - 1));
        buckets[slot] = buckets[slot] + 1;
        // only keep the term if it can displace the current minimum
        if (numTerms > 0 && freq > tiq.minFreq) {
            spare.copyUTF8Bytes(text);
            String t = spare.toString();

            tiq.add(new TopTermQueue.TermInfo(new Term(field, t), termsEnum.docFreq()));
            if (tiq.size() > numTerms) { // if tiq full
                tiq.pop(); // remove lowest in tiq
                tiq.minFreq = tiq.getTopTermInfo().docFreq;
            }
        }
    }
    tiq.histogram.add(buckets);
    fieldMap.add("distinct", tiq.distinctTerms);

    // Include top terms
    fieldMap.add("topTerms", tiq.toNamedList(req.getSearcher().getSchema()));

    // Add a histogram
    fieldMap.add("histogram", tiq.histogram.toNamedList());
}

From source file:org.apache.solr.search.join.BlockJoinFieldFacetAccumulator.java

License:Apache License

/** Copy-paste from {@link DocValuesFacets}: converts accumulated per-ordinal
 *  counts into a NamedList of readable-term -&gt; count facet entries.
 *  Uses global ordinal counts when present, otherwise the per-segment
 *  accumulators, whose count is packed in the upper 32 bits.
 *  NOTE(review): the loop index appears offset by one from the ordinal
 *  (index i maps to ord i-1) — confirm against DocValuesFacets. */
NamedList<Integer> getFacetValue() throws IOException {
    NamedList<Integer> facetValue = new NamedList<>();
    final CharsRefBuilder charsRef = new CharsRefBuilder();
    // if there are no global counts, fall back to the segment's own
    for (int i = 1; i < (globalCounts != null ? globalCounts.length : segmentAccums.length); i++) {
        int count = globalCounts != null ? globalCounts[i] : (int) (segmentAccums[i] >> 32);
        if (count > 0) {
            // look up the term bytes for this ordinal, then render them in
            // the field type's external (readable) form
            BytesRef term = topSSDV.lookupOrd(-1 + i);
            fieldType.indexedToReadable(term, charsRef);
            facetValue.add(charsRef.toString(), count);
        }
    }
    return facetValue;
}

From source file:org.apache.solr.update.processor.TolerantUpdateProcessor.java

License:Apache License

/**
 * Renders an indexed uniqueKey value in human-readable form via the
 * uniqueKey field type's {@code indexedToReadable}. When {@code ref} is
 * null (e.g. the id was missing from the update), returns
 * {@link #UNKNOWN_ID} instead.
 *
 * @param ref indexed representation of the id, or null
 * @return readable id, or {@link #UNKNOWN_ID}
 */
private String getPrintableId(BytesRef ref) {
    return ref == null
            ? UNKNOWN_ID
            : uniqueKeyField.getType().indexedToReadable(ref, new CharsRefBuilder()).toString();
}

From source file:org.codelibs.elasticsearch.common.lucene.search.XMoreLikeThis.java

License:Apache License

/**
 * Accumulates the terms found in a term vector into {@code termFreqMap},
 * summing each term's frequency over its postings and skipping both
 * noise words and field-specific skip terms.
 *
 * @param termFreqMap map from term text to its accumulated frequency
 * @param vector terms (with frequencies) for one doc/field
 * @param fieldName optional field name used for skip-term filtering
 */
private void addTermFrequencies(Map<String, Int> termFreqMap, Terms vector, @Nullable String fieldName)
        throws IOException {
    final TermsEnum it = vector.iterator();
    final CharsRefBuilder charsBuf = new CharsRefBuilder();
    for (BytesRef bytes = it.next(); bytes != null; bytes = it.next()) {
        charsBuf.copyUTF8Bytes(bytes);
        final String word = charsBuf.toString();
        if (isNoiseWord(word) || isSkipTerm(fieldName, word)) {
            continue;
        }

        // total frequency = sum of within-document freqs across all postings
        int occurrences = 0;
        final PostingsEnum postings = it.postings(null);
        while (postings != null && postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
            occurrences += postings.freq();
        }

        // accumulate into the shared counter object for this term
        Int counter = termFreqMap.get(word);
        if (counter == null) {
            counter = new Int();
            counter.x = occurrences;
            termFreqMap.put(word, counter);
        } else {
            counter.x += occurrences;
        }
    }
}