Example usage for org.apache.lucene.index LeafReader terms

List of usage examples for org.apache.lucene.index LeafReader terms

Introduction

In this page you can find the example usage for org.apache.lucene.index LeafReader terms.

Prototype

public abstract Terms terms(String field) throws IOException;

Source Link

Document

Returns the Terms index for this field, or null if it has none.

Usage

From source file:org.voyanttools.trombone.lucene.CorpusMapper.java

License:Open Source License

/**
 * This should not be called, except from the private build() method.
 * @throws IOException/*from ww w . j  a v a2  s  . c  o m*/
 */
private void buildFromTermsEnum() throws IOException {
    LeafReader reader = SlowCompositeReaderWrapper
            .wrap(storage.getLuceneManager().getDirectoryReader(corpus.getId()));

    Terms terms = reader.terms("id");
    TermsEnum termsEnum = terms.iterator();
    BytesRef bytesRef = termsEnum.next();
    int doc;
    String id;
    Set<String> ids = new HashSet<String>(getCorpusDocumentIds());
    bitSet = new SparseFixedBitSet(reader.numDocs());
    Bits liveBits = reader.getLiveDocs();
    while (bytesRef != null) {
        PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE);
        doc = postingsEnum.nextDoc();
        if (doc != PostingsEnum.NO_MORE_DOCS) {
            id = bytesRef.utf8ToString();
            if (ids.contains(id)) {
                bitSet.set(doc);
                luceneIds.add(doc);
                documentIdToLuceneIdMap.put(id, doc);
                luceneIdToDocumentIdMap.put(doc, id);
            }
        }
        bytesRef = termsEnum.next();
    }
    this.reader = new FilteredCorpusReader(reader, bitSet);
}

From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java

License:Apache License

/**
 * Accumulates per-term document counts for the given field across every
 * segment of the index and stores the result, sorted by term, in
 * {@code fieldCounts}.
 *
 * @param fieldName the indexed field to count terms for (e.g. "album_genres")
 * @param filter    optional term filter passed through to {@code add};
 *                  lowercased here because terms are compared case-insensitively;
 *                  may be null
 * @return this builder, for call chaining
 * @throws IOException if the index cannot be read
 */
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException {

    final IndexedField fld = models.indexedField(fieldName);
    final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap();
    final TIntIntHashMap ordCounts = new TIntIntHashMap();

    if (filter != null) {
        filter = filter.toLowerCase();
    }

    // Walk each segment of the index in turn.
    int sz = ir.leaves().size();
    for (int i = 0; i != sz; i++) {
        LeafReader lr = ir.leaves().get(i).reader();

        // Documents in this segment that have a value for the field.
        // May be null when no doc values were indexed for the field.
        Bits docs = lr.getDocsWithField(fld.getName());
        ordCounts.clear();

        if (fld.isDocValues()) {
            if (fld.isMultiValue()) {
                // docvalues & multivalue is a SortedSetDocValues: per-document
                // values are deduplicated, dereferenced, and sorted into a
                // dictionary of unique values. Each document stores ordinals
                // (dense, increasing) pointing into that dictionary.
                SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName());

                if (set != null && docs != null) {
                    // Bump a frequency counter per ordinal for every document
                    // that has the field.
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            set.setDocument(docId);
                            long ord;
                            while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
                                ordCounts.adjustOrPutValue((int) ord, 1, 1);
                            }
                        }
                    }

                    // Resolve each ordinal back to its term and record its count.
                    TermsEnum te = set.termsEnum();
                    BytesRef term;
                    while ((term = te.next()) != null) {
                        int ord = (int) te.ord();
                        add(fld, valuesMap, filter, term, ordCounts.get(ord));
                    }
                }

            } else {
                // Single-valued doc values: at most one term per document.
                SortedDocValues set = lr.getSortedDocValues(fld.getName());

                if (set != null && docs != null) {
                    for (int docId = 0; docId != docs.length(); docId++) {
                        if (docs.get(docId)) {
                            // Get the term - Classical, Rock, etc.
                            BytesRef term = set.get(docId);
                            add(fld, valuesMap, filter, term, 1);
                        }
                    }
                }
            }
        } else {
            // Normal indexed field: enumerate the terms dictionary directly.
            // terms() returns null when the segment has no terms for the field.
            Terms terms = lr.terms(fld.getName());
            if (terms != null) {
                TermsEnum te = terms.iterator();
                BytesRef term;
                while ((term = te.next()) != null) {
                    add(fld, valuesMap, filter, term, te.docFreq());
                }
            }
        }
    }

    // Sort the accumulated counters by term and publish them.
    IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]);
    Arrays.sort(list);
    this.fieldCounts.put(fld.getName(), list);

    return this;
}

From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java

License:Apache License

/**
 * Builds the presearcher query for an indexed document by tokenizing every
 * field's terms, filtering them through the configured components and the
 * query-term filter, and OR-ing in the ANYTOKEN catch-all clause.
 *
 * @param reader          leaf reader over the document's in-memory index
 * @param queryTermFilter supplies the set of terms used by registered queries
 * @return the query to run against the query index
 */
@Override
public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) {
    try {
        DocumentQueryBuilder queryBuilder = getQueryBuilder();
        for (String field : reader.fields()) {

            // fields() can list a field for which terms() returns null
            // (no terms index); skip such fields to avoid an NPE.
            if (reader.terms(field) == null) {
                continue;
            }

            TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator());
            for (PresearcherComponent component : components) {
                ts = component.filterDocumentTokens(field, ts);
            }

            // Keep only tokens that actually appear in some registered query.
            ts = new BytesRefFilteredTokenFilter(ts, queryTermFilter.getTerms(field));

            TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class);
            while (ts.incrementToken()) {
                // deepCopyOf: the attribute's BytesRef is reused per token.
                queryBuilder.addTerm(field, BytesRef.deepCopyOf(termAtt.getBytesRef()));
            }

        }
        Query presearcherQuery = queryBuilder.build();

        // Always match documents carrying the ANYTOKEN marker as well.
        BooleanQuery.Builder bq = new BooleanQuery.Builder();
        bq.add(presearcherQuery, BooleanClause.Occur.SHOULD);
        bq.add(new TermQuery(new Term(ANYTOKEN_FIELD, ANYTOKEN)), BooleanClause.Occur.SHOULD);
        presearcherQuery = bq.build();

        for (PresearcherComponent component : components) {
            presearcherQuery = component.adjustPresearcherQuery(reader, presearcherQuery);
        }

        return presearcherQuery;
    } catch (IOException e) {
        // We're a MemoryIndex, so this shouldn't happen...
        throw new RuntimeException(e);
    }
}

From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java

License:Apache License

/**
 * Gathers every term of the given field into a {@link BytesRefHash}.
 *
 * @param field  the field whose terms should be collected
 * @param reader the leaf reader to enumerate terms from
 * @return a hash of all the field's terms; empty when the field has no terms index
 * @throws IOException if the terms enumeration fails
 */
protected BytesRefHash buildTermsHash(String field, LeafReader reader) throws IOException {
    BytesRefHash collected = new BytesRefHash();
    Terms fieldTerms = reader.terms(field);
    if (fieldTerms != null) {
        TermsEnum enumerator = fieldTerms.iterator();
        for (BytesRef current = enumerator.next(); current != null; current = enumerator.next()) {
            collected.add(current);
        }
    }
    return collected;
}

From source file:uk.co.flax.luwak.QueryTermFilter.java

License:Apache License

/**
 * Create a QueryTermFilter for an IndexReader, recording every term of
 * every field into {@code termsHash}.
 *
 * @param reader the {@link IndexReader}
 * @throws IOException on error
 */
public QueryTermFilter(IndexReader reader) throws IOException {
    LeafReader leafReader = SlowCompositeReaderWrapper.wrap(reader);
    for (String field : leafReader.fields()) {
        BytesRefHash fieldTerms = new BytesRefHash();
        Terms terms = leafReader.terms(field);
        // terms() returns null when the field has no terms index.
        if (terms != null) {
            TermsEnum iterator = terms.iterator();
            for (BytesRef term = iterator.next(); term != null; term = iterator.next()) {
                fieldTerms.add(term);
            }
        }
        termsHash.put(field, fieldTerms);
    }
}