List of usage examples for org.apache.lucene.index.LeafReader.terms
/**
 * Returns the {@link Terms} for the given field.
 *
 * <p>NOTE(review): several of the usage examples in this listing check the result for
 * {@code null} before iterating, so a {@code null} return presumably means the field has
 * no terms -- confirm against the Lucene version in use.
 *
 * @param field name of the field whose terms are requested
 * @return the terms for {@code field}
 * @throws IOException declared by the signature; presumably raised when reading the index fails
 */
public abstract Terms terms(String field) throws IOException;
From source file:org.voyanttools.trombone.lucene.CorpusMapper.java
License:Open Source License
/** * This should not be called, except from the private build() method. * @throws IOException/*from ww w . j a v a2 s . c o m*/ */ private void buildFromTermsEnum() throws IOException { LeafReader reader = SlowCompositeReaderWrapper .wrap(storage.getLuceneManager().getDirectoryReader(corpus.getId())); Terms terms = reader.terms("id"); TermsEnum termsEnum = terms.iterator(); BytesRef bytesRef = termsEnum.next(); int doc; String id; Set<String> ids = new HashSet<String>(getCorpusDocumentIds()); bitSet = new SparseFixedBitSet(reader.numDocs()); Bits liveBits = reader.getLiveDocs(); while (bytesRef != null) { PostingsEnum postingsEnum = termsEnum.postings(null, PostingsEnum.NONE); doc = postingsEnum.nextDoc(); if (doc != PostingsEnum.NO_MORE_DOCS) { id = bytesRef.utf8ToString(); if (ids.contains(id)) { bitSet.set(doc); luceneIds.add(doc); documentIdToLuceneIdMap.put(id, doc); luceneIdToDocumentIdMap.put(doc, id); } } bytesRef = termsEnum.next(); } this.reader = new FilteredCorpusReader(reader, bitSet); }
From source file:suonos.lucene.fields.IndexedFieldCountsBuilder.java
License:Apache License
public IndexedFieldCountsBuilder addField(String fieldName, String filter) throws IOException { final IndexedField fld = models.indexedField(fieldName); final Map<String, IndexedFieldTermCount> valuesMap = AntLib.newHashMap(); final TIntIntHashMap ordCounts = new TIntIntHashMap(); if (filter != null) { filter = filter.toLowerCase();//from w ww . j av a2s. c o m } // Get count of segments. // int sz = ir.leaves().size(); for (int i = 0; i != sz; i++) { // Get the segment reader. // LeafReader lr = ir.leaves().get(i).reader(); // Doc count for field. Eg "album_genres" // lr.getDocCount(fld.getName()); // Get all documents that have the field "album_genres" // Bits docs = lr.getDocsWithField(fld.getName()); ordCounts.clear(); // Enumerate the field terms. // if (fld.isDocValues()) { if (fld.isMultiValue()) { // docvalues & multivalue is a SortedSetDocValues // Per-Document values in a SortedDocValues are // deduplicated, dereferenced, and sorted into a dictionary // of // unique values. A pointer to the dictionary value // (ordinal) can be retrieved for each document. // Ordinals are dense and in increasing sorted order. // SortedSetDocValues set = lr.getSortedSetDocValues(fld.getName()); if (set != null) { // For all documents that have the field "album_genres": // for (int docId = 0; docId != docs.length(); docId++) { if (docs.get(docId)) { // Enumerate the set of [terms] of // "album_genres" for the document represented // by docId. // Each ord represents the term value. // set.setDocument(docId); // For each term bump up the frequency. 
// long ord; while ((ord = set.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { ordCounts.adjustOrPutValue((int) ord, 1, 1); System.out.println("term=" + set.lookupOrd(ord).utf8ToString()); } } } TermsEnum te = set.termsEnum(); BytesRef term; while ((term = te.next()) != null) { int ord = (int) te.ord(); add(fld, valuesMap, filter, term, ordCounts.get(ord)); } } } else { SortedDocValues set = lr.getSortedDocValues(fld.getName()); if (set != null) { // For all documents that have the field "album_genres": // for (int docId = 0; docId != docs.length(); docId++) { if (docs.get(docId)) { // Get the term - Classical, Rock, etc. // BytesRef term = set.get(docId); add(fld, valuesMap, filter, term, 1); } } } } } else { // Normal field, not a doc value. // Terms terms = lr.terms(fld.getName()); TermsEnum te = terms.iterator(); BytesRef term; while ((term = te.next()) != null) { add(fld, valuesMap, filter, term, te.docFreq()); } } /* * SORTED doc[0] = "aardvark" doc[1] = "beaver" doc[2] = "aardvark" * * doc[0] = 0 doc[1] = 1 doc[2] = 0 * * term[0] = "aardvark" term[1] = "beaver" */ // http://127.0.0.1:8080/api/facets?fields=track_title_a // the above should return B:(4) because titles starting with B are // 4! } // Get the array of term counters. // IndexedFieldTermCount[] list = valuesMap.values().toArray(new IndexedFieldTermCount[0]); // Sort by term. // Arrays.sort(list); // add to the map. // this.fieldCounts.put(fld.getName(), list); return this; }
From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java
License:Apache License
@Override public final Query buildQuery(LeafReader reader, QueryTermFilter queryTermFilter) { try {//from w w w . j a v a2 s . c om DocumentQueryBuilder queryBuilder = getQueryBuilder(); for (String field : reader.fields()) { TokenStream ts = new TermsEnumTokenStream(reader.terms(field).iterator()); for (PresearcherComponent component : components) { ts = component.filterDocumentTokens(field, ts); } ts = new BytesRefFilteredTokenFilter(ts, queryTermFilter.getTerms(field)); TermToBytesRefAttribute termAtt = ts.addAttribute(TermToBytesRefAttribute.class); while (ts.incrementToken()) { queryBuilder.addTerm(field, BytesRef.deepCopyOf(termAtt.getBytesRef())); } } Query presearcherQuery = queryBuilder.build(); BooleanQuery.Builder bq = new BooleanQuery.Builder(); bq.add(presearcherQuery, BooleanClause.Occur.SHOULD); bq.add(new TermQuery(new Term(ANYTOKEN_FIELD, ANYTOKEN)), BooleanClause.Occur.SHOULD); presearcherQuery = bq.build(); for (PresearcherComponent component : components) { presearcherQuery = component.adjustPresearcherQuery(reader, presearcherQuery); } return presearcherQuery; } catch (IOException e) { // We're a MemoryIndex, so this shouldn't happen... throw new RuntimeException(e); } }
From source file:uk.co.flax.luwak.presearcher.TermFilteredPresearcher.java
License:Apache License
protected BytesRefHash buildTermsHash(String field, LeafReader reader) throws IOException { BytesRefHash terms = new BytesRefHash(); Terms t = reader.terms(field); if (t == null) { return terms; }// www .j av a 2 s . com TermsEnum te = t.iterator(); BytesRef term; while ((term = te.next()) != null) { terms.add(term); } return terms; }
From source file:uk.co.flax.luwak.QueryTermFilter.java
License:Apache License
/**
 * Creates a QueryTermFilter holding, for each field of an index, the set of its terms.
 *
 * <p>Fields for which the reader exposes no terms are registered with an empty set.
 *
 * @param reader the {@link IndexReader} whose fields and terms are captured
 * @throws IOException if the index cannot be read
 */
public QueryTermFilter(IndexReader reader) throws IOException {
    final LeafReader merged = SlowCompositeReaderWrapper.wrap(reader);
    for (String fieldName : merged.fields()) {
        final BytesRefHash fieldTerms = new BytesRefHash();
        final Terms source = merged.terms(fieldName);
        if (source != null) {
            final TermsEnum cursor = source.iterator();
            for (BytesRef current = cursor.next(); current != null; current = cursor.next()) {
                fieldTerms.add(current);
            }
        }
        termsHash.put(fieldName, fieldTerms);
    }
}