List of usage examples for org.apache.lucene.util Bits get
boolean get(int index);
Parameter: index — the index of the bit to return. From source file: br.bireme.ngrams.NGrams.java
public static void export(NGIndex index, final NGSchema schema, final String outFile, final String outFileEncoding) throws IOException { if (index == null) { throw new NullPointerException("index"); }//from ww w . java 2s .co m if (schema == null) { throw new NullPointerException("schema"); } if (outFile == null) { throw new NullPointerException("outFile"); } if (outFileEncoding == null) { throw new NullPointerException("outFileEncoding"); } final Parameters parameters = schema.getParameters(); final TreeMap<Integer, String> fields = new TreeMap<>(); final IndexReader reader = index.getIndexSearcher().getIndexReader(); final int maxdoc = reader.maxDoc(); //final Bits liveDocs = MultiFields.getLiveDocs(reader); final Bits liveDocs = MultiBits.getLiveDocs(reader); final BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile), Charset.forName(outFileEncoding), StandardOpenOption.CREATE, StandardOpenOption.WRITE); boolean first = true; for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) { fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD); } for (int docID = 0; docID < maxdoc; docID++) { if ((liveDocs != null) && (!liveDocs.get(docID))) continue; final Document doc = reader.document(docID); if (first) { first = false; } else { writer.newLine(); } writer.append(doc2pipe(doc, fields)); } writer.close(); reader.close(); }
From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java
License:Apache License
private void init(AtomicReader reader) throws IOException { int maxDoc = reader.maxDoc(); uidArray = new long[maxDoc]; NumericDocValues uidValues = reader.getNumericDocValues(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD); Bits liveDocs = reader.getLiveDocs(); for (int i = 0; i < maxDoc; ++i) { if (liveDocs != null && !liveDocs.get(i)) { uidArray[i] = EsearchSegmentReader.DELETED_UID; continue; }//from w ww .j ava 2 s. c om uidArray[i] = uidValues.get(i); } }
From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java
License:Apache License
@Override public Bits getLiveDocs() { ensureOpen();//www . ja v a 2 s. com return new Bits() { @Override public boolean get(int index) { int[] delSet = currentDelDocIds; if (delSet != null && Arrays.binarySearch(delSet, index) >= 0) { return false; } Bits liveDocs = in.getLiveDocs(); if (liveDocs == null) { return true; } return liveDocs.get(index); } @Override public int length() { return in.getLiveDocs().length(); } }; }
From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java
License:Apache License
public boolean isDeleted(int docid) { int[] delSet = currentDelDocIds; if (delSet != null && Arrays.binarySearch(delSet, docid) >= 0) { return true; }// w w w .j av a 2s . com Bits liveDocs = in.getLiveDocs(); if (liveDocs == null) { return false; } return !liveDocs.get(docid); }
From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java
License:Open Source License
/**
 * Wraps a leaf reader to enforce field-level security (FLS) and
 * document-level security (DLS).
 *
 * FLS: when {@code includes} is non-empty, builds a filtered FieldInfos
 * containing only fields whose names match the include list (exact match
 * or wildcard pattern).
 *
 * DLS: when {@code dlsQuery} is non-null, pre-computes a live-docs bitset
 * of the documents matching the query, intersected with the delegate's
 * existing deletions.
 *
 * @param delegate the underlying leaf reader
 * @param includes field names/patterns visible through this reader; null or
 *                 empty disables FLS
 * @param dlsQuery query selecting the visible documents; null disables DLS
 */
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) {
    super(delegate);
    flsEnabled = includes != null && !includes.isEmpty();
    dlsEnabled = dlsQuery != null;
    if (flsEnabled) {
        this.includes = includes.toArray(new String[0]);
        final FieldInfos infos = delegate.getFieldInfos();
        final List<FieldInfo> fi = new ArrayList<FieldInfo>(infos.size());
        for (final FieldInfo info : infos) {
            final String fname = info.name;
            // Keep the field if it is listed literally, or if any include
            // pattern (possibly containing wildcards) matches it.
            if ((!WildcardMatcher.containsWildcard(fname) && includes.contains(fname))
                    || WildcardMatcher.matchAny(this.includes, fname)) {
                fi.add(info);
            }
        }
        this.flsFieldInfos = new FieldInfos(fi.toArray(new FieldInfo[0]));
    } else {
        this.includes = null;
        this.flsFieldInfos = null;
    }
    if (dlsEnabled) {
        try {
            //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF))
            // Score the DLS query against *this* reader to find the set of
            // documents the caller is allowed to see.
            final IndexSearcher searcher = new IndexSearcher(this);
            searcher.setQueryCache(null); // no caching: result is used once
            final boolean needsScores = false;
            final Weight preserveWeight = searcher.createNormalizedWeight(dlsQuery, needsScores);
            final int maxDoc = in.maxDoc();
            final FixedBitSet bits = new FixedBitSet(maxDoc);
            final Scorer preverveScorer = preserveWeight.scorer(this.getContext());
            if (preverveScorer != null) {
                bits.or(preverveScorer.iterator());
            }
            // Intersect with the delegate's deletions so already-deleted
            // documents never become visible.
            if (in.hasDeletions()) {
                final Bits oldLiveDocs = in.getLiveDocs();
                assert oldLiveDocs != null;
                final DocIdSetIterator it = new BitSetIterator(bits, 0L);
                for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
                    if (!oldLiveDocs.get(i)) {
                        bits.clear(i);
                    }
                }
            }
            this.liveDocs = bits;
            this.numDocs = bits.cardinality();
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } else {
        // DLS disabled: defer to the delegate's own live docs / counts.
        this.liveDocs = null;
        this.numDocs = -1;
    }
}
From source file:com.github.flaxsearch.resources.PostingsResource.java
License:Apache License
@GET public TermData getPostings(@QueryParam("segment") Integer segment, @PathParam("field") String field, @PathParam("term") String term, @QueryParam("count") @DefaultValue("2147483647") int count) throws IOException { TermsEnum te = readerManager.findTermPostings(segment, field, term); Bits liveDocs = readerManager.getLiveDocs(segment); PostingsEnum pe = te.postings(null, PostingsEnum.NONE); int docFreq = te.docFreq(); long totalTermFreq = te.totalTermFreq(); int size = (docFreq < count) ? docFreq : count; int[] postings = new int[size]; int docId;//from w w w . j av a2 s. com int i = 0; while ((docId = pe.nextDoc()) != PostingsEnum.NO_MORE_DOCS && i < count) { if (liveDocs != null && liveDocs.get(docId) == false) continue; postings[i] = docId; i++; } return new TermData(term, docFreq, totalTermFreq, postings); }
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
/**
 * Builds (or rebuilds) a "sidecar" parallel index next to the main index
 * and returns a SidecarIndexReader combining the two. For every document
 * in the main index, boost data is looked up in {@code source} by doc id;
 * documents without data (or deleted) get a dummy placeholder so document
 * numbers stay aligned between the two indexes. On any failure, falls back
 * to a reader over the main index alone.
 *
 * @param main    the main index reader whose doc numbering must be mirrored
 * @param source  Solr searcher providing the parallel (boost) fields
 * @param rebuild when true, deletes any existing sidecar index first
 * @return a combined reader, or a main-only reader/`main` itself on failure
 */
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            // non-FS main index: fall back to a unique temp-dir location
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator
                    + sidecarIndexLocation + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        // Mirror all source fields except the id, which links the two indexes.
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        // Per-sub-reader doc counts; committing at these boundaries keeps the
        // sidecar's segment geometry aligned with the main index.
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main); // null => no deletions
        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            // Commit at each sub-reader boundary (see mergeTargets above).
            if (i == nextTarget) {
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                // Load only the docId field from the stored document.
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        // Any failure: log and degrade to the main index only.
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null,
                    SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}
From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java
License:Apache License
private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) { if (liveDocs == null) { return doc; }//w w w . j a v a2 s .c om while (doc < maxDoc && !liveDocs.get(doc)) { ++doc; } return doc; }
From source file:com.searchbox.SuggeterDataStructureBuilder.java
License:Apache License
private void iterateThroughDocuments(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) { IndexReader reader = searcher.getIndexReader(); // WARNING: returns null if there are no deletions Bits liveDocs = MultiFields.getLiveDocs(reader); maxNumDocs = Math.min(maxNumDocs, reader.maxDoc()); if (maxNumDocs == -1) { maxNumDocs = reader.maxDoc();/* w w w. j a v a 2 s . co m*/ } LOGGER.info("Analyzing docs:\t" + numdocs); for (int docID = 0; docID < reader.maxDoc(); docID++) { if (numdocs > maxNumDocs) { break; } if (liveDocs != null && !liveDocs.get(docID)) { continue; // deleted } if ((docID % 1000) == 0) { LOGGER.debug("Doing " + docID + " of " + maxNumDocs); } StringBuilder text = new StringBuilder(); for (String field : fields) { /* * not sure if this is the best way, might make sense to do a * process text for each field individually, but then book * keeping the doc freq for terms becomes a bit of a pain in the * ass */ try { IndexableField[] multifield = reader.document(docID).getFields(field); for (IndexableField singlefield : multifield) { // create one big string from all of the text in the // documents for processing later on text.append(". " + singlefield.stringValue()); } } catch (IOException ex) { LOGGER.warn("Document " + docID + " missing requested field (" + field + ")...ignoring"); } } // might as well see if its empty if (text.length() > 0) { // actually processes the massive string which was created from // all of the above fields processText(text.toString().toLowerCase()); numdocs++; } } LOGGER.info("Number of documents analyzed: \t" + numdocs); for (int zz = 0; zz < counts.length; zz++) { LOGGER.info("Number of " + zz + "-grams: \t" + counts[zz]); } }
From source file:com.searchbox.Tagger.java
License:Apache License
private void DfCountBuilder(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) { IndexReader reader = searcher.getIndexReader(); Bits liveDocs = MultiFields.getLiveDocs(reader); // WARNING: returns null if // there are no deletions maxNumDocs = Math.min(maxNumDocs, reader.maxDoc()); if (maxNumDocs == -1) { maxNumDocs = reader.maxDoc();//from ww w. ja v a 2 s .c o m } LOGGER.info("Analyzing docs:\t" + numdocs); for (int docID = 0; docID < reader.maxDoc(); docID++) { if (numdocs > maxNumDocs) { break; } if (liveDocs != null && !liveDocs.get(docID)) { continue; // deleted } if ((docID % 1000) == 0) { LOGGER.debug("Doing " + docID + " of " + maxNumDocs); } StringBuilder text = new StringBuilder(); for (String field : fields) { // not sure if this is the best way, might // make sense to do a // process text for each field individually, but then book keeping // the doc freq for terms becomes a bit of a pain in the ass try { text.append(". " + reader.document(docID).get(field)); } catch (IOException ex) { LOGGER.warn("Document " + docID + " missing requested field (" + field + ")...ignoring"); } } if (text.length() > 0) { // might as well see if its empty processDocText(text.toString()); numdocs++; } } LOGGER.info("Number of documents analyzed: \t" + numdocs); dfcounts.put(DOC_COUNTS_STRING, numdocs); tfcounts.put(DOC_COUNTS_STRING, numdocs); }