Example usage for org.apache.lucene.util Bits get

List of usage examples for org.apache.lucene.util Bits get

Introduction

On this page you can find example usages of org.apache.lucene.util Bits get.

Prototype

boolean get(int index);

Source Link

Document

Returns the value of the bit with the specified index.

Usage

From source file:br.bireme.ngrams.NGrams.java

/**
 * Exports every live document of the index to a pipe-delimited text file,
 * one document per line, using the field layout described by the schema.
 *
 * @param index           index whose documents are exported
 * @param schema          schema describing the exported field positions
 * @param outFile         path of the output file (created if absent)
 * @param outFileEncoding character encoding used for the output file
 * @throws IOException          on index read or file write failure
 * @throws NullPointerException if any argument is null
 */
public static void export(NGIndex index, final NGSchema schema, final String outFile,
        final String outFileEncoding) throws IOException {
    if (index == null) {
        throw new NullPointerException("index");
    }
    if (schema == null) {
        throw new NullPointerException("schema");
    }
    if (outFile == null) {
        throw new NullPointerException("outFile");
    }
    if (outFileEncoding == null) {
        throw new NullPointerException("outFileEncoding");
    }
    final Parameters parameters = schema.getParameters();
    final TreeMap<Integer, String> fields = new TreeMap<>();

    // Map each field position to its "not normalized" column name.
    for (Map.Entry<Integer, br.bireme.ngrams.Field> entry : parameters.sfields.entrySet()) {
        fields.put(entry.getKey(), entry.getValue().name + NOT_NORMALIZED_FLD);
    }

    // BUGFIX: the original closed the reader and writer only on the success
    // path, leaking both on any I/O error; try-with-resources closes them
    // unconditionally.
    try (final IndexReader reader = index.getIndexSearcher().getIndexReader();
            final BufferedWriter writer = Files.newBufferedWriter(Paths.get(outFile),
                    Charset.forName(outFileEncoding),
                    StandardOpenOption.CREATE, StandardOpenOption.WRITE)) {
        final int maxdoc = reader.maxDoc();
        // null when the index has no deletions
        final Bits liveDocs = MultiBits.getLiveDocs(reader);
        boolean first = true;

        for (int docID = 0; docID < maxdoc; docID++) {
            if ((liveDocs != null) && (!liveDocs.get(docID))) {
                continue; // skip deleted documents
            }
            final Document doc = reader.document(docID);

            // newline before every record except the first -> no trailing newline
            if (first) {
                first = false;
            } else {
                writer.newLine();
            }
            writer.append(doc2pipe(doc, fields));
        }
    }
}

From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java

License:Apache License

/**
 * Fills {@code uidArray} with the UID doc value of each live document,
 * marking deleted documents with {@code DELETED_UID}.
 */
private void init(AtomicReader reader) throws IOException {
    final int maxDoc = reader.maxDoc();
    final NumericDocValues uidValues =
            reader.getNumericDocValues(AbstractEsearchIndexable.DOCUMENT_ID_PAYLOAD_FIELD);
    final Bits liveDocs = reader.getLiveDocs(); // null when there are no deletions
    uidArray = new long[maxDoc];
    for (int docId = 0; docId < maxDoc; ++docId) {
        final boolean deleted = liveDocs != null && !liveDocs.get(docId);
        uidArray[docId] = deleted ? EsearchSegmentReader.DELETED_UID : uidValues.get(docId);
    }
}

From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java

License:Apache License

/**
 * Returns a live-docs view combining the wrapped reader's deletions with the
 * in-memory {@code currentDelDocIds} delete set: a document is live only if
 * it is live in the wrapped reader AND not present in the delete set.
 *
 * @return a {@link Bits} over all documents of the wrapped reader
 */
@Override
public Bits getLiveDocs() {
    ensureOpen();
    return new Bits() {
        @Override
        public boolean get(int index) {
            // Snapshot the field once so this call sees a consistent array.
            int[] delSet = currentDelDocIds;
            if (delSet != null && Arrays.binarySearch(delSet, index) >= 0) {
                return false; // deleted in memory
            }
            Bits liveDocs = in.getLiveDocs();
            // null means the wrapped reader has no deletions -> everything live
            return liveDocs == null || liveDocs.get(index);
        }

        @Override
        public int length() {
            Bits liveDocs = in.getLiveDocs();
            // BUGFIX: the original dereferenced in.getLiveDocs() without a null
            // check and threw NPE whenever the wrapped reader had no deletions.
            // By the Lucene contract the live-docs length equals maxDoc(), so
            // fall back to that when liveDocs is null.
            return liveDocs == null ? in.maxDoc() : liveDocs.length();
        }
    };
}

From source file:cn.hbu.cs.esearch.core.EsearchSegmentReader.java

License:Apache License

/**
 * Reports whether {@code docid} is deleted, either in the in-memory delete
 * set or in the wrapped reader's live docs.
 */
public boolean isDeleted(int docid) {
    final int[] delSet = currentDelDocIds;
    final boolean deletedInMemory =
            delSet != null && Arrays.binarySearch(delSet, docid) >= 0;
    if (deletedInMemory) {
        return true;
    }
    final Bits liveDocs = in.getLiveDocs();
    // A null live-docs means the wrapped reader has no deletions at all.
    return liveDocs != null && !liveDocs.get(docid);
}

From source file:com.floragunn.searchguard.configuration.DlsFlsFilterLeafReader.java

License:Open Source License

/**
 * Wraps a leaf reader to enforce field-level security (FLS) and
 * document-level security (DLS).
 *
 * @param delegate the underlying reader being filtered
 * @param includes names of the fields that stay visible (entries may contain
 *                 wildcards); null or empty disables FLS
 * @param dlsQuery query selecting the documents that stay visible; null
 *                 disables DLS
 */
DlsFlsFilterLeafReader(final LeafReader delegate, final Set<String> includes, final Query dlsQuery) {
    super(delegate);

    flsEnabled = includes != null && !includes.isEmpty();
    dlsEnabled = dlsQuery != null;

    if (flsEnabled) {
        this.includes = includes.toArray(new String[0]);
        final FieldInfos infos = delegate.getFieldInfos();

        // Keep only the field infos that match the include list, either by
        // exact name or by wildcard pattern.
        final List<FieldInfo> fi = new ArrayList<FieldInfo>(infos.size());
        for (final FieldInfo info : infos) {
            final String fname = info.name;
            if ((!WildcardMatcher.containsWildcard(fname) && includes.contains(fname))
                    || WildcardMatcher.matchAny(this.includes, fname)) {
                fi.add(info);
            }
        }

        this.flsFieldInfos = new FieldInfos(fi.toArray(new FieldInfo[0]));
    } else {
        this.includes = null;
        this.flsFieldInfos = null;
    }

    if (dlsEnabled) {
        try {

            //borrowed from Apache Lucene (Copyright Apache Software Foundation (ASF))
            // Run the DLS query against this reader and collect the matching
            // doc ids into a bit set that becomes the new liveDocs.
            final IndexSearcher searcher = new IndexSearcher(this);
            searcher.setQueryCache(null); // never cache security-filtered results
            final boolean needsScores = false;
            final Weight preserveWeight = searcher.createNormalizedWeight(dlsQuery, needsScores);

            final int maxDoc = in.maxDoc();
            final FixedBitSet bits = new FixedBitSet(maxDoc);
            final Scorer preverveScorer = preserveWeight.scorer(this.getContext());
            if (preverveScorer != null) {
                bits.or(preverveScorer.iterator());
            }

            // Intersect with the reader's own deletions so documents deleted
            // in the underlying index do not become visible again.
            if (in.hasDeletions()) {
                final Bits oldLiveDocs = in.getLiveDocs();
                assert oldLiveDocs != null;
                final DocIdSetIterator it = new BitSetIterator(bits, 0L);
                for (int i = it.nextDoc(); i != DocIdSetIterator.NO_MORE_DOCS; i = it.nextDoc()) {
                    if (!oldLiveDocs.get(i)) {
                        bits.clear(i);
                    }
                }
            }

            this.liveDocs = bits;
            this.numDocs = bits.cardinality();

        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    } else {
        // DLS disabled: no recomputed live docs / doc count.
        this.liveDocs = null;
        this.numDocs = -1;
    }
}

From source file:com.github.flaxsearch.resources.PostingsResource.java

License:Apache License

/**
 * Returns posting (document id) information for a term.
 *
 * @param segment optional segment ordinal; null means the composite reader
 * @param field   the field the term belongs to
 * @param term    the term whose postings are listed
 * @param count   maximum number of document ids to return
 * @return term statistics plus up to {@code count} live document ids
 * @throws IOException on index access failure
 */
@GET
public TermData getPostings(@QueryParam("segment") Integer segment, @PathParam("field") String field,
        @PathParam("term") String term, @QueryParam("count") @DefaultValue("2147483647") int count)
        throws IOException {

    TermsEnum te = readerManager.findTermPostings(segment, field, term);
    Bits liveDocs = readerManager.getLiveDocs(segment); // null when no deletions
    PostingsEnum pe = te.postings(null, PostingsEnum.NONE);

    int docFreq = te.docFreq();
    long totalTermFreq = te.totalTermFreq();

    // docFreq counts deleted documents too, so size is only an upper bound.
    int size = (docFreq < count) ? docFreq : count;
    int[] postings = new int[size];
    int docId;
    int i = 0;
    while ((docId = pe.nextDoc()) != PostingsEnum.NO_MORE_DOCS && i < size) {
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // skip deleted documents
        }
        postings[i] = docId;
        i++;
    }
    // BUGFIX: skipped (deleted) documents used to leave trailing zeros in the
    // array, which were then reported as postings for doc 0; trim the array
    // to the entries actually written.
    if (i < size) {
        int[] trimmed = new int[i];
        System.arraycopy(postings, 0, trimmed, 0, i);
        postings = trimmed;
    }
    return new TermData(term, docFreq, totalTermFreq, postings);
}

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

/**
 * Builds (or rebuilds) the sidecar "boost" index next to the main index and
 * returns a parallel reader combining both.  Every main-index document gets a
 * matching sidecar document (a dummy when no boost data exists) so that doc
 * numbers stay aligned between the two indexes.
 *
 * @param main    the primary index reader
 * @param source  searcher over the collection holding the parallel fields
 * @param rebuild when true the previous sidecar index is deleted first
 * @return a SidecarIndexReader over main+sidecar, or {@code main} itself if
 *         building the parallel index fails
 */
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            // non-FS directory: fall back to a timestamped temp location
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
                    + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id"); // the id stays in the main index only
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        // Record per-segment maxDoc so the sidecar writer can commit on the
        // same segment boundaries as the main index.
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        // null when the main index has no deletions
        Bits live = MultiFields.getLiveDocs(main);

        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            // commit at each segment boundary to mirror the main index layout
            if (i == nextTarget) {
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        // Any failure degrades gracefully to serving the main index alone.
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                    sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}

From source file:com.lucure.core.codec.CompressingStoredFieldsWriter.java

License:Apache License

/**
 * Advances {@code doc} to the first live document at or after it.
 *
 * @param doc      starting document id
 * @param liveDocs live-docs bits, or null when the index has no deletions
 * @param maxDoc   exclusive upper bound on document ids
 * @return the first id &gt;= {@code doc} that is live, or {@code maxDoc}
 *         when none remain ({@code doc} itself when {@code liveDocs} is null)
 */
private static int nextLiveDoc(int doc, Bits liveDocs, int maxDoc) {
    if (liveDocs == null) {
        return doc; // no deletions: every document is live
    }
    int candidate = doc;
    while (candidate < maxDoc && !liveDocs.get(candidate)) {
        ++candidate;
    }
    return candidate;
}

From source file:com.searchbox.SuggeterDataStructureBuilder.java

License:Apache License

/**
 * Scans up to {@code maxNumDocs} live documents, concatenates the text of the
 * requested fields per document, and passes it (lower-cased) to
 * {@code processText} to build the suggester data structures.
 *
 * @param searcher   searcher whose index is scanned
 * @param fields     stored fields whose text is analyzed
 * @param maxNumDocs cap on documents to analyze; -1 means all documents
 */
private void iterateThroughDocuments(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) {
    IndexReader reader = searcher.getIndexReader();
    // WARNING: returns null if there are no deletions
    Bits liveDocs = MultiFields.getLiveDocs(reader);

    // NOTE(review): when maxNumDocs == -1 the min() keeps -1 (since -1 < maxDoc),
    // so the -1 check below still fires as intended.
    maxNumDocs = Math.min(maxNumDocs, reader.maxDoc());

    if (maxNumDocs == -1) {
        maxNumDocs = reader.maxDoc();
    }
    LOGGER.info("Analyzing docs:\t" + numdocs);

    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        if (numdocs > maxNumDocs) {
            break; // analyzed enough documents
        }
        if (liveDocs != null && !liveDocs.get(docID)) {
            continue; // deleted
        }

        if ((docID % 1000) == 0) {
            LOGGER.debug("Doing " + docID + " of " + maxNumDocs);
        }

        StringBuilder text = new StringBuilder();
        for (String field : fields) {
            /*
             * not sure if this is the best way, might make sense to do a
             * process text for each field individually, but then book
             * keeping the doc freq for terms becomes a bit of a pain in the
             * ass
             */
            try {
                IndexableField[] multifield = reader.document(docID).getFields(field);
                for (IndexableField singlefield : multifield) {
                    // create one big string from all of the text in the
                    // documents for processing later on
                    text.append(". " + singlefield.stringValue());
                }

            } catch (IOException ex) {
                LOGGER.warn("Document " + docID + " missing requested field (" + field + ")...ignoring");
            }
        }
        // might as well see if its empty
        if (text.length() > 0) {
            // actually processes the massive string which was created from
            // all of the above fields
            processText(text.toString().toLowerCase());
            numdocs++;
        }
    }

    LOGGER.info("Number of documents analyzed: \t" + numdocs);
    for (int zz = 0; zz < counts.length; zz++) {
        LOGGER.info("Number of " + zz + "-grams: \t" + counts[zz]);
    }
}

From source file:com.searchbox.Tagger.java

License:Apache License

/**
 * Scans up to {@code maxNumDocs} live documents, concatenates the text of the
 * requested fields for each one, and feeds it to {@code processDocText};
 * finally records the analyzed document count under {@code DOC_COUNTS_STRING}
 * in both {@code dfcounts} and {@code tfcounts}.
 */
private void DfCountBuilder(SolrIndexSearcher searcher, String[] fields, int maxNumDocs) {
    final IndexReader reader = searcher.getIndexReader();
    // WARNING: MultiFields.getLiveDocs returns null when there are no deletions.
    final Bits liveDocs = MultiFields.getLiveDocs(reader);

    maxNumDocs = Math.min(maxNumDocs, reader.maxDoc());
    if (maxNumDocs == -1) {
        maxNumDocs = reader.maxDoc();
    }
    LOGGER.info("Analyzing docs:\t" + numdocs);

    for (int docId = 0; docId < reader.maxDoc(); docId++) {
        if (numdocs > maxNumDocs) {
            break; // analyzed enough documents
        }
        if (liveDocs != null && !liveDocs.get(docId)) {
            continue; // deleted
        }
        if ((docId % 1000) == 0) {
            LOGGER.debug("Doing " + docId + " of " + maxNumDocs);
        }

        // Concatenate all requested fields into one string so document
        // frequencies are book-kept once per document rather than per field.
        final StringBuilder docText = new StringBuilder();
        for (final String field : fields) {
            try {
                docText.append(". " + reader.document(docId).get(field));
            } catch (IOException ex) {
                LOGGER.warn("Document " + docId + " missing requested field (" + field + ")...ignoring");
            }
        }
        if (docText.length() > 0) { // skip documents with no text
            processDocText(docText.toString());
            numdocs++;
        }
    }

    LOGGER.info("Number of documents analyzed: \t" + numdocs);
    dfcounts.put(DOC_COUNTS_STRING, numdocs);
    tfcounts.put(DOC_COUNTS_STRING, numdocs);
}