Example usage for org.apache.lucene.index SegmentInfos size

List of usage examples for org.apache.lucene.index SegmentInfos size

Introduction

In this page you can find the example usage for org.apache.lucene.index SegmentInfos size.

Prototype

public int size() 

Source Link

Document

Returns the number of `SegmentCommitInfo` objects in this `SegmentInfos`.

Usage

From source file:axiom.objectmodel.dom.IndexOptimizingRunner.java

License:Open Source License

/**
 * Counts the segments currently recorded for this runner's directory.
 *
 * @return the number of segments, or 0 when no segment infos are available
 */
protected int segmentCount() {
    SegmentInfos segmentInfos = IndexObjectsFactory.getFSSegmentInfos(this.directory);
    return (segmentInfos == null) ? 0 : segmentInfos.size();
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Determines whether the index is already "merged" with respect to a forced
 * merge down to {@code maxNumSegments}: true when at most
 * {@code maxNumSegments} candidate segments remain and, if exactly one
 * candidate remains, that single segment itself already counts as merged.
 *
 * @param infos              the current segment infos
 * @param maxNumSegments     target maximum number of segments
 * @param segmentsToOptimize segments flagged as merge candidates
 * @throws IOException if checking the lone remaining candidate fails
 */
@Override
protected boolean isMerged(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException {
    final int numSegments = infos.size();
    int numToOptimize = 0;
    SegmentInfoPerCommit optimizeInfo = null;
    // Stop early once we have seen more candidates than the target allows.
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) {
        final SegmentInfoPerCommit info = infos.info(i);
        // Boolean.TRUE.equals(...) avoids the auto-unboxing NPE the original
        // 'if (segmentsToOptimize.get(info))' threw for segments absent from the map.
        if (Boolean.TRUE.equals(segmentsToOptimize.get(info))) {
            numToOptimize++;
            optimizeInfo = info;
        }
    }

    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || isMerged(optimizeInfo));
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/** Returns the merges necessary to optimize the index.
 *  This merge policy defines "optimized" to mean only one
 *  segment in the index, where that segment has no
 *  deletions pending nor separate norms, and it is in
 *  compound file format if the current useCompoundFile
 *  setting is true.  This method returns multiple merges
 *  (mergeFactor at a time) so the {@link org.apache.lucene.index.MergeScheduler}
 *  in use may make use of concurrency.
 *
 *  @param infos              the current segment infos
 *  @param maxNumSegments     requested maximum number of segments after the forced merge
 *  @param segmentsToOptimize segments flagged as merge candidates
 *  @return the merge specification, or {@code null} when nothing needs merging
 */
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException {

    assert maxNumSegments > 0;

    MergeSpecification spec = null;

    if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.size();
        while (last > 0) {
            final SegmentInfoPerCommit info = infos.info(--last);
            // NOTE(review): auto-unboxing — this NPEs if 'info' is missing from the
            // map; presumably callers populate it for every segment. Verify.
            if (segmentsToOptimize.get(info)) {
                // 'last' becomes the exclusive upper bound of the merge range.
                last++;
                break;
            }
        }

        if (last > 0) {
            if (maxNumSegments == 1) {
                // Since we must optimize down to 1 segment, the
                // choice is simple:
                // boolean useCompoundFile = getUseCompoundFile();
                if (last > 1 || !isMerged(infos.info(0))) {
                    spec = new MergeSpecification();
                    spec.add(new OneMerge(infos.asList().subList(0, last)));
                }
            } else if (last > maxNumSegments) {
                // More segments than requested: pick the most size-balanced merges.
                spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge);
            }
        }
    }
    return spec;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Builds the merges needed to expunge all deletes from the index.
 * Small segments (those beyond the large-segment budget) are delegated to
 * the superclass policy; each large segment that carries deletions becomes
 * its own single-segment merge, so the number of large segments stays the same.
 *
 * @param infos the current segment infos
 * @return a merge specification covering every segment with deletions
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int segmentCount = infos.size();
    final int largeCount = Math.min(segmentCount, _numLargeSegments);

    MergeSpecification spec = null;
    if (largeCount < segmentCount) {
        // Let the base log-merge policy handle the tail of small segments.
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(infos.asList().subList(largeCount, segmentCount));
        spec = super.findForcedDeletesMerges(smallSegments);
    }

    if (spec == null) {
        spec = new MergeSpecification();
    }
    // Rewrite each large segment with deletions in place (one-segment merges).
    for (int idx = 0; idx < largeCount; idx++) {
        SegmentInfoPerCommit commit = infos.info(idx);
        if (commit.hasDeletions()) {
            spec.add(new OneMerge(infos.asList().subList(idx, idx + 1)));
        }
    }
    return spec;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/** Checks if any merges are now necessary and returns a
 *  {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 *  This merge policy try to maintain {@link
 *  #setNumLargeSegments} of large segments in similar sizes.
 *  {@link org.apache.lucene.index.LogByteSizeMergePolicy} to small segments.
 *  Small segments are merged and promoted to a large segment
 *  when the total size reaches the average size of large segments.
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;

    // Nothing to do until the large-segment budget is exceeded.
    if (numSegs <= numLargeSegs)
        return null;

    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;

    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }

    // NOTE(review): divides by (numLargeSegs - 1) — this throws
    // ArithmeticException if _numLargeSegments == 1; presumably the
    // configuration enforces a minimum of 2. Verify.
    long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough,
        // promote the small segments to a large segment and do balanced merge,

        if (totalSmallSegSize < targetSegSize * 2) {
            // Shrink the large tier by one, then merge all small segments
            // into a single new large segment.
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null)
                spec = new MergeSpecification(); // should not happen
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            // Small tier is very large: rebalance everything at once.
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        // Skip past small segments at/above the threshold; merge the rest
        // (from the first sub-threshold segment to the end) as one merge.
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold)
                break;
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments.
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);

        if (_partialExpunge) {
            // Optionally add one delete-expunging merge per round.
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null)
                    spec = new MergeSpecification();
                spec.add(expunge);
            }
        }
        return spec;
    }
}

From source file:cn.hbu.cs.esearch.util.IndexUtil.java

License:Apache License

/**
 * Reads the latest segment metadata from the given directory and reports
 * how many segments it contains.
 *
 * @param idx the index directory to inspect
 * @return the segment count of the most recent commit
 * @throws IOException if the segments file cannot be read
 */
public static int getNumSegments(Directory idx) throws IOException {
    final SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(idx);
    return segmentInfos.size();
}

From source file:cn.hbu.cs.esearch.util.IndexUtil.java

License:Apache License

/**
 * Produces a one-line, human-readable summary of every segment in the index:
 * name, document count, and deleted-document count per segment.
 *
 * @param idx the index directory to inspect
 * @return the summary string, or the exception's string form on failure
 */
public static String getSegmentsInfo(Directory idx) {
    SegmentInfos segmentInfos = new SegmentInfos();
    try {
        segmentInfos.read(idx);
        StringBuilder summary = new StringBuilder();
        for (int pos = 0; pos < segmentInfos.size(); pos++) {
            SegmentInfoPerCommit commit = segmentInfos.info(pos);
            summary.append("[")
                    .append(commit.info.name)
                    .append(",numDoc:")
                    .append(commit.info.getDocCount())
                    .append(",numDel:")
                    .append(commit.getDelCount())
                    .append("]");
        }
        return summary.toString();
    } catch (Exception e) {
        // Deliberate best-effort: report the failure text instead of throwing.
        return e.toString();
    }
}

From source file:org.apache.solr.core.TestSimpleTextCodec.java

License:Apache License

/**
 * Verifies that the SimpleText codec configured in solrconfig.xml is wired
 * through every layer: config, core, IndexWriter, and the on-disk segments.
 */
public void test() throws Exception {
    // The codec factory declared in solrconfig.xml must be SimpleText.
    SolrConfig config = h.getCore().getSolrConfig();
    String codecFactory = config.get("codecFactory/@class");
    assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);

    assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

    // The live IndexWriter must have picked up the same codec.
    RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
    try {
        IndexWriter writer = writerRef.get();
        assertEquals("Unexpected codec in IndexWriter config", "SimpleText",
                writer.getConfig().getCodec().getName());
    } finally {
        writerRef.decref();
    }

    // Index one document so at least one segment exists on disk.
    assertU(add(doc("id", "1", "text", "textual content goes here")));
    assertU(commit());

    // The newest committed segment must have been written with SimpleText.
    RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
    try {
        SolrIndexSearcher searcher = searcherRef.get();
        SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
        SegmentInfo info = infos.info(infos.size() - 1).info;
        assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
    } finally {
        searcherRef.decref();
    }

    // Sanity check: the indexed document is searchable.
    assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}

From source file:org.dspace.app.util.IndexVersion.java

License:BSD License

/**
 * Determine the version of Solr/Lucene which was used to create a given index directory.
 *
 * @param indexDirPath
 *          Full path of the Solr/Lucene index directory
 * @return version as a string (e.g. "4.4"), empty string ("") if index directory is empty,
 *         or null if directory doesn't exist.
 * @throws IOException if the Lucene segments files cannot be read
 */
public static String getIndexVersion(String indexDirPath) throws IOException {
    String indexVersion = null;

    // Make sure this directory exists
    File dir = new File(indexDirPath);
    if (dir.exists() && dir.isDirectory()) {
        // Check if this index directory has any contents
        String[] dirContents = dir.list();
        // If this directory is empty, return an empty string.
        // It is a valid directory, but it's an empty index.
        if (dirContents != null && dirContents.length == 0) {
            return "";
        }

        // Open this index directory in Lucene
        Directory indexDir = FSDirectory.open(dir);

        // Get info on the Lucene segment file(s) in index directory
        SegmentInfos sis = new SegmentInfos();
        try {
            sis.read(indexDir);
        } catch (IOException ie) {
            // Wrap default IOException, providing more info about which directory cannot be read
            throw new IOException("Could not read Lucene segments files in " + dir.getAbsolutePath(), ie);
        }

        // If we have a valid Solr index dir, but it has no existing segments
        // then just return an empty string. It's a valid but empty index.
        if (sis != null && sis.size() == 0) {
            return "";
        }

        // Loop through our Lucene segment files to locate the OLDEST
        // version. It is possible for individual segment files to be
        // created by different versions of Lucene. So, we just need
        // to find the oldest version of Lucene which created these
        // index segment files.
        // This logic borrowed from Lucene v.4.10 CheckIndex class:
        // https://github.com/apache/lucene-solr/blob/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java#L426
        // WARNING: It MAY require updating whenever we upgrade the
        // "lucene.version" in our DSpace Parent POM
        Version oldest = null;
        Version oldSegment = null;
        for (SegmentCommitInfo si : sis) {
            // Get the version of Lucene which created this segment file
            Version version = si.info.getVersion();
            if (version == null) {
                // If null, then this is a pre-3.1 segment file.
                // For our purposes, we will just assume it is "3.0",
                // This lets us know we will need to upgrade it to 3.5
                // before upgrading to Solr/Lucene 4.x or above
                try {
                    oldSegment = Version.parse("3.0");
                } catch (ParseException pe) {
                    throw new IOException(pe);
                }
            }
            // else if this segment is older than our oldest thus far
            else if (oldest == null || version.onOrAfter(oldest) == false) {
                // We have a new oldest segment version
                oldest = version;
            }
        }

        // If we found a really old (pre-3.1) segment, compare it to the
        // oldest versioned segment to see which is actually older.
        // BUGFIX: when EVERY segment is pre-3.1, 'oldest' is still null here;
        // the original call oldSegment.onOrAfter(null) would throw an NPE.
        // Treat a null 'oldest' as "the pre-3.1 segment wins".
        if (oldSegment != null && (oldest == null || oldSegment.onOrAfter(oldest) == false)) {
            oldest = oldSegment;
        }

        // At this point, we should know what version of Lucene created our
        // oldest segment file. We will return this as the Index version
        // as it's the oldest segment we will need to upgrade.
        if (oldest != null) {
            indexVersion = oldest.toString();
        }
    }

    return indexVersion;
}

From source file:org.elasticsearch.index.merge.policy.BalancedSegmentMergePolicy.java

License:Apache License

/**
 * Reports whether, with respect to the given candidate set, the index is
 * already optimized down to at most {@code maxNumSegments} segments; a single
 * remaining candidate must itself pass the per-segment optimized check.
 */
private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments,
        Set<SegmentInfo> segmentsToOptimize) throws IOException {
    final int segmentTotal = infos.size();
    int matched = 0;
    SegmentInfo lastMatch = null;
    for (int pos = 0; pos < segmentTotal; pos++) {
        // Bail out as soon as we have seen more candidates than allowed.
        if (matched > maxNumSegments) {
            break;
        }
        final SegmentInfo candidate = infos.info(pos);
        if (segmentsToOptimize.contains(candidate)) {
            matched++;
            lastMatch = candidate;
        }
    }

    return matched <= maxNumSegments && (matched != 1 || isOptimized(writer, lastMatch));
}