Example usage for org.apache.lucene.index SegmentInfos asList

List of usage examples for org.apache.lucene.index SegmentInfos asList

Introduction

On this page you can find example usages of org.apache.lucene.index SegmentInfos asList.

Prototype

public List<SegmentCommitInfo> asList() 

Source Link

Document

Returns all contained segments as an unmodifiable List view.

Usage

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/** Returns the merges necessary to optimize the index.
 *  This merge policy defines "optimized" to mean only one
 *  segment in the index, where that segment has no
 *  deletions pending nor separate norms, and it is in
 *  compound file format if the current useCompoundFile
 *  setting is true.  This method returns multiple merges
 *  (mergeFactor at a time) so the {@link org.apache.lucene.index.MergeScheduler}
 *  in use may make use of concurrency.
 *
 *  @param infos              all segments currently in the index
 *  @param maxNumSegments     target number of segments; must be &gt; 0
 *  @param segmentsToOptimize segments that existed when the optimize started;
 *                            segments flushed afterwards may be absent from this map
 *  @return the merges to run, or null when the index is already merged
 */
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException {

    assert maxNumSegments > 0;

    MergeSpecification spec = null;

    if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.size();
        while (last > 0) {
            final SegmentInfoPerCommit info = infos.info(--last);
            // Fix: Map.get returns null for segments not present in the map
            // (e.g. flushed after the optimize started); the original
            // `if (segmentsToOptimize.get(info))` auto-unboxed that null into a
            // NullPointerException. Absent entries are treated as "do not optimize".
            if (Boolean.TRUE.equals(segmentsToOptimize.get(info))) {
                last++;
                break;
            }
        }

        if (last > 0) {
            if (maxNumSegments == 1) {
                // Since we must optimize down to 1 segment, the
                // choice is simple:
                if (last > 1 || !isMerged(infos.info(0))) {
                    spec = new MergeSpecification();
                    spec.add(new OneMerge(infos.asList().subList(0, last)));
                }
            } else if (last > maxNumSegments) {
                // find most balanced merges
                spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge);
            }
        }
    }
    return spec;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Chooses merges over the first {@code infoLen} segments of {@code infos} so that at
 * most {@code maxNumSegments} segments remain, using dynamic programming (Viterbi-style)
 * to minimize the size variance of the resulting segments.
 *
 * @param infos          all segments in the index
 * @param infoLen        number of leading segments eligible for merging
 * @param maxNumSegments target number of segments after merging
 * @param partialExpunge when true, also schedule a singleton merge of the unmerged
 *                       segment with the most deletions (if any has deletions)
 * @return the merges to run, or null when infoLen is already small enough
 */
private MergeSpecification findBalancedMerges(SegmentInfos infos, int infoLen, int maxNumSegments,
        boolean partialExpunge) throws IOException {
    if (infoLen <= maxNumSegments)
        return null;

    MergeSpecification spec = new MergeSpecification();
    // boolean useCompoundFile = getUseCompoundFile();

    // use Viterbi algorithm to find the best segmentation.
    // we will try to minimize the size variance of resulting segments.

    // variance[i][j]: cost of merging the run of segments starting at i spanning j+1
    // segments into one (precomputed; exact semantics defined by createVarianceTable).
    double[][] variance = createVarianceTable(infos, infoLen, maxNumSegments);

    // Any single output segment can absorb at most this many input segments.
    final int maxMergeSegments = infoLen - maxNumSegments + 1;
    // sumVariance[j]: best accumulated cost for the current DP row at offset j.
    double[] sumVariance = new double[maxMergeSegments];
    // backLink[i][j]: offset chosen at row i that produced sumVariance[j];
    // used afterwards to trace back the optimal segmentation.
    int[][] backLink = new int[maxNumSegments][maxMergeSegments];

    // Row 0: the first output segment spans offsets [0, i] directly.
    for (int i = (maxMergeSegments - 1); i >= 0; i--) {
        sumVariance[i] = variance[0][i];
        backLink[0][i] = 0;
    }
    // Subsequent rows update sumVariance in place; iterating j high-to-low ensures
    // sumVariance[k] for k <= j still holds the PREVIOUS row's value when read.
    for (int i = 1; i < maxNumSegments; i++) {
        for (int j = (maxMergeSegments - 1); j >= 0; j--) {
            double minV = Double.MAX_VALUE;
            int minK = 0;
            for (int k = j; k >= 0; k--) {
                double v = sumVariance[k] + variance[i + k][j - k];
                if (v < minV) {
                    minV = v;
                    minK = k;
                }
            }
            sumVariance[j] = minV;
            backLink[i][j] = minK;
        }
    }

    // now, trace back the back links to find all merges,
    // also find a candidate for partial expunge if requested
    int mergeEnd = infoLen;
    int prev = maxMergeSegments - 1;
    int expungeCandidate = -1;
    int maxDelCount = 0;
    for (int i = maxNumSegments - 1; i >= 0; i--) {
        prev = backLink[i][prev];
        int mergeStart = i + prev;
        if ((mergeEnd - mergeStart) > 1) {
            // Runs of two or more segments become an actual merge.
            spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
        } else {
            // Singleton run: no merge needed, but it may qualify for partial expunge.
            if (partialExpunge) {
                SegmentInfoPerCommit info = infos.info(mergeStart);
                int delCount = info.getDelCount();
                if (delCount > maxDelCount) {
                    expungeCandidate = mergeStart;
                    maxDelCount = delCount;
                }
            }
        }
        mergeEnd = mergeStart;
    }

    if (partialExpunge && maxDelCount > 0) {
        // expunge deletes: rewrite the singleton segment with the most deletions.
        spec.add(new OneMerge(infos.asList().subList(expungeCandidate, expungeCandidate + 1)));
    }

    return spec;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Finds merges necessary to expunge all deletes from the index.
 * The number of large segments will stay the same: each large segment with
 * deletions is rewritten by itself, while the small-segment tail is delegated
 * to the base merge policy.
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int segmentCount = infos.size();
    final int largeCount = Math.min(segmentCount, _numLargeSegments);

    MergeSpecification result = null;
    if (largeCount < segmentCount) {
        // Hand the small-segment tail to the superclass policy.
        final SegmentInfos tail = new SegmentInfos();
        tail.addAll(infos.asList().subList(largeCount, segmentCount));
        result = super.findForcedDeletesMerges(tail);
    }
    if (result == null) {
        result = new MergeSpecification();
    }

    // Schedule a singleton merge for every large segment carrying deletions.
    for (int idx = 0; idx < largeCount; idx++) {
        if (infos.info(idx).hasDeletions()) {
            result.add(new OneMerge(infos.asList().subList(idx, idx + 1)));
        }
    }
    return result;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/** Checks if any merges are now necessary and returns a
 *  {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 *  This merge policy tries to maintain {@link #setNumLargeSegments} large
 *  segments of similar sizes, and applies the base
 *  {@link org.apache.lucene.index.LogByteSizeMergePolicy} to the small segments.
 *  Small segments are merged and promoted to a large segment
 *  when their total size reaches the average size of the large segments. */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;

    // Nothing beyond the large-segment budget: no merging needed.
    if (numSegs <= numLargeSegs)
        return null;

    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;

    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }

    // NOTE(review): integer division by zero if numLargeSegs == 1 — presumably
    // _numLargeSegments is configured >= 2; confirm against the setter/constructor.
    long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough,
        // promote the small segments to a large segment and do balanced merge,

        if (totalSmallSegSize < targetSegSize * 2) {
            // Rebalance the existing large segments down by one, then merge the
            // whole small tail into a single new large segment.
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null)
                spec = new MergeSpecification(); // should not happen
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            // Tail is very large: rebalance everything in one pass.
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        // Skip leading small segments that are still above the threshold; merge
        // everything from the first sub-threshold segment to the end.
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold)
                break;
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments.
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);

        if (_partialExpunge) {
            // Additionally rewrite the large segment with the most deletions, if any.
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null)
                    spec = new MergeSpecification();
                spec.add(expunge);
            }
        }
        return spec;
    }
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Scans the first {@code maxNumSegments} segments and returns a singleton merge
 * for the one with the most deletions, or null when none has any deletions.
 */
private OneMerge findOneSegmentToExpunge(SegmentInfos infos, int maxNumSegments) throws IOException {
    int bestIndex = -1;
    int bestDelCount = 0;

    // Iterate right-to-left, matching the original tie-breaking behavior
    // (strict '>' keeps the rightmost segment among equals).
    for (int i = maxNumSegments - 1; i >= 0; i--) {
        final int delCount = infos.info(i).getDelCount();
        if (delCount > bestDelCount) {
            bestDelCount = delCount;
            bestIndex = i;
        }
    }
    return (bestDelCount > 0)
            ? new OneMerge(infos.asList().subList(bestIndex, bestIndex + 1))
            : null;
}

From source file:org.eu.bitzone.Leia.java

License:Apache License

/**
 * Fills the "segmentsTable" widget with one row per segment of the commit point
 * currently selected in {@code commitsTable}. Shows a status message and returns
 * early when no commit is selected or the segments file cannot be read.
 * Columns: name, del gen, del count, doc count, version, codec, size, compound flag.
 */
public void showSegments(final Object commitsTable) throws Exception {
    final Object segTable = find("segmentsTable");
    removeAll(segTable);
    final Object[] rows = getSelectedItems(commitsTable);
    if (rows == null || rows.length == 0) {
        showStatus("No commit point selected.");
        return;
    }
    // Only the first selected row is inspected.
    final Object row = rows[0];
    final IndexCommit commit = (IndexCommit) getProperty(row, "commit");
    if (commit == null) {
        showStatus("Can't retrieve commit point (application error)");
        return;
    }
    final Object segGen = find("segGen");
    setString(segGen, "text", commit.getSegmentsFileName() + " (gen " + commit.getGeneration() + ")");
    final String segName = commit.getSegmentsFileName();
    final SegmentInfos infos = new SegmentInfos();
    try {
        infos.read(dir, segName);
    } catch (final Exception e) {
        e.printStackTrace();
        errorMsg("Error reading segment infos for '" + segName + ": " + e.toString());
        return;
    }
    // Build one table row per segment; each segment object is attached to its
    // row via the "si" property for later lookups.
    for (final SegmentCommitInfo si : infos.asList()) {
        final Object r = create("row");
        add(segTable, r);
        Object cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.name);
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.getDelGen()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.getDelCount()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.info.getDocCount()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getVersion());
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getCodec().getName());
        final long size = si.sizeInBytes();
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", Util.normalizeSize(size) + Util.normalizeUnit(size));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getUseCompoundFile() ? "Y" : "N");

        putProperty(r, "si", si);
    }
    // Diagnostics table is cleared; presumably repopulated when a segment row
    // is selected — TODO confirm against the selection handler.
    final Object diagsTable = find("diagsTable");
    removeAll(diagsTable);
}

From source file:org.getopt.luke.Luke.java

License:Apache License

/**
 * Fills the "segmentsTable" widget with one row per segment of the commit point
 * currently selected in {@code commitsTable}. Shows a status message and returns
 * early when no commit is selected or the segments file cannot be read.
 * Columns: name, del gen, del count, doc count, version, codec, size, compound flag.
 */
public void showSegments(Object commitsTable) throws Exception {
    Object segTable = find("segmentsTable");
    removeAll(segTable);
    Object[] rows = getSelectedItems(commitsTable);
    if (rows == null || rows.length == 0) {
        showStatus("No commit point selected.");
        return;
    }
    // Only the first selected row is inspected.
    Object row = rows[0];
    IndexCommit commit = (IndexCommit) getProperty(row, "commit");
    if (commit == null) {
        showStatus("Can't retrieve commit point (application error)");
        return;
    }
    Object segGen = find("segGen");
    setString(segGen, "text", commit.getSegmentsFileName() + " (gen " + commit.getGeneration() + ")");
    String segName = commit.getSegmentsFileName();
    SegmentInfos infos = new SegmentInfos();
    try {
        infos.read(dir, segName);
    } catch (Exception e) {
        e.printStackTrace();
        errorMsg("Error reading segment infos for '" + segName + ": " + e.toString());
        return;
    }
    // Build one table row per segment; each segment object is attached to its
    // row via the "si" property for later lookups.
    for (SegmentCommitInfo si : infos.asList()) {
        Object r = create("row");
        add(segTable, r);
        Object cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.name);
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.getDelGen()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.getDelCount()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", String.valueOf(si.info.getDocCount()));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getVersion().toString());
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getCodec().getName());
        long size = si.sizeInBytes();
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", Util.normalizeSize(size) + Util.normalizeUnit(size));
        setChoice(cell, "alignment", "right");
        cell = create("cell");
        add(r, cell);
        setString(cell, "text", si.info.getUseCompoundFile() ? "Y" : "N");

        putProperty(r, "si", si);
    }
    // Diagnostics table is cleared; presumably repopulated when a segment row
    // is selected — TODO confirm against the selection handler.
    Object diagsTable = find("diagsTable");
    removeAll(diagsTable);
}

From source file:proj.zoie.api.impl.ZoieMergePolicy.java

License:Apache License

/** Returns the merges necessary to optimize the index.
 *  This merge policy defines "optimized" to mean only one
 *  segment in the index, where that segment has no
 *  deletions pending nor separate norms, and it is in
 *  compound file format if the current useCompoundFile
 *  setting is true.  This method returns multiple merges
 *  (mergeFactor at a time) so the {@link MergeScheduler}
 *  in use may make use of concurrency.
 *
 *  @param infos              all segments currently in the index
 *  @param maxNumSegments     target number of segments; must be &gt; 0
 *  @param segmentsToOptimize segments that existed when the optimize started;
 *                            segments flushed afterwards may be absent from this map
 *  @return the merges to run, or null when the index is already merged
 */
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfo, Boolean> segmentsToOptimize) throws IOException {

    assert maxNumSegments > 0;

    MergeSpecification spec = null;

    if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.size();
        while (last > 0) {
            final SegmentInfo info = infos.info(--last);
            // Fix: Map.get returns null for segments not present in the map
            // (e.g. flushed after the optimize started); the original
            // `if (segmentsToOptimize.get(info))` auto-unboxed that null into a
            // NullPointerException. Absent entries are treated as "do not optimize".
            if (Boolean.TRUE.equals(segmentsToOptimize.get(info))) {
                last++;
                break;
            }
        }

        if (last > 0) {
            if (maxNumSegments == 1) {
                // Since we must optimize down to 1 segment, the
                // choice is simple:
                if (last > 1 || !isMerged(infos.info(0))) {
                    spec = new MergeSpecification();
                    spec.add(new OneMerge(infos.asList().subList(0, last)));
                }
            } else if (last > maxNumSegments) {
                // find most balanced merges
                spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge);
            }
        }
    }
    return spec;
}

From source file:proj.zoie.api.impl.ZoieMergePolicy.java

License:Apache License

/**
 * Chooses merges over the first {@code infoLen} segments of {@code infos} so that at
 * most {@code maxNumSegments} segments remain, using dynamic programming (Viterbi-style)
 * to minimize the size variance of the resulting segments.
 *
 * @param infos          all segments in the index
 * @param infoLen        number of leading segments eligible for merging
 * @param maxNumSegments target number of segments after merging
 * @param partialExpunge when true, also schedule a singleton merge of the unmerged
 *                       segment with the most deletions (if any has deletions)
 * @return the merges to run, or null when infoLen is already small enough
 */
private MergeSpecification findBalancedMerges(SegmentInfos infos, int infoLen, int maxNumSegments,
        boolean partialExpunge) throws IOException {
    if (infoLen <= maxNumSegments)
        return null;

    MergeSpecification spec = new MergeSpecification();
    // boolean useCompoundFile = getUseCompoundFile();

    // use Viterbi algorithm to find the best segmentation.
    // we will try to minimize the size variance of resulting segments.

    // variance[i][j]: cost of merging the run of segments starting at i spanning j+1
    // segments into one (precomputed; exact semantics defined by createVarianceTable).
    double[][] variance = createVarianceTable(infos, infoLen, maxNumSegments);

    // Any single output segment can absorb at most this many input segments.
    final int maxMergeSegments = infoLen - maxNumSegments + 1;
    // sumVariance[j]: best accumulated cost for the current DP row at offset j.
    double[] sumVariance = new double[maxMergeSegments];
    // backLink[i][j]: offset chosen at row i that produced sumVariance[j];
    // used afterwards to trace back the optimal segmentation.
    int[][] backLink = new int[maxNumSegments][maxMergeSegments];

    // Row 0: the first output segment spans offsets [0, i] directly.
    for (int i = (maxMergeSegments - 1); i >= 0; i--) {
        sumVariance[i] = variance[0][i];
        backLink[0][i] = 0;
    }
    // Subsequent rows update sumVariance in place; iterating j high-to-low ensures
    // sumVariance[k] for k <= j still holds the PREVIOUS row's value when read.
    for (int i = 1; i < maxNumSegments; i++) {
        for (int j = (maxMergeSegments - 1); j >= 0; j--) {
            double minV = Double.MAX_VALUE;
            int minK = 0;
            for (int k = j; k >= 0; k--) {
                double v = sumVariance[k] + variance[i + k][j - k];
                if (v < minV) {
                    minV = v;
                    minK = k;
                }
            }
            sumVariance[j] = minV;
            backLink[i][j] = minK;
        }
    }

    // now, trace back the back links to find all merges,
    // also find a candidate for partial expunge if requested
    int mergeEnd = infoLen;
    int prev = maxMergeSegments - 1;
    int expungeCandidate = -1;
    int maxDelCount = 0;
    for (int i = maxNumSegments - 1; i >= 0; i--) {
        prev = backLink[i][prev];
        int mergeStart = i + prev;
        if ((mergeEnd - mergeStart) > 1) {
            // Runs of two or more segments become an actual merge.
            spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
        } else {
            // Singleton run: no merge needed, but it may qualify for partial expunge.
            if (partialExpunge) {
                SegmentInfo info = infos.info(mergeStart);
                int delCount = info.getDelCount();
                if (delCount > maxDelCount) {
                    expungeCandidate = mergeStart;
                    maxDelCount = delCount;
                }
            }
        }
        mergeEnd = mergeStart;
    }

    if (partialExpunge && maxDelCount > 0) {
        // expunge deletes: rewrite the singleton segment with the most deletions.
        spec.add(new OneMerge(infos.asList().subList(expungeCandidate, expungeCandidate + 1)));
    }

    return spec;
}

From source file:proj.zoie.api.impl.ZoieMergePolicy.java

License:Apache License

/**
 * Finds merges necessary to expunge all deletes from the index.
 * The number of large segments will stay the same: each large segment with
 * deletions is rewritten by itself, while the small-segment tail is delegated
 * to the base merge policy.
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int total = infos.size();
    final int largeSegCount = Math.min(total, _numLargeSegments);

    MergeSpecification merges = null;
    if (largeSegCount < total) {
        // Hand the small-segment tail to the superclass policy.
        final SegmentInfos smallInfos = new SegmentInfos();
        smallInfos.addAll(infos.asList().subList(largeSegCount, total));
        merges = super.findForcedDeletesMerges(smallInfos);
    }
    if (merges == null) {
        merges = new MergeSpecification();
    }

    // Schedule a singleton merge for every large segment carrying deletions.
    for (int pos = 0; pos < largeSegCount; pos++) {
        if (infos.info(pos).hasDeletions()) {
            merges.add(new OneMerge(infos.asList().subList(pos, pos + 1)));
        }
    }
    return merges;
}