List of usage examples for org.apache.lucene.index SegmentInfos asList
public List<SegmentCommitInfo> asList()
From source file: cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License: Apache License
/** Returns the merges necessary to optimize the index. * This merge policy defines "optimized" to mean only one * segment in the index, where that segment has no * deletions pending nor separate norms, and it is in * compound file format if the current useCompoundFile * setting is true. This method returns multiple merges * (mergeFactor at a time) so the {@link org.apache.lucene.index.MergeScheduler} * in use may make use of concurrency. */ @Override/*from www . j a va 2 s .c o m*/ public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments, Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException { assert maxNumSegments > 0; MergeSpecification spec = null; if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed // since optimize started): int last = infos.size(); while (last > 0) { final SegmentInfoPerCommit info = infos.info(--last); if (segmentsToOptimize.get(info)) { last++; break; } } if (last > 0) { if (maxNumSegments == 1) { // Since we must optimize down to 1 segment, the // choice is simple: // boolean useCompoundFile = getUseCompoundFile(); if (last > 1 || !isMerged(infos.info(0))) { spec = new MergeSpecification(); spec.add(new OneMerge(infos.asList().subList(0, last))); } } else if (last > maxNumSegments) { // find most balanced merges spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge); } } } return spec; }
From source file: cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License: Apache License
/**
 * Partitions the first {@code infoLen} segments into {@code maxNumSegments}
 * contiguous groups using a Viterbi-style dynamic program that minimizes the
 * size variance of the resulting merged segments. Groups of size 1 are left
 * unmerged; if {@code partialExpunge} is set, the single-segment group with
 * the most pending deletes is additionally merged with itself to expunge
 * those deletes.
 *
 * @param infos          all segment infos
 * @param infoLen        number of leading segments to consider
 * @param maxNumSegments desired number of resulting segments
 * @param partialExpunge whether to add a self-merge to expunge deletes
 * @return the merge specification, or {@code null} if already few enough
 */
private MergeSpecification findBalancedMerges(SegmentInfos infos, int infoLen, int maxNumSegments,
        boolean partialExpunge) throws IOException {
    if (infoLen <= maxNumSegments)
        return null;
    MergeSpecification spec = new MergeSpecification();
    // boolean useCompoundFile = getUseCompoundFile();

    // use Viterbi algorithm to find the best segmentation.
    // we will try to minimize the size variance of resulting segments.

    // variance[i][j]: cost of merging segments [i, i+j] into one —
    // produced by createVarianceTable (defined elsewhere in this class).
    double[][] variance = createVarianceTable(infos, infoLen, maxNumSegments);

    // Max number of source segments any one resulting segment can absorb.
    final int maxMergeSegments = infoLen - maxNumSegments + 1;
    double[] sumVariance = new double[maxMergeSegments];
    int[][] backLink = new int[maxNumSegments][maxNumSegments <= 0 ? 0 : maxMergeSegments];
    for (int i = (maxMergeSegments - 1); i >= 0; i--) {
        // Base case: the first resulting segment absorbs i+1 source segments.
        sumVariance[i] = variance[0][i];
        backLink[0][i] = 0;
    }
    for (int i = 1; i < maxNumSegments; i++) {
        // NOTE: j iterates downward on purpose — sumVariance is updated
        // in place, and sumVariance[k] for k <= j must still hold the
        // previous row's value when it is read. Do not reorder.
        for (int j = (maxMergeSegments - 1); j >= 0; j--) {
            double minV = Double.MAX_VALUE;
            int minK = 0;
            for (int k = j; k >= 0; k--) {
                double v = sumVariance[k] + variance[i + k][j - k];
                if (v < minV) {
                    minV = v;
                    minK = k;
                }
            }
            sumVariance[j] = minV;
            backLink[i][j] = minK;
        }
    }

    // now, trace back the back links to find all merges,
    // also find a candidate for partial expunge if requested
    int mergeEnd = infoLen;
    int prev = maxMergeSegments - 1;
    int expungeCandidate = -1;
    int maxDelCount = 0;
    for (int i = maxNumSegments - 1; i >= 0; i--) {
        prev = backLink[i][prev];
        int mergeStart = i + prev;
        if ((mergeEnd - mergeStart) > 1) {
            // Group spans more than one segment: schedule a real merge.
            spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
        } else {
            if (partialExpunge) {
                // Singleton group: track the one with the most deletes
                // as the expunge candidate.
                SegmentInfoPerCommit info = infos.info(mergeStart);
                int delCount = info.getDelCount();
                if (delCount > maxDelCount) {
                    expungeCandidate = mergeStart;
                    maxDelCount = delCount;
                }
            }
        }
        mergeEnd = mergeStart;
    }

    if (partialExpunge && maxDelCount > 0) {
        // expunge deletes: a single-segment merge rewrites the segment
        // without its deleted documents.
        spec.add(new OneMerge(infos.asList().subList(expungeCandidate, expungeCandidate + 1)));
    }

    return spec;
}
From source file: cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License: Apache License
/** * Finds merges necessary to expunge all deletes from the * index. The number of large segments will stay the same. *///from w w w . j ava2s . com @Override public MergeSpecification findForcedDeletesMerges(SegmentInfos infos) throws CorruptIndexException, IOException { final int numSegs = infos.size(); final int numLargeSegs = (numSegs < _numLargeSegments ? numSegs : _numLargeSegments); MergeSpecification spec = null; if (numLargeSegs < numSegs) { List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs); SegmentInfos smallSegments = new SegmentInfos(); smallSegments.addAll(smallSegmentList); spec = super.findForcedDeletesMerges(smallSegments); } if (spec == null) spec = new MergeSpecification(); for (int i = 0; i < numLargeSegs; i++) { SegmentInfoPerCommit info = infos.info(i); if (info.hasDeletions()) { spec.add(new OneMerge(infos.asList().subList(i, i + 1))); } } return spec; }
From source file: cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License: Apache License
/**
 * Checks if any merges are now necessary and returns a
 * {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 * This merge policy tries to maintain {@link #setNumLargeSegments} large
 * segments of similar sizes, and applies
 * {@link org.apache.lucene.index.LogByteSizeMergePolicy} to the small
 * segments. Small segments are merged and promoted to a large segment when
 * their total size reaches the average size of the large segments.
 *
 * NOTE(review): targetSegSize divides by (numLargeSegs - 1) — this assumes
 * _numLargeSegments >= 2; with a value of 1 this would divide by zero.
 * Presumably the setter enforces the minimum — TODO confirm.
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;

    if (numSegs <= numLargeSegs)
        return null;

    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;

    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }

    // Average large-segment size (see NOTE above re: numLargeSegs - 1).
    long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough,
        // promote the small segments to a large segment and do balanced merge
        if (totalSmallSegSize < targetSegSize * 2) {
            // First rebalance the large segments down by one, then merge
            // the whole small-segment tail into one new large segment.
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null)
                spec = new MergeSpecification(); // should not happen
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            // Small tail is very large: rebalance everything at once.
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        // Skip over small segments that are still >= the threshold; merge
        // everything from the first sub-threshold segment to the end.
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold)
                break;
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments.
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);

        if (_partialExpunge) {
            // Also expunge deletes from the large segment with the most
            // pending deletions, if any.
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null)
                    spec = new MergeSpecification();
                spec.add(expunge);
            }
        }
        return spec;
    }
}
From source file: cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License: Apache License
/**
 * Selects, among the first {@code maxNumSegments} segments, the one with
 * the most pending deletions and wraps it in a single-segment merge
 * (merging a segment with itself rewrites it without its deleted docs).
 *
 * @return the expunge merge, or {@code null} if no segment has deletions
 */
private OneMerge findOneSegmentToExpunge(SegmentInfos infos, int maxNumSegments) throws IOException {
    int bestIndex = -1;
    int bestDelCount = 0;
    // Scan right-to-left; with the strict '>' comparison, ties between
    // equal delete counts resolve to the right-most segment.
    for (int idx = maxNumSegments - 1; idx >= 0; idx--) {
        final int delCount = infos.info(idx).getDelCount();
        if (delCount > bestDelCount) {
            bestIndex = idx;
            bestDelCount = delCount;
        }
    }
    if (bestDelCount <= 0) {
        return null;
    }
    return new OneMerge(infos.asList().subList(bestIndex, bestIndex + 1));
}
From source file: org.eu.bitzone.Leia.java
License: Apache License
public void showSegments(final Object commitsTable) throws Exception { final Object segTable = find("segmentsTable"); removeAll(segTable);//from w ww . j a v a2 s .c o m final Object[] rows = getSelectedItems(commitsTable); if (rows == null || rows.length == 0) { showStatus("No commit point selected."); return; } final Object row = rows[0]; final IndexCommit commit = (IndexCommit) getProperty(row, "commit"); if (commit == null) { showStatus("Can't retrieve commit point (application error)"); return; } final Object segGen = find("segGen"); setString(segGen, "text", commit.getSegmentsFileName() + " (gen " + commit.getGeneration() + ")"); final String segName = commit.getSegmentsFileName(); final SegmentInfos infos = new SegmentInfos(); try { infos.read(dir, segName); } catch (final Exception e) { e.printStackTrace(); errorMsg("Error reading segment infos for '" + segName + ": " + e.toString()); return; } for (final SegmentCommitInfo si : infos.asList()) { final Object r = create("row"); add(segTable, r); Object cell = create("cell"); add(r, cell); setString(cell, "text", si.info.name); cell = create("cell"); add(r, cell); setString(cell, "text", String.valueOf(si.getDelGen())); setChoice(cell, "alignment", "right"); cell = create("cell"); add(r, cell); setString(cell, "text", String.valueOf(si.getDelCount())); setChoice(cell, "alignment", "right"); cell = create("cell"); add(r, cell); setString(cell, "text", String.valueOf(si.info.getDocCount())); setChoice(cell, "alignment", "right"); cell = create("cell"); add(r, cell); setString(cell, "text", si.info.getVersion()); cell = create("cell"); add(r, cell); setString(cell, "text", si.info.getCodec().getName()); final long size = si.sizeInBytes(); cell = create("cell"); add(r, cell); setString(cell, "text", Util.normalizeSize(size) + Util.normalizeUnit(size)); setChoice(cell, "alignment", "right"); cell = create("cell"); add(r, cell); setString(cell, "text", si.info.getUseCompoundFile() ? 
"Y" : "N"); putProperty(r, "si", si); } final Object diagsTable = find("diagsTable"); removeAll(diagsTable); }
From source file: org.getopt.luke.Luke.java
License: Apache License
/**
 * Populates the "segmentsTable" UI table with per-segment details (name,
 * delete generation, delete count, doc count, version, codec, size and
 * compound-file flag) for the commit point currently selected in
 * {@code commitsTable}. Clears the "diagsTable" afterwards.
 *
 * @param commitsTable the table widget holding the commit points
 * @throws Exception propagated from the UI toolkit or Lucene
 */
public void showSegments(Object commitsTable) throws Exception {
    Object segTable = find("segmentsTable");
    removeAll(segTable);
    Object[] rows = getSelectedItems(commitsTable);
    if (rows == null || rows.length == 0) {
        showStatus("No commit point selected.");
        return;
    }
    Object row = rows[0];
    IndexCommit commit = (IndexCommit) getProperty(row, "commit");
    if (commit == null) {
        showStatus("Can't retrieve commit point (application error)");
        return;
    }
    Object segGen = find("segGen");
    setString(segGen, "text", commit.getSegmentsFileName() + " (gen " + commit.getGeneration() + ")");
    String segName = commit.getSegmentsFileName();
    SegmentInfos infos = new SegmentInfos();
    try {
        infos.read(dir, segName);
    } catch (Exception e) {
        e.printStackTrace();
        // Fixed: the quote around the segment name was previously unbalanced.
        errorMsg("Error reading segment infos for '" + segName + "': " + e.toString());
        return;
    }
    for (SegmentCommitInfo si : infos.asList()) {
        Object r = create("row");
        add(segTable, r);
        addSegmentCell(r, si.info.name, false);
        addSegmentCell(r, String.valueOf(si.getDelGen()), true);
        addSegmentCell(r, String.valueOf(si.getDelCount()), true);
        addSegmentCell(r, String.valueOf(si.info.getDocCount()), true);
        addSegmentCell(r, si.info.getVersion().toString(), false);
        addSegmentCell(r, si.info.getCodec().getName(), false);
        long size = si.sizeInBytes();
        addSegmentCell(r, Util.normalizeSize(size) + Util.normalizeUnit(size), true);
        addSegmentCell(r, si.info.getUseCompoundFile() ? "Y" : "N", false);
        putProperty(r, "si", si);
    }
    Object diagsTable = find("diagsTable");
    removeAll(diagsTable);
}

/** Adds one text cell to the given table row, optionally right-aligned. */
private void addSegmentCell(Object row, String text, boolean alignRight) {
    Object cell = create("cell");
    add(row, cell);
    setString(cell, "text", text);
    if (alignRight) {
        setChoice(cell, "alignment", "right");
    }
}
From source file: proj.zoie.api.impl.ZoieMergePolicy.java
License: Apache License
/** Returns the merges necessary to optimize the index. * This merge policy defines "optimized" to mean only one * segment in the index, where that segment has no * deletions pending nor separate norms, and it is in * compound file format if the current useCompoundFile * setting is true. This method returns multiple merges * (mergeFactor at a time) so the {@link MergeScheduler} * in use may make use of concurrency. */ @Override//from w ww .j a va2 s . c o m public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments, Map<SegmentInfo, Boolean> segmentsToOptimize) throws IOException { assert maxNumSegments > 0; MergeSpecification spec = null; if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) { // Find the newest (rightmost) segment that needs to // be optimized (other segments may have been flushed // since optimize started): int last = infos.size(); while (last > 0) { final SegmentInfo info = infos.info(--last); if (segmentsToOptimize.get(info)) { last++; break; } } if (last > 0) { if (maxNumSegments == 1) { // Since we must optimize down to 1 segment, the // choice is simple: // boolean useCompoundFile = getUseCompoundFile(); if (last > 1 || !isMerged(infos.info(0))) { spec = new MergeSpecification(); spec.add(new OneMerge(infos.asList().subList(0, last))); } } else if (last > maxNumSegments) { // find most balanced merges spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge); } } } return spec; }
From source file: proj.zoie.api.impl.ZoieMergePolicy.java
License: Apache License
/**
 * Partitions the first {@code infoLen} segments into {@code maxNumSegments}
 * contiguous groups using a Viterbi-style dynamic program that minimizes the
 * size variance of the resulting merged segments. Groups of size 1 are left
 * unmerged; if {@code partialExpunge} is set, the single-segment group with
 * the most pending deletes is additionally merged with itself to expunge
 * those deletes.
 *
 * @param infos          all segment infos
 * @param infoLen        number of leading segments to consider
 * @param maxNumSegments desired number of resulting segments
 * @param partialExpunge whether to add a self-merge to expunge deletes
 * @return the merge specification, or {@code null} if already few enough
 */
private MergeSpecification findBalancedMerges(SegmentInfos infos, int infoLen, int maxNumSegments,
        boolean partialExpunge) throws IOException {
    if (infoLen <= maxNumSegments)
        return null;
    MergeSpecification spec = new MergeSpecification();
    // boolean useCompoundFile = getUseCompoundFile();

    // use Viterbi algorithm to find the best segmentation.
    // we will try to minimize the size variance of resulting segments.

    // variance[i][j]: cost of merging segments [i, i+j] into one —
    // produced by createVarianceTable (defined elsewhere in this class).
    double[][] variance = createVarianceTable(infos, infoLen, maxNumSegments);

    // Max number of source segments any one resulting segment can absorb.
    final int maxMergeSegments = infoLen - maxNumSegments + 1;
    double[] sumVariance = new double[maxMergeSegments];
    int[][] backLink = new int[maxNumSegments][maxMergeSegments];
    for (int i = (maxMergeSegments - 1); i >= 0; i--) {
        // Base case: the first resulting segment absorbs i+1 source segments.
        sumVariance[i] = variance[0][i];
        backLink[0][i] = 0;
    }
    for (int i = 1; i < maxNumSegments; i++) {
        // NOTE: j iterates downward on purpose — sumVariance is updated
        // in place, and sumVariance[k] for k <= j must still hold the
        // previous row's value when it is read. Do not reorder.
        for (int j = (maxMergeSegments - 1); j >= 0; j--) {
            double minV = Double.MAX_VALUE;
            int minK = 0;
            for (int k = j; k >= 0; k--) {
                double v = sumVariance[k] + variance[i + k][j - k];
                if (v < minV) {
                    minV = v;
                    minK = k;
                }
            }
            sumVariance[j] = minV;
            backLink[i][j] = minK;
        }
    }

    // now, trace back the back links to find all merges,
    // also find a candidate for partial expunge if requested
    int mergeEnd = infoLen;
    int prev = maxMergeSegments - 1;
    int expungeCandidate = -1;
    int maxDelCount = 0;
    for (int i = maxNumSegments - 1; i >= 0; i--) {
        prev = backLink[i][prev];
        int mergeStart = i + prev;
        if ((mergeEnd - mergeStart) > 1) {
            // Group spans more than one segment: schedule a real merge.
            spec.add(new OneMerge(infos.asList().subList(mergeStart, mergeEnd)));
        } else {
            if (partialExpunge) {
                // Singleton group: track the one with the most deletes
                // as the expunge candidate.
                SegmentInfo info = infos.info(mergeStart);
                int delCount = info.getDelCount();
                if (delCount > maxDelCount) {
                    expungeCandidate = mergeStart;
                    maxDelCount = delCount;
                }
            }
        }
        mergeEnd = mergeStart;
    }

    if (partialExpunge && maxDelCount > 0) {
        // expunge deletes: a single-segment merge rewrites the segment
        // without its deleted documents.
        spec.add(new OneMerge(infos.asList().subList(expungeCandidate, expungeCandidate + 1)));
    }

    return spec;
}
From source file: proj.zoie.api.impl.ZoieMergePolicy.java
License: Apache License
/**
 * Finds merges necessary to expunge all deletes from the index.
 * The number of large segments stays the same: the small-segment tail is
 * delegated to the superclass policy, while each large segment that carries
 * deletions is scheduled as a single-segment merge (which rewrites it
 * without the deleted documents).
 *
 * @param infos all segment infos in the index
 * @return a (possibly empty) merge specification, never {@code null}
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos) throws CorruptIndexException, IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = Math.min(numSegs, _numLargeSegments);

    MergeSpecification spec = null;
    if (numLargeSegs < numSegs) {
        // Let the superclass policy handle the small-segment tail.
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(infos.asList().subList(numLargeSegs, numSegs));
        spec = super.findForcedDeletesMerges(smallSegments);
    }
    if (spec == null) {
        spec = new MergeSpecification();
    }

    // Self-merge every large segment that has pending deletions.
    for (int idx = 0; idx < numLargeSegs; idx++) {
        SegmentInfo segInfo = infos.info(idx);
        if (segInfo.hasDeletions()) {
            spec.add(new OneMerge(infos.asList().subList(idx, idx + 1)));
        }
    }
    return spec;
}