List of usage examples for org.apache.lucene.index.SegmentInfos.size()
public int size()
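SegmentInfos.size() returns the number of segments recorded in the commit point that the SegmentInfos instance was read from. Before the per-project examples below, here is a minimal sketch of calling it, assuming a Lucene 5.x-style API where SegmentInfos.readLatestCommit(Directory) is available (the same call used in the Solr test example further down); the index path is a hypothetical placeholder.

import java.nio.file.Paths;

import org.apache.lucene.index.SegmentInfos;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class SegmentCountExample {
    public static void main(String[] args) throws Exception {
        // Open the index directory; "/tmp/index" is an illustrative placeholder
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/index"))) {
            // Read the latest commit point and count the segments it records
            SegmentInfos infos = SegmentInfos.readLatestCommit(dir);
            System.out.println("segments: " + infos.size());
        }
    }
}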
From source file:axiom.objectmodel.dom.IndexOptimizingRunner.java
License:Open Source License
protected int segmentCount() {
    int count = 0;
    SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(this.directory);
    if (sinfos != null) {
        count = sinfos.size();
    }
    return count;
}
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
@Override
protected boolean isMerged(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException {
    final int numSegments = infos.size();
    int numToOptimize = 0;
    SegmentInfoPerCommit optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) {
        final SegmentInfoPerCommit info = infos.info(i);
        if (segmentsToOptimize.get(info)) {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || isMerged(optimizeInfo));
}
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
/**
 * Returns the merges necessary to optimize the index. This merge policy defines
 * "optimized" to mean only one segment in the index, where that segment has no
 * deletions pending nor separate norms, and it is in compound file format if the
 * current useCompoundFile setting is true. This method returns multiple merges
 * (mergeFactor at a time) so the {@link org.apache.lucene.index.MergeScheduler}
 * in use may make use of concurrency.
 */
@Override
public MergeSpecification findForcedMerges(SegmentInfos infos, int maxNumSegments,
        Map<SegmentInfoPerCommit, Boolean> segmentsToOptimize) throws IOException {
    assert maxNumSegments > 0;

    MergeSpecification spec = null;

    if (!isMerged(infos, maxNumSegments, segmentsToOptimize)) {
        // Find the newest (rightmost) segment that needs to
        // be optimized (other segments may have been flushed
        // since optimize started):
        int last = infos.size();
        while (last > 0) {
            final SegmentInfoPerCommit info = infos.info(--last);
            if (segmentsToOptimize.get(info)) {
                last++;
                break;
            }
        }

        if (last > 0) {
            if (maxNumSegments == 1) {
                // Since we must optimize down to 1 segment, the
                // choice is simple:
                // boolean useCompoundFile = getUseCompoundFile();
                if (last > 1 || !isMerged(infos.info(0))) {
                    spec = new MergeSpecification();
                    spec.add(new OneMerge(infos.asList().subList(0, last)));
                }
            } else if (last > maxNumSegments) {
                // find most balanced merges
                spec = findBalancedMerges(infos, last, maxNumSegments, _partialExpunge);
            }
        }
    }
    return spec;
}
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
/**
 * Finds merges necessary to expunge all deletes from the
 * index. The number of large segments will stay the same.
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = (numSegs < _numLargeSegments ? numSegs : _numLargeSegments);
    MergeSpecification spec = null;

    if (numLargeSegs < numSegs) {
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        spec = super.findForcedDeletesMerges(smallSegments);
    }

    if (spec == null) {
        spec = new MergeSpecification();
    }
    for (int i = 0; i < numLargeSegs; i++) {
        SegmentInfoPerCommit info = infos.info(i);
        if (info.hasDeletions()) {
            spec.add(new OneMerge(infos.asList().subList(i, i + 1)));
        }
    }
    return spec;
}
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
/**
 * Checks if any merges are now necessary and returns a
 * {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 * This merge policy tries to maintain {@link #setNumLargeSegments} large
 * segments of similar sizes, and applies
 * {@link org.apache.lucene.index.LogByteSizeMergePolicy} to the small segments.
 * Small segments are merged and promoted to a large segment when their total
 * size reaches the average size of the large segments.
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;

    if (numSegs <= numLargeSegs) {
        return null;
    }

    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;

    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }

    long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough:
        // promote the small segments to a large segment and do a balanced merge
        if (totalSmallSegSize < targetSegSize * 2) {
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null) {
                spec = new MergeSpecification(); // should not happen
            }
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold) {
                break;
            }
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);

        if (_partialExpunge) {
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null) {
                    spec = new MergeSpecification();
                }
                spec.add(expunge);
            }
        }
        return spec;
    }
}
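The findMerges/findForcedMerges/findForcedDeletesMerges overrides above only take effect once the policy is installed on an IndexWriter. Here is a minimal sketch of that wiring, assuming the Lucene 4.x-era API these examples are written against; the index path, the analyzer choice, and the no-arg EsearchMergePolicy constructor are illustrative assumptions, not taken from the project.

import java.io.File;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

public class MergePolicySetup {
    public static void main(String[] args) throws Exception {
        // Hypothetical index path, for illustration only
        Directory dir = FSDirectory.open(new File("/tmp/index"));
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
                new StandardAnalyzer(Version.LUCENE_CURRENT));
        // Install the custom policy; a no-arg constructor is assumed here
        conf.setMergePolicy(new EsearchMergePolicy());
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // adds and commits here trigger findMerges(...) via the installed policy
        }
    }
}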
From source file:cn.hbu.cs.esearch.util.IndexUtil.java
License:Apache License
public static int getNumSegments(Directory idx) throws IOException {
    SegmentInfos infos = new SegmentInfos();
    infos.read(idx);
    return infos.size();
}
From source file:cn.hbu.cs.esearch.util.IndexUtil.java
License:Apache License
public static String getSegmentsInfo(Directory idx) {
    SegmentInfos infos = new SegmentInfos();
    try {
        infos.read(idx);
        StringBuilder buf = new StringBuilder();
        for (int i = 0; i < infos.size(); i++) {
            SegmentInfoPerCommit segInfo = infos.info(i);
            buf.append("[").append(segInfo.info.name).append(",numDoc:").append(segInfo.info.getDocCount())
                    .append(",numDel:").append(segInfo.getDelCount()).append("]");
        }
        return buf.toString();
    } catch (Exception e) {
        return e.toString();
    }
}
From source file:org.apache.solr.core.TestSimpleTextCodec.java
License:Apache License
public void test() throws Exception {
    SolrConfig config = h.getCore().getSolrConfig();
    String codecFactory = config.get("codecFactory/@class");
    assertEquals("Unexpected solrconfig codec factory", "solr.SimpleTextCodecFactory", codecFactory);

    assertEquals("Unexpected core codec", "SimpleText", h.getCore().getCodec().getName());

    RefCounted<IndexWriter> writerRef = h.getCore().getSolrCoreState().getIndexWriter(h.getCore());
    try {
        IndexWriter writer = writerRef.get();
        assertEquals("Unexpected codec in IndexWriter config", "SimpleText",
                writer.getConfig().getCodec().getName());
    } finally {
        writerRef.decref();
    }

    assertU(add(doc("id", "1", "text", "textual content goes here")));
    assertU(commit());

    RefCounted<SolrIndexSearcher> searcherRef = h.getCore().getSearcher();
    try {
        SolrIndexSearcher searcher = searcherRef.get();
        SegmentInfos infos = SegmentInfos.readLatestCommit(searcher.getIndexReader().directory());
        SegmentInfo info = infos.info(infos.size() - 1).info;
        assertEquals("Unexpected segment codec", "SimpleText", info.getCodec().getName());
    } finally {
        searcherRef.decref();
    }

    assertQ(req("q", "id:1"), "*[count(//doc)=1]");
}
From source file:org.dspace.app.util.IndexVersion.java
License:BSD License
/**
 * Determine the version of Solr/Lucene which was used to create a given index directory.
 *
 * @param indexDirPath
 *            Full path of the Solr/Lucene index directory
 * @return version as a string (e.g. "4.4"), empty string ("") if index directory is empty,
 *         or null if directory doesn't exist.
 * @throws IOException
 */
public static String getIndexVersion(String indexDirPath) throws IOException {
    String indexVersion = null;

    // Make sure this directory exists
    File dir = new File(indexDirPath);
    if (dir.exists() && dir.isDirectory()) {
        // Check if this index directory has any contents
        String[] dirContents = dir.list();
        // If this directory is empty, return an empty string.
        // It is a valid directory, but it's an empty index.
        if (dirContents != null && dirContents.length == 0) {
            return "";
        }

        // Open this index directory in Lucene
        Directory indexDir = FSDirectory.open(dir);

        // Get info on the Lucene segment file(s) in index directory
        SegmentInfos sis = new SegmentInfos();
        try {
            sis.read(indexDir);
        } catch (IOException ie) {
            // Wrap default IOException, providing more info about which directory cannot be read
            throw new IOException("Could not read Lucene segments files in " + dir.getAbsolutePath(), ie);
        }

        // If we have a valid Solr index dir, but it has no existing segments,
        // then just return an empty string. It's a valid but empty index.
        if (sis != null && sis.size() == 0) {
            return "";
        }

        // Loop through our Lucene segment files to locate the OLDEST version.
        // It is possible for individual segment files to be created by different
        // versions of Lucene. So, we just need to find the oldest version of
        // Lucene which created these index segment files.
        // This logic borrowed from Lucene v.4.10 CheckIndex class:
        // https://github.com/apache/lucene-solr/blob/lucene_solr_4_10/lucene/core/src/java/org/apache/lucene/index/CheckIndex.java#L426
        // WARNING: It MAY require updating whenever we upgrade the
        // "lucene.version" in our DSpace Parent POM
        Version oldest = null;
        Version oldSegment = null;
        for (SegmentCommitInfo si : sis) {
            // Get the version of Lucene which created this segment file
            Version version = si.info.getVersion();
            if (version == null) {
                // If null, then this is a pre-3.1 segment file.
                // For our purposes, we will just assume it is "3.0".
                // This lets us know we will need to upgrade it to 3.5
                // before upgrading to Solr/Lucene 4.x or above.
                try {
                    oldSegment = Version.parse("3.0");
                } catch (ParseException pe) {
                    throw new IOException(pe);
                }
            }
            // else if this segment is older than our oldest thus far
            else if (oldest == null || version.onOrAfter(oldest) == false) {
                // We have a new oldest segment version
                oldest = version;
            }
        }

        // If we found a really old segment, compare it to the oldest
        // to see which is actually older
        if (oldSegment != null && oldSegment.onOrAfter(oldest) == false) {
            oldest = oldSegment;
        }

        // At this point, we should know what version of Lucene created our
        // oldest segment file. We will return this as the index version,
        // as it's the oldest segment we will need to upgrade.
        if (oldest != null) {
            indexVersion = oldest.toString();
        }
    }
    return indexVersion;
}
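The oldest-segment logic above hinges on Version.onOrAfter: a.onOrAfter(b) is true when a is the same version as b or newer, so version.onOrAfter(oldest) == false means the segment's version is strictly older than the current candidate. A small illustration of that comparison, using arbitrary example version strings with the same Version.parse call the method itself uses:

import org.apache.lucene.util.Version;

public class VersionCompareExample {
    public static void main(String[] args) throws Exception {
        Version v44 = Version.parse("4.4");
        Version v30 = Version.parse("3.0");
        // true: 4.4 is on or after 3.0
        System.out.println(v44.onOrAfter(v30));
        // false: 3.0 is strictly older than 4.4, so it would become the new "oldest"
        System.out.println(v30.onOrAfter(v44));
    }
}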
From source file:org.elasticsearch.index.merge.policy.BalancedSegmentMergePolicy.java
License:Apache License
private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments,
        Set<SegmentInfo> segmentsToOptimize) throws IOException {
    final int numSegments = infos.size();
    int numToOptimize = 0;
    SegmentInfo optimizeInfo = null;
    for (int i = 0; i < numSegments && numToOptimize <= maxNumSegments; i++) {
        final SegmentInfo info = infos.info(i);
        if (segmentsToOptimize.contains(info)) {
            numToOptimize++;
            optimizeInfo = info;
        }
    }
    return numToOptimize <= maxNumSegments && (numToOptimize != 1 || isOptimized(writer, optimizeInfo));
}