List of usage examples for the org.apache.lucene.index.SegmentInfos constructor (SegmentInfos()).
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
/**
 * Finds the merges necessary to expunge all deletes from the index.
 * The number of large segments stays the same: the tail of small segments
 * is delegated to the superclass policy, while each large segment that
 * carries deletions is rewritten in place as a singleton merge.
 *
 * @param infos the current segment metadata of the index
 * @return a merge specification covering every segment with deletions
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int segmentCount = infos.size();
    final int largeCount = Math.min(segmentCount, _numLargeSegments);

    MergeSpecification spec = null;
    if (largeCount < segmentCount) {
        // Let the base policy decide how to expunge deletes from the
        // small-segment tail.
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(infos.asList().subList(largeCount, segmentCount));
        spec = super.findForcedDeletesMerges(smallSegments);
    }
    if (spec == null) {
        spec = new MergeSpecification();
    }
    // Rewrite each large segment that has deletions as a one-segment merge.
    for (int i = 0; i < largeCount; i++) {
        if (infos.info(i).hasDeletions()) {
            spec.add(new OneMerge(infos.asList().subList(i, i + 1)));
        }
    }
    return spec;
}
From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java
License:Apache License
/**
 * Checks if any merges are now necessary and returns a
 * {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 * This merge policy tries to maintain {@link #setNumLargeSegments} large
 * segments of similar sizes, and applies
 * {@link org.apache.lucene.index.LogByteSizeMergePolicy} to the remaining
 * small segments. Small segments are merged and promoted to a large segment
 * when their total size reaches the average size of the large segments.
 *
 * @param mergeTrigger what triggered this merge check
 * @param infos        the current segment metadata, large segments first
 * @return the merges to perform, or {@code null} if none are needed
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;
    // Nothing but large segments: no merge needed.
    if (numSegs <= numLargeSegs)
        return null;
    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;
    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }
    // Target size after promoting small segments into a large one.
    // NOTE(review): divides by (numLargeSegs - 1); if _numLargeSegments can
    // ever be 1 this throws ArithmeticException — confirm the invariant.
    long targetSegSize = (totalLargeSegSize / (numLargeSegs - 1));
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough,
        // promote the small segments to a large segment and do balanced merge,
        if (totalSmallSegSize < targetSegSize * 2) {
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null)
                spec = new MergeSpecification(); // should not happen
            // Merge the whole small-segment tail into one new large segment.
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            // Tail is very large: rebalance everything, large segments included.
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        // Skip past small segments that are still above the threshold,
        // then merge everything from the first below-threshold segment on.
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold)
                break;
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments.
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);
        if (_partialExpunge) {
            // Opportunistically expunge deletes from one large segment per pass.
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null)
                    spec = new MergeSpecification();
                spec.add(expunge);
            }
        }
        return spec;
    }
}
From source file:cn.hbu.cs.esearch.util.IndexUtil.java
License:Apache License
public static int getNumSegments(Directory idx) throws IOException { SegmentInfos infos = new SegmentInfos(); infos.read(idx);//from w w w. j a va2 s .c o m return infos.size(); }
From source file:cn.hbu.cs.esearch.util.IndexUtil.java
License:Apache License
public static String getSegmentsInfo(Directory idx) { SegmentInfos infos = new SegmentInfos(); try {//from ww w .ja v a 2s . c o m infos.read(idx); StringBuilder buf = new StringBuilder(); for (int i = 0; i < infos.size(); i++) { SegmentInfoPerCommit segInfo = infos.info(i); buf.append("[").append(segInfo.info.name).append(",numDoc:").append(segInfo.info.getDocCount()) .append(",numDel:").append(segInfo.getDelCount()).append("]"); } return buf.toString(); } catch (Exception e) { return e.toString(); } }
From source file:com.senseidb.clue.commands.SegmentsCommand.java
License:Apache License
@Override public void execute(String[] args, PrintStream out) throws Exception { SegmentInfos sis = new SegmentInfos(); sis.read(ctx.getDirectory());//from ww w .j ava 2 s.co m for (SegmentCommitInfo sci : sis) { out.print(sci.info.toString()); out.print(" "); out.print(sci.info.getDocCount()); out.print(" ("); out.print(sci.getDelCount()); out.println(")"); } }
From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java
License:Apache License
/**
 * Builds one {@link BlurInputSplit} per live segment of the given shard
 * directory, pinned to the index commit that backs {@code snapshot}.
 * Segments whose documents are all deleted are skipped (and logged).
 *
 * @param shardDir      path of the shard on the filesystem
 * @param configuration job configuration used to locate the snapshot metadata
 * @param table         table name carried into each split
 * @param snapshot      snapshot name whose commit generation is resolved
 * @param directory     Lucene directory backing the shard
 * @return the splits for every non-empty segment
 * @throws IOException if the snapshot is unknown or the index cannot be read
 */
public static List<BlurInputSplit> getSplitForDirectory(Path shardDir, Configuration configuration, Text table,
        Text snapshot, Directory directory) throws IOException {
    SnapshotIndexDeletionPolicy policy = new SnapshotIndexDeletionPolicy(configuration,
            SnapshotIndexDeletionPolicy.getGenerationsPath(shardDir));
    Long generation = policy.getGeneration(snapshot.toString());
    if (generation == null) {
        throw new IOException("Snapshot [" + snapshot + "] not found in shard [" + shardDir + "]");
    }
    List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
    IndexCommit indexCommit = findIndexCommit(listCommits, generation, shardDir);
    String segmentsFileName = indexCommit.getSegmentsFileName();
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory, segmentsFileName);

    List<BlurInputSplit> splits = new ArrayList<BlurInputSplit>();
    for (SegmentInfoPerCommit commit : segmentInfos) {
        SegmentInfo segmentInfo = commit.info;
        if (commit.getDelCount() == segmentInfo.getDocCount()) {
            // Every record deleted: nothing to read from this segment.
            LOG.info("Segment [{0}] in dir [{1}] has all records deleted.", segmentInfo.name, shardDir);
            continue;
        }
        // Sum the on-disk size of the segment's files for split sizing.
        Collection<String> files = commit.files();
        long fileLength = 0;
        for (String file : files) {
            fileLength += directory.fileLength(file);
        }
        // The split must also carry the segments file itself.
        List<String> dirFiles = new ArrayList<String>(files);
        dirFiles.add(segmentsFileName);
        splits.add(new BlurInputSplit(shardDir, segmentsFileName, segmentInfo.name, fileLength, table, dirFiles));
    }
    return splits;
}
From source file:org.apache.luke.client.LukeInspector.java
License:Apache License
/**
 * Opens the Lucene index at {@code name} and initialises this inspector's
 * reader/searcher state ({@code ir}, {@code is}, {@code dir}). First tries
 * the location as a single index; if that fails, treats each subdirectory
 * as its own index and opens them all behind a {@code MultiReader}.
 *
 * @param name        filesystem location of the index (or of index subdirectories)
 * @param force       force-unlock a write-locked index before opening
 * @param dirImpl     Directory implementation to use (passed to openDirectory)
 * @param ro          open read-only (locked indexes are tolerated when true)
 * @param ramdir      copy the index into a RAMDirectory before opening
 * @param keepCommits keep all commit points instead of only the last
 * @param point       commit point to open — NOTE(review): currently never read
 * @param tiiDivisor  term-index divisor passed to DirectoryReader when &gt; 1
 */
public void openIndex(String name, boolean force, String dirImpl, boolean ro, boolean ramdir,
        boolean keepCommits, IndexCommit point, int tiiDivisor) {
    pName = name;
    File baseFileDir = new File(name);
    ArrayList<Directory> dirs = new ArrayList<Directory>();
    // Remember the last probe failure so it can be reported if nothing opens.
    Throwable lastException = null;
    try {
        Directory d = openDirectory(dirImpl, pName, false);
        // Refuse to open a write-locked index unless the caller forces an
        // unlock; read-only opens ignore the lock entirely.
        if (IndexWriter.isLocked(d)) {
            if (!ro) {
                if (force) {
                    IndexWriter.unlock(d);
                } else {
                    //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                    d.close();
                    d = null;
                    return;
                }
            }
        }
        boolean existsSingle = false;
        // IR.indexExists doesn't report the cause of error; probing with
        // SegmentInfos.read() preserves the underlying exception.
        try {
            new SegmentInfos().read(d);
            existsSingle = true;
        } catch (Throwable e) {
            e.printStackTrace();
            lastException = e;
            //
        }
        if (!existsSingle) { // try multi: each subdirectory may be an index
            File[] files = baseFileDir.listFiles();
            for (File f : files) {
                if (f.isFile()) {
                    continue;
                }
                Directory d1 = openDirectory(dirImpl, f.toString(), false);
                // Same lock handling as for the single-index case above.
                if (IndexWriter.isLocked(d1)) {
                    if (!ro) {
                        if (force) {
                            IndexWriter.unlock(d1);
                        } else {
                            //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                            d1.close();
                            d1 = null;
                            return;
                        }
                    }
                }
                existsSingle = false;
                try {
                    new SegmentInfos().read(d1);
                    existsSingle = true;
                } catch (Throwable e) {
                    lastException = e;
                    e.printStackTrace();
                }
                if (!existsSingle) {
                    // Not a Lucene index; skip this subdirectory.
                    d1.close();
                    continue;
                }
                dirs.add(d1);
            }
        } else {
            dirs.add(d);
        }
        if (dirs.size() == 0) {
            if (lastException != null) {
                //errorMsg("Invalid directory at the location, check console for more information. Last exception:\n" + lastException.toString());
            } else {
                //errorMsg("No valid directory at the location, try another location.\nCheck console for other possible causes.");
            }
            return;
        }
        if (ramdir) {
            // Copy every found directory into a single in-memory index and
            // use that instead of the on-disk directories.
            //showStatus("Loading index into RAMDirectory ...");
            Directory dir1 = new RAMDirectory();
            IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
            IndexWriter iw1 = new IndexWriter(dir1, cfg);
            iw1.addIndexes((Directory[]) dirs.toArray(new Directory[dirs.size()]));
            iw1.close();
            //showStatus("RAMDirectory loading done!");
            if (dir != null)
                dir.close();
            dirs.clear();
            dirs.add(dir1);
        }
        // NOTE(review): 'policy' is computed but never used afterwards, and
        // the 'point' parameter is never read — confirm whether commit-point
        // opening was meant to be wired in here.
        IndexDeletionPolicy policy;
        if (keepCommits) {
            policy = new KeepAllIndexDeletionPolicy();
        } else {
            policy = new KeepLastIndexDeletionPolicy();
        }
        ArrayList<DirectoryReader> readers = new ArrayList<DirectoryReader>();
        for (Directory dd : dirs) {
            DirectoryReader reader;
            if (tiiDivisor > 1) {
                reader = DirectoryReader.open(dd, tiiDivisor);
            } else {
                reader = DirectoryReader.open(dd);
            }
            readers.add(reader);
        }
        if (readers.size() == 1) {
            // Single index: expose its directory directly.
            ir = readers.get(0);
            dir = ((DirectoryReader) ir).directory();
        } else {
            // Multiple indexes: wrap them in one logical reader.
            ir = new MultiReader((IndexReader[]) readers.toArray(new IndexReader[readers.size()]));
        }
        is = new IndexSearcher(ir);
        // XXX
        //slowAccess = false;
        //initOverview();
        //initPlugins();
        //showStatus("Index successfully open.");
    } catch (Exception e) {
        e.printStackTrace();
        //errorMsg(e.getMessage());
        return;
    }
}
From source file:org.apache.mahout.text.LuceneSegmentInputFormat.java
License:Apache License
/**
 * Produces one input split per Lucene segment across every index path in
 * the job's Lucene storage configuration.
 *
 * @param context the job context carrying the configuration
 * @return a split for each segment, sized by the segment's bytes on disk
 * @throws IOException if an index directory cannot be read
 */
@Override
public List<LuceneSegmentInputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    LuceneStorageConfiguration storageConfiguration = new LuceneStorageConfiguration(configuration);

    List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
    for (Path indexPath : storageConfiguration.getIndexPaths()) {
        ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration),
                indexPath, false, configuration);
        SegmentInfos segmentInfos = new SegmentInfos();
        segmentInfos.read(directory);
        for (SegmentCommitInfo segmentInfo : segmentInfos) {
            inputSplits.add(
                    new LuceneSegmentInputSplit(indexPath, segmentInfo.info.name, segmentInfo.sizeInBytes()));
            LOG.info("Created {} byte input split for index '{}' segment {}", segmentInfo.sizeInBytes(),
                    indexPath.toUri(), segmentInfo.info.name);
        }
    }
    return inputSplits;
}
From source file:org.apache.mahout.text.LuceneSegmentInputSplit.java
License:Apache License
/**
 * Looks up this split's segment in the index directory located via the
 * given {@link Configuration}.
 *
 * @param configuration the configuration used to locate the index
 * @return the matching segment's commit metadata
 * @throws IOException              if the index cannot be read
 * @throws IllegalArgumentException if no segment has this split's name
 */
public SegmentCommitInfo getSegment(Configuration configuration) throws IOException {
    ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration),
            indexPath, false, configuration);
    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory);
    for (SegmentCommitInfo candidate : segmentInfos) {
        if (candidate.info.name.equals(segmentInfoName)) {
            return candidate;
        }
    }
    throw new IllegalArgumentException(
            "No such segment: '" + segmentInfoName + "' in directory " + directory.toString());
}
From source file:org.apache.mahout.text.LuceneSegmentRecordReaderTest.java
License:Apache License
/**
 * Test fixture: commits two batches of 500 documents to the test index
 * (so it holds at least two segments), builds the record reader under
 * test, and reads back the resulting segment metadata.
 */
@Before
public void before() throws IOException, InterruptedException {
    LuceneStorageConfiguration storageConf = new LuceneStorageConfiguration(getConfiguration(),
            asList(getIndexPath1()), new Path("output"), ID_FIELD, asList(FIELD));
    configuration = storageConf.serialize();
    recordReader = new LuceneSegmentRecordReader();

    // Two separate commits so the index contains multiple segments.
    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(500, 1000));

    segmentInfos = new SegmentInfos();
    segmentInfos.read(getDirectory(getIndexPath1AsFile()));
}