Example usage for org.apache.lucene.index SegmentInfos SegmentInfos

List of usage examples for org.apache.lucene.index SegmentInfos SegmentInfos

Introduction

On this page you can find example usage for org.apache.lucene.index SegmentInfos SegmentInfos.

Prototype

SegmentInfos

Source Link

Usage

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/**
 * Finds the merges necessary to expunge all deletes from the index.
 * The number of large segments stays the same: small segments are
 * delegated to the parent policy, while each large segment carrying
 * deletions is rewritten in place via a singleton merge.
 */
@Override
public MergeSpecification findForcedDeletesMerges(SegmentInfos infos)
        throws CorruptIndexException, IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = Math.min(numSegs, _numLargeSegments);
    MergeSpecification spec = null;

    // Let the parent policy decide how to expunge deletes from the small tail.
    if (numLargeSegs < numSegs) {
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(infos.asList().subList(numLargeSegs, numSegs));
        spec = super.findForcedDeletesMerges(smallSegments);
    }

    if (spec == null) {
        spec = new MergeSpecification();
    }

    // Rewrite every large segment that has deletions as a one-segment merge,
    // which drops the deleted docs without changing the segment count.
    for (int i = 0; i < numLargeSegs; i++) {
        final SegmentInfoPerCommit commit = infos.info(i);
        if (commit.hasDeletions()) {
            spec.add(new OneMerge(infos.asList().subList(i, i + 1)));
        }
    }
    return spec;
}

From source file:cn.hbu.cs.esearch.core.EsearchMergePolicy.java

License:Apache License

/** Checks if any merges are now necessary and returns a
 *  {@link org.apache.lucene.index.MergePolicy.MergeSpecification} if so.
 *  This merge policy tries to maintain {@link #setNumLargeSegments}
 *  large segments of similar sizes, and applies
 *  {@link org.apache.lucene.index.LogByteSizeMergePolicy} to small segments.
 *  Small segments are merged and promoted to a large segment
 *  when their total size reaches the average size of large segments.
 *
 *  @param mergeTrigger event that triggered this merge check
 *  @param infos        current segments, large segments first
 *  @return a merge specification, or {@code null} if no merge is needed
 */
@Override
public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos) throws IOException {
    final int numSegs = infos.size();
    final int numLargeSegs = _numLargeSegments;

    // Still within the large-segment budget: nothing to merge.
    if (numSegs <= numLargeSegs) {
        return null;
    }

    long totalLargeSegSize = 0;
    long totalSmallSegSize = 0;
    SegmentInfoPerCommit info;

    // compute the total size of large segments
    for (int i = 0; i < numLargeSegs; i++) {
        info = infos.info(i);
        totalLargeSegSize += size(info);
    }
    // compute the total size of small segments
    for (int i = numLargeSegs; i < numSegs; i++) {
        info = infos.info(i);
        totalSmallSegSize += size(info);
    }

    // Average large-segment size after a promotion merge. Guard against a
    // divide-by-zero: the original divided by (numLargeSegs - 1)
    // unconditionally, which throws ArithmeticException when
    // _numLargeSegments == 1 (the early return above only covers
    // numSegs <= numLargeSegs).
    long targetSegSize = totalLargeSegSize / Math.max(1, numLargeSegs - 1);
    if (targetSegSize <= totalSmallSegSize) {
        // the total size of small segments is big enough,
        // promote the small segments to a large segment and do balanced merge,

        if (totalSmallSegSize < targetSegSize * 2) {
            // Rebalance the large segments down by one, then merge the whole
            // small tail into the freed slot.
            MergeSpecification spec = findBalancedMerges(infos, numLargeSegs, (numLargeSegs - 1),
                    _partialExpunge);
            if (spec == null) {
                spec = new MergeSpecification(); // should not happen
            }
            spec.add(new OneMerge(infos.asList().subList(numLargeSegs, numSegs)));
            return spec;
        } else {
            // Small tail is very large; rebalance everything at once.
            return findBalancedMerges(infos, numSegs, numLargeSegs, _partialExpunge);
        }
    } else if (_maxSegments < numSegs) {
        // we have more than _maxSegments, merge small segments smaller than targetSegSize/4
        MergeSpecification spec = new MergeSpecification();
        int startSeg = numLargeSegs;
        long sizeThreshold = (targetSegSize / 4);
        while (startSeg < numSegs) {
            info = infos.info(startSeg);
            if (size(info) < sizeThreshold) {
                break;
            }
            startSeg++;
        }
        spec.add(new OneMerge(infos.asList().subList(startSeg, numSegs)));
        return spec;
    } else {
        // apply the log merge policy to small segments.
        List<SegmentInfoPerCommit> smallSegmentList = infos.asList().subList(numLargeSegs, numSegs);
        SegmentInfos smallSegments = new SegmentInfos();
        smallSegments.addAll(smallSegmentList);
        MergeSpecification spec = super.findMerges(mergeTrigger, smallSegments);

        // Optionally also reclaim deletes from one large segment per pass.
        if (_partialExpunge) {
            OneMerge expunge = findOneSegmentToExpunge(infos, numLargeSegs);
            if (expunge != null) {
                if (spec == null) {
                    spec = new MergeSpecification();
                }
                spec.add(expunge);
            }
        }
        return spec;
    }
}

From source file:cn.hbu.cs.esearch.util.IndexUtil.java

License:Apache License

/**
 * Reads the current segments file of the given directory and reports how
 * many segments the index contains.
 *
 * @param idx directory holding a Lucene index
 * @return the segment count
 * @throws IOException if the segments file cannot be read
 */
public static int getNumSegments(Directory idx) throws IOException {
    final SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(idx);
    return segmentInfos.size();
}

From source file:cn.hbu.cs.esearch.util.IndexUtil.java

License:Apache License

/**
 * Builds a human-readable summary of every segment in the index, one
 * "[name,numDoc:N,numDel:M]" entry per segment. Best-effort: any failure
 * is reported by returning the exception's string form instead of throwing.
 *
 * @param idx directory holding a Lucene index
 * @return the concatenated per-segment summary, or the error description
 */
public static String getSegmentsInfo(Directory idx) {
    final SegmentInfos infos = new SegmentInfos();
    try {
        infos.read(idx);
        final StringBuilder summary = new StringBuilder();
        for (int i = 0; i < infos.size(); i++) {
            final SegmentInfoPerCommit commit = infos.info(i);
            summary.append("[").append(commit.info.name)
                    .append(",numDoc:").append(commit.info.getDocCount())
                    .append(",numDel:").append(commit.getDelCount())
                    .append("]");
        }
        return summary.toString();
    } catch (Exception e) {
        // Deliberate best-effort contract: surface the error as text.
        return e.toString();
    }
}

From source file:com.senseidb.clue.commands.SegmentsCommand.java

License:Apache License

/**
 * Prints one line per segment of the current index: segment info,
 * doc count, and deleted-doc count in parentheses.
 */
@Override
public void execute(String[] args, PrintStream out) throws Exception {
    final SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(ctx.getDirectory());

    for (SegmentCommitInfo commit : segmentInfos) {
        out.print(commit.info.toString());
        out.print("  ");
        out.print(commit.info.getDocCount());
        out.print(" (");
        out.print(commit.getDelCount());
        out.println(")");
    }
}

From source file:org.apache.blur.mapreduce.lib.BlurInputFormat.java

License:Apache License

/**
 * Builds one {@link BlurInputSplit} per live segment of the shard at the
 * commit point recorded for {@code snapshot}. Segments whose documents are
 * all deleted are skipped (and logged). Each split's file list includes the
 * segments file itself.
 *
 * @throws IOException if the snapshot is unknown for this shard or the
 *         directory cannot be read
 */
public static List<BlurInputSplit> getSplitForDirectory(Path shardDir, Configuration configuration, Text table,
        Text snapshot, Directory directory) throws IOException {
    SnapshotIndexDeletionPolicy policy = new SnapshotIndexDeletionPolicy(configuration,
            SnapshotIndexDeletionPolicy.getGenerationsPath(shardDir));

    Long generation = policy.getGeneration(snapshot.toString());
    if (generation == null) {
        throw new IOException("Snapshot [" + snapshot + "] not found in shard [" + shardDir + "]");
    }

    // Resolve the snapshot's generation to a concrete index commit.
    List<IndexCommit> listCommits = DirectoryReader.listCommits(directory);
    IndexCommit indexCommit = findIndexCommit(listCommits, generation, shardDir);
    String segmentsFileName = indexCommit.getSegmentsFileName();

    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory, segmentsFileName);

    List<BlurInputSplit> splits = new ArrayList<BlurInputSplit>();
    for (SegmentInfoPerCommit commit : segmentInfos) {
        SegmentInfo segmentInfo = commit.info;
        if (commit.getDelCount() == segmentInfo.getDocCount()) {
            // Every document in this segment is deleted; nothing to read.
            LOG.info("Segment [{0}] in dir [{1}] has all records deleted.", segmentInfo.name, shardDir);
            continue;
        }
        Collection<String> files = commit.files();
        long fileLength = 0;
        for (String file : files) {
            fileLength += directory.fileLength(file);
        }
        List<String> dirFiles = new ArrayList<String>(files);
        dirFiles.add(segmentsFileName);
        splits.add(new BlurInputSplit(shardDir, segmentsFileName, segmentInfo.name, fileLength, table,
                dirFiles));
    }
    return splits;
}

From source file:org.apache.luke.client.LukeInspector.java

License:Apache License

/**
 * Opens a Lucene index at the given location and re-initializes this
 * inspector's reader/searcher state ({@code ir}, {@code is}, {@code dir}).
 * First tries the location as a single index; if that fails, each
 * subdirectory is tried as its own index and the results are combined in a
 * {@link MultiReader}. On any failure the method returns silently (error
 * reporting is currently commented out).
 *
 * @param name        filesystem path of the index (or parent of multiple indexes)
 * @param force       if true, force-unlock a locked index (only honored when not read-only)
 * @param dirImpl     Directory implementation name passed to {@code openDirectory}
 * @param ro          open read-only; a locked index is then opened without unlocking
 * @param ramdir      if true, copy the opened index(es) into an in-memory RAMDirectory
 * @param keepCommits if true, keep all commit points (KeepAllIndexDeletionPolicy)
 * @param point       NOTE(review): accepted but never used in this method — confirm intended
 * @param tiiDivisor  term-infos index divisor; applied only when &gt; 1
 */
public void openIndex(String name, boolean force, String dirImpl, boolean ro, boolean ramdir,
        boolean keepCommits, IndexCommit point, int tiiDivisor) {
    pName = name;
    File baseFileDir = new File(name);

    ArrayList<Directory> dirs = new ArrayList<Directory>();
    Throwable lastException = null;

    try {
        Directory d = openDirectory(dirImpl, pName, false);
        // A locked index may only be opened read-only, or force-unlocked.
        if (IndexWriter.isLocked(d)) {
            if (!ro) {
                if (force) {
                    IndexWriter.unlock(d);
                } else {
                    //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                    d.close();
                    d = null;
                    return;
                }
            }
        }
        boolean existsSingle = false;
        // IR.indexExists doesn't report the cause of error, so probe by
        // reading the segments file directly.
        try {
            new SegmentInfos().read(d);
            existsSingle = true;
        } catch (Throwable e) {
            e.printStackTrace();
            lastException = e;
            //
        }
        if (!existsSingle) { // try multi: treat each subdirectory as an index
            File[] files = baseFileDir.listFiles();
            for (File f : files) {
                if (f.isFile()) {
                    continue;
                }
                Directory d1 = openDirectory(dirImpl, f.toString(), false);
                if (IndexWriter.isLocked(d1)) {
                    if (!ro) {
                        if (force) {
                            IndexWriter.unlock(d1);
                        } else {
                            //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                            d1.close();
                            d1 = null;
                            return;
                        }
                    }
                }
                existsSingle = false;
                try {
                    new SegmentInfos().read(d1);
                    existsSingle = true;
                } catch (Throwable e) {
                    lastException = e;
                    e.printStackTrace();
                }
                // Not a valid index: close and skip this subdirectory.
                if (!existsSingle) {
                    d1.close();
                    continue;
                }
                dirs.add(d1);
            }
        } else {
            dirs.add(d);
        }

        // No usable directory found anywhere below the given location.
        if (dirs.size() == 0) {
            if (lastException != null) {
                //errorMsg("Invalid directory at the location, check console for more information. Last exception:\n" + lastException.toString());
            } else {
                //errorMsg("No valid directory at the location, try another location.\nCheck console for other possible causes.");
            }
            return;
        }

        if (ramdir) {
            // Merge all found indexes into a fresh in-memory directory and
            // use that single directory from here on.
            //showStatus("Loading index into RAMDirectory ...");
            Directory dir1 = new RAMDirectory();
            IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
            IndexWriter iw1 = new IndexWriter(dir1, cfg);
            iw1.addIndexes((Directory[]) dirs.toArray(new Directory[dirs.size()]));
            iw1.close();
            //showStatus("RAMDirectory loading done!");
            if (dir != null)
                dir.close();
            dirs.clear();
            dirs.add(dir1);
        }
        IndexDeletionPolicy policy;
        if (keepCommits) {
            policy = new KeepAllIndexDeletionPolicy();
        } else {
            policy = new KeepLastIndexDeletionPolicy();
        }
        // NOTE(review): `policy` is built but not visibly passed to any
        // reader/writer below — confirm whether this is dead code here.
        ArrayList<DirectoryReader> readers = new ArrayList<DirectoryReader>();
        for (Directory dd : dirs) {
            DirectoryReader reader;
            if (tiiDivisor > 1) {
                reader = DirectoryReader.open(dd, tiiDivisor);
            } else {
                reader = DirectoryReader.open(dd);
            }
            readers.add(reader);
        }
        // One index: use its reader directly; several: wrap in a MultiReader.
        if (readers.size() == 1) {
            ir = readers.get(0);
            dir = ((DirectoryReader) ir).directory();
        } else {
            ir = new MultiReader((IndexReader[]) readers.toArray(new IndexReader[readers.size()]));
        }
        is = new IndexSearcher(ir);
        // XXX 
        //slowAccess = false;
        //initOverview();
        //initPlugins();
        //showStatus("Index successfully open.");
    } catch (Exception e) {
        e.printStackTrace();
        //errorMsg(e.getMessage());
        return;
    }
}

From source file:org.apache.mahout.text.LuceneSegmentInputFormat.java

License:Apache License

/**
 * Produces one input split per Lucene segment across every configured
 * index path, so each mapper reads exactly one segment.
 */
@Override
public List<LuceneSegmentInputSplit> getSplits(JobContext context) throws IOException, InterruptedException {
    Configuration configuration = context.getConfiguration();
    LuceneStorageConfiguration storageConfiguration = new LuceneStorageConfiguration(configuration);

    List<LuceneSegmentInputSplit> inputSplits = Lists.newArrayList();
    for (Path indexPath : storageConfiguration.getIndexPaths()) {
        ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration),
                indexPath, false, configuration);
        SegmentInfos segmentInfos = new SegmentInfos();
        segmentInfos.read(directory);

        for (SegmentCommitInfo segment : segmentInfos) {
            inputSplits.add(new LuceneSegmentInputSplit(indexPath, segment.info.name, segment.sizeInBytes()));
            LOG.info("Created {} byte input split for index '{}' segment {}", segment.sizeInBytes(),
                    indexPath.toUri(), segment.info.name);
        }
    }

    return inputSplits;
}

From source file:org.apache.mahout.text.LuceneSegmentInputSplit.java

License:Apache License

/**
 * Resolves this split's segment name to its {@link SegmentCommitInfo} by
 * reading the index located via the given {@link Configuration}.
 *
 * @param configuration the configuration used to locate the index
 * @return the matching segment's commit info
 * @throws IOException if the directory or segments file cannot be read
 * @throws IllegalArgumentException if no segment with this name exists
 */
public SegmentCommitInfo getSegment(Configuration configuration) throws IOException {
    ReadOnlyFileSystemDirectory directory = new ReadOnlyFileSystemDirectory(FileSystem.get(configuration),
            indexPath, false, configuration);

    SegmentInfos segmentInfos = new SegmentInfos();
    segmentInfos.read(directory);

    for (SegmentCommitInfo candidate : segmentInfos) {
        if (candidate.info.name.equals(segmentInfoName)) {
            return candidate;
        }
    }

    throw new IllegalArgumentException(
            "No such segment: '" + segmentInfoName + "' in directory " + directory.toString());
}

From source file:org.apache.mahout.text.LuceneSegmentRecordReaderTest.java

License:Apache License

/**
 * Test fixture: builds a two-segment index (500 docs per commit) and
 * captures its SegmentInfos for the record-reader tests.
 */
@Before
public void before() throws IOException, InterruptedException {
    LuceneStorageConfiguration storageConfiguration = new LuceneStorageConfiguration(getConfiguration(),
            asList(getIndexPath1()), new Path("output"), ID_FIELD, asList(FIELD));
    configuration = storageConfiguration.serialize();
    recordReader = new LuceneSegmentRecordReader();

    // Two separate commits produce two on-disk segments.
    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(0, 500));
    commitDocuments(getDirectory(getIndexPath1AsFile()), docs.subList(500, 1000));

    segmentInfos = new SegmentInfos();
    segmentInfos.read(getDirectory(getIndexPath1AsFile()));
}