Example usage for org.apache.lucene.index IndexWriter addIndexes

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter addIndexes.

Prototype

public long addIndexes(CodecReader... readers) throws IOException 

Document

Merges the provided indexes into this index.
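
The prototype above is the CodecReader-based overload found in recent Lucene releases; most of the examples below use the older addIndexes(Directory...) and addIndexes(IndexReader...) variants. As a quick orientation, here is a minimal, self-contained sketch of the CodecReader overload. It is not taken from the examples below; it assumes Lucene 7+ and uses illustrative paths "source-index" and "target-index".

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.CodecReader;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddIndexesSketch {
    public static void main(String[] args) throws IOException {
        // Hypothetical index locations; replace with real paths.
        try (Directory target = FSDirectory.open(Paths.get("target-index"));
                Directory source = FSDirectory.open(Paths.get("source-index"));
                DirectoryReader sourceReader = DirectoryReader.open(source);
                IndexWriter writer = new IndexWriter(target,
                        new IndexWriterConfig(new StandardAnalyzer()))) {

            // For a regular on-disk index each leaf reader is a SegmentReader,
            // which implements CodecReader and can be handed to addIndexes.
            CodecReader[] leaves = sourceReader.leaves().stream()
                    .map(LeafReaderContext::reader)
                    .map(leaf -> (CodecReader) leaf)
                    .toArray(CodecReader[]::new);

            writer.addIndexes(leaves); // merge the source segments into the target index
            writer.commit();           // close() would also commit; shown here for clarity
        }
    }
}

Closing the writer commits the merge; calling forceMerge(1) afterwards, as several of the examples below do, is only needed when a single merged segment is desired.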

Usage

From source file:org.apache.blur.utils.TableShardCountCollapser.java

License:Apache License

public void collapseShardsTo(int newShardCount) throws IOException {
    if (!validateCount(newShardCount)) {
        throw new RuntimeException("Count [" + newShardCount + "] is not valid, valid values are ["
                + getCollapsePossibilities() + "]");
    }

    Path[] paths = getPaths();
    int numberOfShardsToMergePerPass = paths.length / newShardCount;
    for (int i = 0; i < newShardCount; i++) {
        System.out.println("Base Index [" + paths[i] + "]");
        IndexWriterConfig lconf = new IndexWriterConfig(LUCENE_VERSION, new KeywordAnalyzer());
        lconf.setCodec(new Blur024Codec());
        HdfsDirectory dir = new HdfsDirectory(getConf(), paths[i]);
        IndexWriter indexWriter = new IndexWriter(dir, lconf);
        Directory[] dirs = new Directory[numberOfShardsToMergePerPass - 1];
        Path[] pathsToDelete = new Path[numberOfShardsToMergePerPass - 1];
        for (int p = 1; p < numberOfShardsToMergePerPass; p++) {
            Path pathToMerge = paths[i + p * newShardCount];
            System.out.println("Merge [" + pathToMerge + "]");
            dirs[p - 1] = new HdfsDirectory(getConf(), pathToMerge);
            pathsToDelete[p - 1] = pathToMerge;
        }
        indexWriter.addIndexes(dirs);
        // Causes a rewrite of the index; the symlinked files are
        // merged/rewritten.
        indexWriter.forceMerge(1);
        indexWriter.close();
        FileSystem fileSystem = path.getFileSystem(getConf());
        for (Path p : pathsToDelete) {
            fileSystem.delete(p, true);
        }
    }
}

From source file:org.apache.jackrabbit.core.query.lucene.IndexMigration.java

License:Apache License

/**
 * Checks if the given <code>index</code> needs to be migrated.
 *
 * @param index the index to check and migrate if needed.
 * @param indexDir the directory where the index is stored.
 * @throws IOException if an error occurs while migrating the index.
 */
public static void migrate(PersistentIndex index, File indexDir) throws IOException {
    log.debug("Checking {} ...", indexDir.getAbsolutePath());
    ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
    try {
        if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) {
            // index was created with Jackrabbit 1.5 or higher
            // no need for migration
            log.debug("IndexFormatVersion >= V3, no migration needed");
            return;
        }
        // assert: there is at least one node in the index, otherwise the
        //         index format version would be at least V3
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
        try {
            Term t = terms.term();
            if (t.text().indexOf('\uFFFF') == -1) {
                log.debug("Index already migrated");
                return;
            }
        } finally {
            terms.close();
        }
    } finally {
        reader.release();
    }

    // if we get here then the index must be migrated
    log.debug("Index requires migration {}", indexDir.getAbsolutePath());

    // make sure readers are closed, otherwise the directory
    // cannot be deleted
    index.releaseWriterAndReaders();

    File migrationDir = new File(indexDir.getAbsoluteFile().getParentFile(), indexDir.getName() + "_v2.3");
    if (migrationDir.exists()) {
        FileUtil.delete(migrationDir);
    }
    if (!migrationDir.mkdirs()) {
        throw new IOException("failed to create directory " + migrationDir.getAbsolutePath());
    }
    FSDirectory fsDir = FSDirectory.getDirectory(migrationDir, NoLockFactory.getNoLockFactory());
    try {
        IndexWriter writer = new IndexWriter(fsDir, new JackrabbitAnalyzer());
        try {
            IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()));
            try {
                writer.addIndexes(new IndexReader[] { r });
                writer.close();
            } finally {
                r.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        fsDir.close();
    }
    FileUtil.delete(indexDir);
    if (!migrationDir.renameTo(indexDir)) {
        throw new IOException("failed to move migrated directory " + migrationDir.getAbsolutePath());
    }
    log.info("Migrated " + indexDir.getAbsolutePath());
}

From source file:org.apache.jackrabbit.core.query.lucene.IndexMigration.java

License:Apache License

/**
 * Checks if the given <code>index</code> needs to be migrated.
 *
 * @param index the index to check and migrate if needed.
 * @param directoryManager the directory manager.
 * @param oldSeparatorChar the old separator char that needs to be replaced.
 * @throws IOException if an error occurs while migrating the index.
 */
public static void migrate(PersistentIndex index, DirectoryManager directoryManager, char oldSeparatorChar)
        throws IOException {
    Directory indexDir = index.getDirectory();
    log.debug("Checking {} ...", indexDir);
    ReadOnlyIndexReader reader = index.getReadOnlyIndexReader();
    try {
        if (IndexFormatVersion.getVersion(reader).getVersion() >= IndexFormatVersion.V3.getVersion()) {
            // index was created with Jackrabbit 1.5 or higher
            // no need for migration
            log.debug("IndexFormatVersion >= V3, no migration needed");
            return;
        }
        // assert: there is at least one node in the index, otherwise the
        //         index format version would be at least V3
        TermEnum terms = reader.terms(new Term(FieldNames.PROPERTIES, ""));
        try {
            Term t = terms.term();
            if (t.text().indexOf(oldSeparatorChar) == -1) {
                log.debug("Index already migrated");
                return;
            }
        } finally {
            terms.close();
        }
    } finally {
        reader.release();
        index.releaseWriterAndReaders();
    }

    // if we get here then the index must be migrated
    log.debug("Index requires migration {}", indexDir);

    String migrationName = index.getName() + "_v36";
    if (directoryManager.hasDirectory(migrationName)) {
        directoryManager.delete(migrationName);
    }

    Directory migrationDir = directoryManager.getDirectory(migrationName);
    final IndexWriterConfig c = new IndexWriterConfig(Version.LUCENE_36, new JackrabbitAnalyzer());
    c.setMergePolicy(new UpgradeIndexMergePolicy(new LogByteSizeMergePolicy()));
    c.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy());
    try {
        IndexWriter writer = new IndexWriter(migrationDir, c);
        try {
            IndexReader r = new MigrationIndexReader(IndexReader.open(index.getDirectory()), oldSeparatorChar);
            try {
                writer.addIndexes(r);
                writer.forceMerge(1);
                writer.close();
            } finally {
                r.close();
            }
        } finally {
            writer.close();
        }
    } finally {
        migrationDir.close();
    }
    directoryManager.delete(index.getName());
    if (!directoryManager.rename(migrationName, index.getName())) {
        throw new IOException("failed to move migrated directory " + migrationDir);
    }
    log.info("Migrated " + index.getName());
}

From source file:org.apache.luke.client.LukeInspector.java

License:Apache License

/**
* Open a Lucene index and re-initialize all the sub-widgets.
* @param name
* @param force
* @param dirImpl
* @param ro
* @param ramdir
* @param keepCommits
* @param point
* @param tiiDivisor
*/
public void openIndex(String name, boolean force, String dirImpl, boolean ro, boolean ramdir,
        boolean keepCommits, IndexCommit point, int tiiDivisor) {
    pName = name;
    File baseFileDir = new File(name);

    ArrayList<Directory> dirs = new ArrayList<Directory>();
    Throwable lastException = null;

    try {
        Directory d = openDirectory(dirImpl, pName, false);
        if (IndexWriter.isLocked(d)) {
            if (!ro) {
                if (force) {
                    IndexWriter.unlock(d);
                } else {
                    //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                    d.close();
                    d = null;
                    return;
                }
            }
        }
        boolean existsSingle = false;
        // IR.indexExists doesn't report the cause of error
        try {
            new SegmentInfos().read(d);
            existsSingle = true;
        } catch (Throwable e) {
            e.printStackTrace();
            lastException = e;
            //
        }
        if (!existsSingle) { // try multi
            File[] files = baseFileDir.listFiles();
            for (File f : files) {
                if (f.isFile()) {
                    continue;
                }
                Directory d1 = openDirectory(dirImpl, f.toString(), false);
                if (IndexWriter.isLocked(d1)) {
                    if (!ro) {
                        if (force) {
                            IndexWriter.unlock(d1);
                        } else {
                            //errorMsg("Index is locked. Try 'Force unlock' when opening.");
                            d1.close();
                            d1 = null;
                            return;
                        }
                    }
                }
                existsSingle = false;
                try {
                    new SegmentInfos().read(d1);
                    existsSingle = true;
                } catch (Throwable e) {
                    lastException = e;
                    e.printStackTrace();
                }
                if (!existsSingle) {
                    d1.close();
                    continue;
                }
                dirs.add(d1);
            }
        } else {
            dirs.add(d);
        }

        if (dirs.size() == 0) {
            if (lastException != null) {
                //errorMsg("Invalid directory at the location, check console for more information. Last exception:\n" + lastException.toString());
            } else {
                //errorMsg("No valid directory at the location, try another location.\nCheck console for other possible causes.");
            }
            return;
        }

        if (ramdir) {
            //showStatus("Loading index into RAMDirectory ...");
            Directory dir1 = new RAMDirectory();
            IndexWriterConfig cfg = new IndexWriterConfig(LV, new WhitespaceAnalyzer(LV));
            IndexWriter iw1 = new IndexWriter(dir1, cfg);
            iw1.addIndexes((Directory[]) dirs.toArray(new Directory[dirs.size()]));
            iw1.close();
            //showStatus("RAMDirectory loading done!");
            if (dir != null)
                dir.close();
            dirs.clear();
            dirs.add(dir1);
        }
        IndexDeletionPolicy policy;
        if (keepCommits) {
            policy = new KeepAllIndexDeletionPolicy();
        } else {
            policy = new KeepLastIndexDeletionPolicy();
        }
        ArrayList<DirectoryReader> readers = new ArrayList<DirectoryReader>();
        for (Directory dd : dirs) {
            DirectoryReader reader;
            if (tiiDivisor > 1) {
                reader = DirectoryReader.open(dd, tiiDivisor);
            } else {
                reader = DirectoryReader.open(dd);
            }
            readers.add(reader);
        }
        if (readers.size() == 1) {
            ir = readers.get(0);
            dir = ((DirectoryReader) ir).directory();
        } else {
            ir = new MultiReader((IndexReader[]) readers.toArray(new IndexReader[readers.size()]));
        }
        is = new IndexSearcher(ir);
        // XXX 
        //slowAccess = false;
        //initOverview();
        //initPlugins();
        //showStatus("Index successfully open.");
    } catch (Exception e) {
        e.printStackTrace();
        //errorMsg(e.getMessage());
        return;
    }
}

From source file:org.apache.nutch.indexer.IndexMerger.java

License:Apache License

/**
 * Merge all input indexes to the single output index
 */
public void merge(Path[] indexes, Path outputIndex, Path localWorkingDir) throws IOException {
    if (LOG.isInfoEnabled()) {
        LOG.info("merging indexes to: " + outputIndex);
    }
    FileSystem localFs = FileSystem.getLocal(getConf());
    if (localWorkingDir == null) {
        localWorkingDir = new Path("indexmerger-" + System.currentTimeMillis());
    }
    if (localFs.exists(localWorkingDir)) {
        localFs.delete(localWorkingDir);
    }
    localFs.mkdirs(localWorkingDir);

    // Get local output target
    //
    FileSystem fs = FileSystem.get(getConf());
    Path tmpLocalOutput = new Path(localWorkingDir, "merge-output");
    Path localOutput = fs.startLocalOutput(outputIndex, tmpLocalOutput);

    Directory[] dirs = new Directory[indexes.length];
    for (int i = 0; i < indexes.length; i++) {
        if (LOG.isInfoEnabled()) {
            LOG.info("Adding " + indexes[i]);
        }
        dirs[i] = new FsDirectory(fs, indexes[i], false, this.conf);
    }

    //
    // Merge indices
    //
    IndexWriter writer = new IndexWriter(localOutput.toString(), null, true);
    writer.setMergeFactor(conf.getInt("indexer.mergeFactor", IndexWriter.DEFAULT_MERGE_FACTOR));
    writer.setMaxBufferedDocs(conf.getInt("indexer.minMergeDocs", IndexWriter.DEFAULT_MAX_BUFFERED_DOCS));
    writer.setMaxMergeDocs(conf.getInt("indexer.maxMergeDocs", IndexWriter.DEFAULT_MAX_MERGE_DOCS));
    writer.setTermIndexInterval(
            conf.getInt("indexer.termIndexInterval", IndexWriter.DEFAULT_TERM_INDEX_INTERVAL));
    writer.setInfoStream(LogUtil.getDebugStream(LOG));
    writer.setUseCompoundFile(false);
    writer.setSimilarity(new NutchSimilarity());
    writer.addIndexes(dirs);
    writer.close();

    //
    // Put target back
    //
    fs.completeLocalOutput(outputIndex, tmpLocalOutput);
    FileSystem.getLocal(conf).delete(localWorkingDir);
    if (LOG.isInfoEnabled()) {
        LOG.info("done merging");
    }
}

From source file:org.apache.nutch.indexer.IndexOptimizer.java

License:Apache License

public void optimize() throws IOException {
    IndexReader reader = IndexReader.open(new File(directory, "index"));
    OptimizingReader optimizer = new OptimizingReader(reader);
    IndexWriter writer = new IndexWriter(new File(directory, "index-opt"), null, true);
    writer.addIndexes(new IndexReader[] { optimizer });
    // Close the writer so the merged index is actually committed to disk.
    writer.close();
}

From source file:org.apache.nutch.indexer.IndexSorter.java

License:Apache License

public void sort(File directory) throws IOException {
    LOG.info("IndexSorter: starting.");
    Date start = new Date();
    int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128);
    IndexReader reader = IndexReader.open(new File(directory, "index"));

    SortingReader sorter = new SortingReader(reader, oldToNew(reader));
    IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"), null, true);
    writer.setTermIndexInterval(termIndexInterval);
    writer.setUseCompoundFile(false);
    writer.addIndexes(new IndexReader[] { sorter });
    writer.close();
    Date end = new Date();
    LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime()) + " total milliseconds");
}

From source file:org.apache.nutch.indexer.IndexSorterArquivoWeb.java

License:Apache License

public void sort(File directory) throws IOException {
    LOG.info("IndexSorter: starting.");
    Date start = new Date();
    int termIndexInterval = getConf().getInt("indexer.termIndexInterval", 128);
    IndexReader reader = IndexReader.open(new File(directory, "index"));
    Searcher searcher = new IndexSearcher(new File(directory, "index").getAbsolutePath()); // TODO MC

    SortingReader sorter = new SortingReader(reader, newToOld(reader, searcher)); // TODO MC
    IndexWriter writer = new IndexWriter(new File(directory, "index-sorted"), null, true);
    writer.setTermIndexInterval(termIndexInterval);
    writer.setUseCompoundFile(false);
    writer.addIndexes(new IndexReader[] { sorter });
    writer.close();
    Date end = new Date();
    LOG.info("IndexSorter: done, " + (end.getTime() - start.getTime()) + " total milliseconds");
}

From source file:org.apache.nutch.tools.SegmentMergeTool.java

License:Apache License

/** Run the tool, periodically reporting progress. */
public void run() {
    start = System.currentTimeMillis();
    stage = SegmentMergeStatus.STAGE_OPENING;
    long delta;
    LOG.info("* Opening " + allsegdirs.size() + " segments:");
    try {
        segdirs = new ArrayList();
        // open all segments
        for (int i = 0; i < allsegdirs.size(); i++) {
            File dir = (File) allsegdirs.get(i);
            SegmentReader sr = null;
            try {
                // try to autofix it if corrupted...
                sr = new SegmentReader(nfs, dir, true);
            } catch (Exception e) {
                // this segment is hosed beyond repair, don't use it
                LOG.warning("* Segment " + dir.getName() + " is corrupt beyond repair; skipping it.");
                continue;
            }
            segdirs.add(dir);
            totalRecords += sr.size;
            LOG.info(" - segment " + dir.getName() + ": " + sr.size + " records.");
            readers.put(dir.getName(), sr);
        }
        long total = totalRecords;
        LOG.info("* TOTAL " + total + " input records in " + segdirs.size() + " segments.");
        LOG.info("* Creating master index...");
        stage = SegmentMergeStatus.STAGE_MASTERIDX;
        // XXX Note that Lucene indexes don't work with NutchFileSystem for now.
        // XXX For now always assume LocalFileSystem here...
        Vector masters = new Vector();
        File fsmtIndexDir = new File(output, ".fastmerge_index");
        File masterDir = new File(fsmtIndexDir, "0");
        if (!masterDir.mkdirs()) {
            LOG.severe("Could not create a master index dir: " + masterDir);
            return;
        }
        masters.add(masterDir);
        IndexWriter iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
        iw.setUseCompoundFile(false);
        iw.setMergeFactor(INDEX_MERGE_FACTOR);
        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
        long s1 = System.currentTimeMillis();
        Iterator it = readers.values().iterator();
        processedRecords = 0L;
        delta = System.currentTimeMillis();
        while (it.hasNext()) {
            SegmentReader sr = (SegmentReader) it.next();
            String name = sr.segmentDir.getName();
            FetcherOutput fo = new FetcherOutput();
            for (long i = 0; i < sr.size; i++) {
                try {
                    if (!sr.get(i, fo, null, null, null))
                        break;

                    Document doc = new Document();

                    // compute boost
                    float boost = IndexSegment.calculateBoost(fo.getFetchListEntry().getPage().getScore(),
                            scorePower, boostByLinkCount, fo.getAnchors().length);
                    //            doc.add(new Field("sd", name + "|" + i, true, false, false));
                    //            doc.add(new Field("uh", MD5Hash.digest(fo.getUrl().toString()).toString(), true, true, false));
                    //            doc.add(new Field("ch", fo.getMD5Hash().toString(), true, true, false));
                    //            doc.add(new Field("time", DateField.timeToString(fo.getFetchDate()), true, false, false));
                    //            doc.add(new Field("score", boost + "", true, false, false));
                    //            doc.add(new Field("ul", fo.getUrl().toString().length() + "", true, false, false));
                    iw.addDocument(doc);
                    processedRecords++;
                    if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                        LOG.info(" Processed " + processedRecords + " records ("
                                + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta)
                                + " rec/s)");
                        delta = System.currentTimeMillis();
                    }
                    if (processedRecords > 0 && (processedRecords % INDEX_SIZE == 0)) {
                        iw.optimize();
                        iw.close();
                        LOG.info(" - creating next subindex...");
                        masterDir = new File(fsmtIndexDir, "" + masters.size());
                        if (!masterDir.mkdirs()) {
                            LOG.severe("Could not create a master index dir: " + masterDir);
                            return;
                        }
                        masters.add(masterDir);
                        iw = new IndexWriter(masterDir, new WhitespaceAnalyzer(), true);
                        iw.setUseCompoundFile(false);
                        iw.setMergeFactor(INDEX_MERGE_FACTOR);
                        iw.setRAMBufferSizeMB(INDEX_MIN_MERGE_DOCS);
                    }
                } catch (Throwable t) {
                    // we can assume the data is invalid from now on - break here
                    LOG.info(" - segment " + name + " truncated to " + (i + 1) + " records");
                    break;
                }
            }
        }
        iw.optimize();
        LOG.info("* Creating index took " + (System.currentTimeMillis() - s1) + " ms");
        s1 = System.currentTimeMillis();
        // merge all other indexes using the latest IndexWriter (still open):
        if (masters.size() > 1) {
            LOG.info(" - merging subindexes...");
            stage = SegmentMergeStatus.STAGE_MERGEIDX;
            IndexReader[] ireaders = new IndexReader[masters.size() - 1];
            for (int i = 0; i < masters.size() - 1; i++)
                ireaders[i] = IndexReader.open((File) masters.get(i));
            iw.addIndexes(ireaders);
            for (int i = 0; i < masters.size() - 1; i++) {
                ireaders[i].close();
                FileUtil.fullyDelete((File) masters.get(i));
            }
        }
        iw.close();
        LOG.info("* Optimizing index took " + (System.currentTimeMillis() - s1) + " ms");
        LOG.info("* Removing duplicate entries...");
        stage = SegmentMergeStatus.STAGE_DEDUP;
        IndexReader ir = IndexReader.open(masterDir);
        int i = 0;
        long cnt = 0L;
        processedRecords = 0L;
        s1 = System.currentTimeMillis();
        delta = s1;
        TermEnum te = ir.terms();
        while (te.next()) {
            Term t = te.term();
            if (t == null)
                continue;
            if (!(t.field().equals("ch") || t.field().equals("uh")))
                continue;
            cnt++;
            processedRecords = cnt / 2;
            if (cnt > 0 && (cnt % (LOG_STEP * 2) == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            // Enumerate all docs with the same URL hash or content hash
            TermDocs td = ir.termDocs(t);
            if (td == null)
                continue;
            if (t.field().equals("uh")) {
                // Keep only the latest version of the document with
                // the same url hash. Note: even if the content
                // hash is identical, other metadata may be different, so even
                // in this case it makes sense to keep the latest version.
                int id = -1;
                String time = null;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (time == null) {
                            time = doc.get("time");
                            id = docid;
                            continue;
                        }
                        String dtime = doc.get("time");
                        // "time" is a DateField, and can be compared lexicographically
                        if (dtime.compareTo(time) > 0) {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            time = dtime;
                            id = docid;
                        } else {
                            ir.deleteDocument(docid);
                        }
                    }
                }
            } else if (t.field().equals("ch")) {
                // Keep only the version of the document with
                // the highest score, and then with the shortest url.
                int id = -1;
                int ul = 0;
                float score = 0.0f;
                Document doc = null;
                while (td.next()) {
                    int docid = td.doc();
                    if (!ir.isDeleted(docid)) {
                        doc = ir.document(docid);
                        if (ul == 0) {
                            try {
                                ul = Integer.parseInt(doc.get("ul"));
                                score = Float.parseFloat(doc.get("score"));
                            } catch (Exception e) {
                                // ignore unparsable values and keep defaults
                            }
                            id = docid;
                            continue;
                        }
                        int dul = 0;
                        float dscore = 0.0f;
                        try {
                            dul = Integer.parseInt(doc.get("ul"));
                            dscore = Float.parseFloat(doc.get("score"));
                        } catch (Exception e) {
                            // ignore unparsable values and keep defaults
                        }
                        int cmp = Float.compare(dscore, score);
                        if (cmp == 0) {
                            // equal scores, select the one with shortest url
                            if (dul < ul) {
                                if (id != -1) {
                                    ir.deleteDocument(id);
                                }
                                ul = dul;
                                id = docid;
                            } else {
                                ir.deleteDocument(docid);
                            }
                        } else if (cmp < 0) {
                            ir.deleteDocument(docid);
                        } else {
                            if (id != -1) {
                                ir.deleteDocument(id);
                            }
                            ul = dul;
                            id = docid;
                        }
                    }
                }
            }
        }
        //
        // keep the IndexReader open...
        //

        LOG.info("* Deduplicating took " + (System.currentTimeMillis() - s1) + " ms");
        stage = SegmentMergeStatus.STAGE_WRITING;
        processedRecords = 0L;
        Vector outDirs = new Vector();
        File outDir = new File(output, SegmentWriter.getNewSegmentName());
        outDirs.add(outDir);
        LOG.info("* Merging all segments into " + output.getName());
        s1 = System.currentTimeMillis();
        delta = s1;
        nfs.mkdirs(outDir);
        SegmentWriter sw = new SegmentWriter(nfs, outDir, true);
        LOG.fine(" - opening first output segment in " + outDir.getName());
        FetcherOutput fo = new FetcherOutput();
        Content co = new Content();
        ParseText pt = new ParseText();
        ParseData pd = new ParseData();
        int outputCnt = 0;
        for (int n = 0; n < ir.maxDoc(); n++) {
            if (ir.isDeleted(n)) {
                //System.out.println("-del");
                continue;
            }
            Document doc = ir.document(n);
            String segDoc = doc.get("sd");
            int idx = segDoc.indexOf('|');
            String segName = segDoc.substring(0, idx);
            String docName = segDoc.substring(idx + 1);
            SegmentReader sr = (SegmentReader) readers.get(segName);
            long docid;
            try {
                docid = Long.parseLong(docName);
            } catch (Exception e) {
                continue;
            }
            try {
                // get data from the reader
                sr.get(docid, fo, co, pt, pd);
            } catch (Throwable thr) {
                // don't break the loop, because only one of the segments
                // may be corrupted...
                LOG.fine(" - corrupt record no. " + docid + " in segment " + sr.segmentDir.getName()
                        + " - skipping.");
                continue;
            }
            sw.append(fo, co, pt, pd);
            outputCnt++;
            processedRecords++;
            if (processedRecords > 0 && (processedRecords % LOG_STEP == 0)) {
                LOG.info(" Processed " + processedRecords + " records ("
                        + (float) (LOG_STEP * 1000) / (float) (System.currentTimeMillis() - delta) + " rec/s)");
                delta = System.currentTimeMillis();
            }
            if (processedRecords % maxCount == 0) {
                sw.close();
                outDir = new File(output, SegmentWriter.getNewSegmentName());
                LOG.fine(" - starting next output segment in " + outDir.getName());
                nfs.mkdirs(outDir);
                sw = new SegmentWriter(nfs, outDir, true);
                outDirs.add(outDir);
            }
        }
        LOG.info("* Merging took " + (System.currentTimeMillis() - s1) + " ms");
        ir.close();
        sw.close();
        FileUtil.fullyDelete(fsmtIndexDir);
        for (Iterator iter = readers.keySet().iterator(); iter.hasNext();) {
            SegmentReader sr = (SegmentReader) readers.get(iter.next());
            sr.close();
        }
        if (runIndexer) {
            stage = SegmentMergeStatus.STAGE_INDEXING;
            totalRecords = outDirs.size();
            processedRecords = 0L;
            LOG.info("* Creating new segment index(es)...");
            File workingDir = new File(output, "indexsegment-workingdir");
            for (int k = 0; k < outDirs.size(); k++) {
                processedRecords++;
                if (workingDir.exists()) {
                    FileUtil.fullyDelete(workingDir);
                }
                IndexSegment indexer = new IndexSegment(nfs, Integer.MAX_VALUE, (File) outDirs.get(k),
                        workingDir);
                indexer.indexPages();
                FileUtil.fullyDelete(workingDir);
            }
        }
        if (delSegs) {
            // This deletes also all corrupt segments, which are
            // unusable anyway
            stage = SegmentMergeStatus.STAGE_DELETING;
            totalRecords = allsegdirs.size();
            processedRecords = 0L;
            LOG.info("* Deleting old segments...");
            for (int k = 0; k < allsegdirs.size(); k++) {
                processedRecords++;
                FileUtil.fullyDelete((File) allsegdirs.get(k));
            }
        }
        delta = System.currentTimeMillis() - start;
        float eps = (float) total / (float) (delta / 1000);
        LOG.info("Finished SegmentMergeTool: INPUT: " + total + " -> OUTPUT: " + outputCnt + " entries in "
                + ((float) delta / 1000f) + " s (" + eps + " entries/sec).");
    } catch (Exception e) {
        e.printStackTrace();
        LOG.severe(e.getMessage());
    }
}

From source file:org.apache.roller.weblogger.business.search.IndexManagerImpl.java

License:Apache License

private IndexOperation getSaveIndexOperation() {
    return new WriteToIndexOperation(this) {
        public void doRun() {
            Directory dir = getIndexDirectory();
            Directory fsdir = getFSDirectory(true);
            IndexWriter writer = null;
            try {
                IndexWriterConfig config = new IndexWriterConfig(FieldConstants.LUCENE_VERSION,
                        new LimitTokenCountAnalyzer(IndexManagerImpl.getAnalyzer(),
                                IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL));
                writer = new IndexWriter(fsdir, config);
                writer.addIndexes(new Directory[] { dir });
                writer.commit();
                indexConsistencyMarker.delete();
            } catch (IOException e) {
                mLogger.error("Problem saving index to disk", e);
                // Delete the directory, since there was a problem saving the RAM contents
                getFSDirectory(true);
            } finally {
                try {
                    if (writer != null) {
                        writer.close();
                    }
                } catch (IOException e1) {
                    mLogger.warn("Unable to close IndexWriter.");
                }
            }
        }
    };
}