Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:com.adanac.module.blog.search.LuceneHelper.java

License:Apache License

private static void generateIndex(String path, String id, String title, String content,
        List<Map<String, String>> dataList) {
    try {/*from ww  w .j  av  a2s . co  m*/
        Directory dir = FSDirectory.open(Paths.get(INDEX_PATH + path));
        Analyzer analyzer = new SmartChineseAnalyzer();
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        indexWriterConfig.setOpenMode(OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(dir, indexWriterConfig);
        for (Map<String, String> data : dataList) {
            Document document = new Document();
            Field idField = new IntField("id", Integer.valueOf(data.get(id)), Field.Store.YES);
            Field indexedContentField = new TextField("indexedContent",
                    data.get(title) + SEPARATOR + data.get(content), Field.Store.YES);
            document.add(idField);
            document.add(indexedContentField);
            writer.addDocument(document);
            if (logger.isInfoEnabled()) {
                logger.info("add index for : [" + data.get(title) + "]");
            }
        }
        writer.close();
    } catch (Exception e) {
        logger.error("add index failed ...", e);
    }
}

From source file:com.admarketplace.isg.lucene.demo.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;//from  ww w  .  j av  a2 s . co  m
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.aliasi.lingmed.medline.IndexMedline.java

License:Lingpipe license

/**
 * Run the command.  See class documentation above for details on
 * arguments and behavior.//from w  w w .ja va2s.  c om
 */
public void run() {
    System.out.println("start run");
    try {
        File[] files = getLaterFiles(mDistDir);
        System.out.println("Total files to process: " + files.length);
        System.out.println("File names: " + java.util.Arrays.asList(files));
        //            if (mLogger.isDebugEnabled())
        //                mLogger.debug("File names: " + java.util.Arrays.asList(files));
        if (files.length > 0) {
            MedlineParser parser = new MedlineParser(true); // true = save raw XML

            Directory fsDir = FSDirectory.open(mIndex);
            IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36, mCodec.getAnalyzer());
            iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
            iwConf.setRAMBufferSizeMB(RAM_BUF_SIZE);
            if (sIsBaseline) {
                LogDocMergePolicy ldmp = new LogDocMergePolicy();
                ldmp.setMergeFactor(MERGE_FACTOR_HI);
                iwConf.setMergePolicy(ldmp);
            }
            IndexWriter indexWriter = new IndexWriter(fsDir, iwConf);

            for (File file : files) {
                System.out.println("processing file: " + file);
                MedlineIndexer indexer = new MedlineIndexer(indexWriter, mCodec);
                parser.setHandler(indexer);
                parseFile(parser, file);
                indexer.close();
                recordFile(indexWriter, file.getName());
                System.out.println("completed processing file: " + file);
            }
            System.out.println("All files parsed, now optimize index");
            indexWriter.forceMerge(1);
            indexWriter.commit();
            indexWriter.close();
        }
        System.out.println("Processing complete.");
    } catch (Exception e) {
        //            mLogger.warn("Unexpected Exception: "+e.getMessage());
        //            mLogger.warn("stack trace: "+Logging.logStackTrace(e));
        //            mLogger.warn("Aborting this run");
        IllegalStateException e2 = new IllegalStateException(e.getMessage());
        e2.setStackTrace(e.getStackTrace());
        throw e2;
    }
}

From source file:com.aliasi.lingmed.medline.OptimizeMedline.java

License:Lingpipe license

/**
 * Run the command.  See class documentation above for details on
 * arguments and behavior.//from w w  w  .  ja v a2s . c o m
 */
public void run() {
    //        mLogger.info("start run");
    try {
        Directory fsDir = FSDirectory.open(mIndex);
        IndexWriterConfig iwConf = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        iwConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        IndexWriter indexWriter = new IndexWriter(fsDir, iwConf);
        indexWriter.forceMerge(1);
        indexWriter.commit();
        //            mLogger.info("Processing complete.");
    } catch (Exception e) {
        //            mLogger.warn("Unexpected Exception: "+e.getMessage());
        //            mLogger.warn("stack trace: "+Logging.logStackTrace(e));
        //            mLogger.warn("Aborting this run");
        IllegalStateException e2 = new IllegalStateException(e.getMessage());
        e2.setStackTrace(e.getStackTrace());
        throw e2;
    }
}

From source file:com.aliasi.lingmed.medline.SearchableMedlineCodec.java

License:Lingpipe license

public static void main(String[] args) throws Exception {
    org.apache.lucene.store.RAMDirectory directory = new org.apache.lucene.store.RAMDirectory();

    // org.apache.lucene.analysis.SimpleAnalyzer analyzer 
    // = new org.apache.lucene.analysis.SimpleAnalyzer();
    // org.apache.lucene.analysis.KeywordAnalyzer analyzer 
    // = new org.apache.lucene.analysis.KeywordAnalyzer();
    MedlineCodec codec = new MedlineCodec();
    Analyzer analyzer = codec.getAnalyzer();

    org.apache.lucene.index.IndexWriterConfig iwConf = new org.apache.lucene.index.IndexWriterConfig(
            org.apache.lucene.util.Version.LUCENE_36, analyzer);
    iwConf.setOpenMode(org.apache.lucene.index.IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    org.apache.lucene.index.IndexWriter indexWriter = new org.apache.lucene.index.IndexWriter(directory,
            iwConf);/*from   w ww . j a  va  2s.  c  om*/

    Document doc = new Document();
    doc.add(new Field(Fields.MESH_MINOR_FIELD, "abc", Field.Store.NO, Field.Index.ANALYZED));
    doc.add(new Field(Fields.MESH_MINOR_FIELD, " xyz efg", Field.Store.NO, Field.Index.ANALYZED));
    indexWriter.addDocument(doc);
    indexWriter.close();

    org.apache.lucene.index.IndexReader reader = org.apache.lucene.index.IndexReader.open(directory);
    org.apache.lucene.search.IndexSearcher searcher = new org.apache.lucene.search.IndexSearcher(reader);

    org.apache.lucene.queryParser.QueryParser qp = new org.apache.lucene.queryParser.QueryParser(
            org.apache.lucene.util.Version.LUCENE_36, "foo", analyzer);
    org.apache.lucene.search.Query query = qp.parse(Fields.MESH_MINOR_FIELD + ":efg");

    org.apache.lucene.search.TopDocs hits = searcher.search(query, 1000);
    System.out.println("hits.length()=" + hits.scoreDocs.length);

    org.apache.lucene.analysis.TokenStream ts = analyzer.tokenStream(Fields.MESH_MINOR_FIELD,
            new java.io.StringReader("abc xyz efg"));
    org.apache.lucene.analysis.tokenattributes.CharTermAttribute terms = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.CharTermAttribute.class);
    org.apache.lucene.analysis.tokenattributes.OffsetAttribute offsets = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.OffsetAttribute.class);
    org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute positions = ts
            .addAttribute(org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute.class);

    while (ts.incrementToken()) {
        int increment = positions.getPositionIncrement();
        int start = offsets.startOffset();
        int end = offsets.endOffset();
        String term = terms.toString();
        System.out.println("token=|" + term + "|" + " startOffset=" + start + " endOffset=" + end
                + " positionIncr=" + increment);
    }
}

From source file:com.aurel.track.lucene.index.LuceneIndexer.java

License:Open Source License

/**
 * Initializes an IndexWriter.//from w ww . j  av  a  2s .  co m
 * It will be called from the following places:
 *    -   on system startup workItemWriter should be initialized!
 *                   created = false if no reindex at startup
 *                   created = true if reindex at startup
 *    -   before explicit reIndexing: created = true
 *    -   after reindexing: create = false;
 *    During the adding/editing/deleting of index data the IndexWriter should be initialized with created = false!
 * @param created
 * @param index
 */
public static IndexWriter initWriter(boolean created, int index) {
    Directory indexDirectory = LuceneUtil.getIndexDirectory(index);
    if (indexDirectory == null) {
        LOGGER.error("Can not find or create the index directory for workitems");
        return null;
    }
    Analyzer analyzer = LuceneUtil.getAnalyzer();
    if (analyzer == null) {
        LOGGER.error("Analyzer is null");
        return null;
    }
    IndexWriter indexWriter = getIndexWriter(index);
    if (indexWriter != null) {
        try {
            //release the lock
            indexWriter.close();
        } catch (IOException e) {
            LOGGER.error("Closing the IndexWriter for index " + index + " failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
    }
    try {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
        OpenMode openMode = null;
        if (created) {
            openMode = OpenMode.CREATE;
        } else {
            openMode = OpenMode.APPEND;
        }
        indexWriterConfig.setOpenMode(openMode);
        indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
    } catch (OverlappingFileLockException e) {
        if (LOGGER.isDebugEnabled()) {
            LOGGER.debug("OverlappingFileLockException " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
        if (!created) {
            //try again this time with created = true
            try {
                //open for create
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
                indexWriterConfig.setOpenMode(OpenMode.CREATE);
                //indexWriter = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
                //close it in order to reopen it for modifications
                indexWriter.close();
            } catch (IOException e1) {
                LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
            //try again this time with created = false
            try {
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
                indexWriterConfig.setOpenMode(OpenMode.APPEND);
                //indexWriter = new IndexWriter(indexDirectory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
                indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
            } catch (IOException e1) {
                LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
        } else {
            LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
    } catch (IOException e) {
        //tried probably with created = false, when the index structure doesn't exist yet
        //it is the case when by startup the useLucene is active but reindexOnStartup not
        //we should try to open the writers with false (for modifications) in order to not to destroy the possible existing index
        //but when the index doesn't exists yet the opening of the writer with false fails. And this is the case now.
        if (!created) {
            //try again this time with created = true
            try {
                //open for create
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
                indexWriterConfig.setOpenMode(OpenMode.CREATE);
                //indexWriter = new IndexWriter(indexDirectory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
                indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
                //close it in order to reopen it for modifications
                indexWriter.close();
            } catch (IOException e1) {
                LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
            //try again this time with created = false
            try {
                IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
                indexWriterConfig.setOpenMode(OpenMode.APPEND);
                //indexWriter = new IndexWriter(indexDirectory, analyzer, false, IndexWriter.MaxFieldLength.UNLIMITED);
                indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
            } catch (IOException e1) {
                LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
                LOGGER.debug(ExceptionUtils.getStackTrace(e));
            }
        } else {
            LOGGER.error("Creating the IndexWriter for index " + index + " failed with " + e.getMessage());
            LOGGER.debug(ExceptionUtils.getStackTrace(e));
        }
    }
    switch (index) {
    case LuceneUtil.INDEXES.WORKITEM_INDEX:
        workItemWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.NOT_LOCALIZED_LIST_INDEX:
        notLocalizedLookupWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.LOCALIZED_LIST_INDEX:
        localizedLookupWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.EXTERNAL_LOOKUP_WRITER:
        externalLookupWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.ATTACHMENT_INDEX:
        attachmentWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.EXPENSE_INDEX:
        expenseWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.BUDGET_PLAN_INDEX:
        budgetPlanWriter = indexWriter;
        break;
    case LuceneUtil.INDEXES.LINK_INDEX:
        linkWriter = indexWriter;
        break;
    default:
        return null;
    }
    return indexWriter;
}

From source file:com.b2international.index.compat.SingleDirectoryIndexImpl.java

License:Apache License

protected void initLucene(final File indexDirectory, final boolean clean) {
    try {/*from  www  .j  av a  2 s  . c  o m*/
        this.directory = Directories.openFile(indexDirectory.toPath());
        final Analyzer analyzer = new ComponentTermAnalyzer();
        final IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(clean ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
        config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(config.getIndexDeletionPolicy()));
        this.writer = new IndexWriter(directory, config);
        this.writer.commit(); // Create index if it didn't exist
        this.manager = new SearcherManager(directory, new SearchWarmerFactory());
    } catch (final IOException e) {
        throw new RuntimeException(e.getMessage(), e);
    }
}

From source file:com.baidu.rigel.biplatform.tesseract.isservice.index.service.IndexWriterFactory.java

License:Open Source License

/**
 * /*from  ww w  . java 2s.co  m*/
 * getIndexWriter
 * 
 * @param idxPath
 *            
 * @return IndexWriter
 * @throws IOException
 *             IO
 */
public static synchronized IndexWriter getIndexWriter(String idxPath) throws IOException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "getIndexWriter",
            "[idxPath:" + idxPath + "]"));
    IndexWriter indexWriter = null;
    if (INSTANCE.idxWriterMaps.containsKey(idxPath)) {
        indexWriter = INSTANCE.idxWriterMaps.get(idxPath);
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "getIndexWriter",
                "return exist IndexWriter "));
    } else {
        File indexFile = new File(idxPath);
        Directory directory = FSDirectory.open(indexFile);
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_1,
                new StandardAnalyzer());
        indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        indexWriterConfig.setRAMBufferSizeMB(64.0);
        indexWriterConfig.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);
        indexWriter = new IndexWriter(directory, indexWriterConfig);

        INSTANCE.idxWriterMaps.put(idxPath, indexWriter);
        LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_PROCESS_NO_PARAM, "getIndexWriter",
                "create new IndexWriter "));
    }
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "getIndexWriter",
            "[idxPath:" + idxPath + "]"));
    return indexWriter;
}

From source file:com.baidu.rigel.biplatform.tesseract.isservice.index.service.IndexWriterFactory.java

License:Open Source License

public static synchronized IndexWriter getIndexWriterWithSingleSlot(String idxPath) throws IOException {
    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_BEGIN, "getIndexWriter",
            "[idxPath:" + idxPath + "]"));
    IndexWriter indexWriter = null;//from  ww w. ja  v  a  2 s  .c om
    Integer maxSlot = 0;
    if (INSTANCE.idxMaps.containsKey(idxPath)) {
        maxSlot = INSTANCE.idxMaps.get(idxPath);
        maxSlot++;
    }

    File indexFile = new File(idxPath + File.separator + maxSlot);
    Directory directory = FSDirectory.open(indexFile);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_10_1, new StandardAnalyzer());
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    indexWriterConfig.setRAMBufferSizeMB(48.0);
    indexWriter = new IndexWriter(directory, indexWriterConfig);

    INSTANCE.idxMaps.put(idxPath, maxSlot);

    LOGGER.info(String.format(LogInfoConstants.INFO_PATTERN_FUNCTION_END, "getIndexWriter",
            "[idxPath:" + idxPath + "]"));
    return indexWriter;
}

From source file:com.bala.learning.learning.luence.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;/* ww  w .  j av a 2 s .  co  m*/
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}