Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig#setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Document

Specifies the OpenMode of the index (CREATE, APPEND, or CREATE_OR_APPEND).
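
Before the project examples below, here is a minimal sketch of the call in isolation. It assumes a recent Lucene release where IndexWriterConfig takes only an Analyzer, and the class name and index path are hypothetical placeholders: CREATE rebuilds the index from scratch, CREATE_OR_APPEND creates it if missing and otherwise appends, and APPEND requires an existing index.

import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenModeSketch {
    public static void main(String[] args) throws Exception {
        // hypothetical index location, for illustration only
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        IndexWriterConfig iwc = new IndexWriterConfig(new StandardAnalyzer());
        // CREATE: remove any previously indexed documents and start fresh
        // CREATE_OR_APPEND: create the index if it does not exist, otherwise append
        // APPEND: open an existing index and fail if it is missing
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // add or update documents here
        }
    }
}

The usage examples that follow show the same call in the context of the Lucene versions each project targets.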

Usage

From source file:org.ohdsi.usagi.tests.TestLucene.java

License:Apache License

public static void main(String[] args) throws IOException, ParseException {
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9);
    //Analyzer analyzer = new UsagiAnalyzer();
    FieldType textVectorField = new FieldType();
    textVectorField.setIndexed(true);
    textVectorField.setTokenized(true);
    textVectorField.setStoreTermVectors(true);
    textVectorField.setStoreTermVectorPositions(false);
    textVectorField.setStoreTermVectorPayloads(false);
    textVectorField.setStoreTermVectorOffsets(false);
    textVectorField.setStored(true);
    textVectorField.freeze();

    File indexFolder = new File(folder);
    if (indexFolder.exists())
        DirectoryUtilities.deleteDir(indexFolder);

    Directory dir = FSDirectory.open(indexFolder);

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(256.0);
    IndexWriter writer = new IndexWriter(dir, iwc);
    Document doc = new Document();
    doc.add(new Field("F", "word1 word2 w3 word4", textVectorField));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new Field("F", "word1 word2 w3", textVectorField));
    writer.addDocument(doc);

    writer.close();

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder)));
    for (int i = 0; i < reader.numDocs(); i++) {
        TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null);
        BytesRef text;
        while ((text = termsEnum.next()) != null) {
            System.out.print(text.utf8ToString() + ",");
        }
        System.out.println();
    }
    IndexSearcher searcher = new IndexSearcher(reader);

    // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader());
    // mlt.setMinTermFreq(0);
    // mlt.setMinDocFreq(0);
    // mlt.setMaxDocFreq(9999);
    // mlt.setMinWordLen(0);
    // mlt.setMaxWordLen(9999);
    // mlt.setMaxDocFreqPct(100);
    // mlt.setMaxNumTokensParsed(9999);
    // mlt.setMaxQueryTerms(9999);
    // mlt.setStopWords(null);
    // mlt.setFieldNames(new String[] { "F" });
    // mlt.setAnalyzer(new UsagiAnalyzer());
    // Query query = mlt.like("F", new StringReader("Systolic blood pressure"));
    QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer);
    Query query = parser.parse("word1");

    Explanation explanation = searcher.explain(query, 0);
    print(explanation);
    System.out.println();
    explanation = searcher.explain(query, 1);
    print(explanation);
    System.out.println();

    TopDocs topDocs = searcher.search(query, 99);
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F"));
    }
}

From source file:org.ohdsi.usagi.UsagiSearchEngine.java

License:Apache License

public void createNewMainIndex() {
    try {
        File indexFolder = new File(folder + "/" + MAIN_INDEX_FOLDER);
        if (indexFolder.exists())
            DirectoryUtilities.deleteDir(indexFolder);

        Directory dir = FSDirectory.open(indexFolder);

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, new UsagiAnalyzer());
        iwc.setOpenMode(OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(256.0);
        writer = new IndexWriter(dir, iwc);

    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.ojbc.adapters.analyticaldatastore.personid.IndexedIdentifierGenerationStrategy.java

License:RPL License

private void init() throws Exception {
    Directory indexDirectory = FSDirectory.open(new File(indexDirectoryPath));
    log.info("Set Lucene index directory to " + indexDirectory.toString());
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));
    indexWriter = new IndexWriter(indexDirectory, config);
}

From source file:org.ojbc.adapters.analyticaldatastore.util.LuceneUtils.java

License:RPL License

public static void main(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println("Must provide source and target index directories as command line arguments");
        System.exit(1);
    }
    Directory sourceDir = FSDirectory.open(new File(args[0]));
    DirectoryReader reader = DirectoryReader.open(sourceDir);

    Directory targetDir = FSDirectory.open(new File(args[1]));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));
    IndexWriter writer = new IndexWriter(targetDir, config);

    Set<String> allowedFields = new HashSet<String>();
    allowedFields.add(IdentifierGenerationStrategy.FIRST_NAME_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.LAST_NAME_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.MIDDLE_NAME_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.BIRTHDATE_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.SEX_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.SSN_FIELD);
    allowedFields.add(IdentifierGenerationStrategy.ID_FIELD);

    try {
        int lastDocumentIndex = reader.maxDoc();
        for (int i = 0; i < lastDocumentIndex; i++) {
            Document d = reader.document(i);
            Document newDoc = new Document();
            List<IndexableField> fields = d.getFields();
            for (IndexableField f : fields) {
                String fieldName = f.name();
                String fieldValue = f.stringValue();
                if (allowedFields.contains(fieldName)) {
                    newDoc.add(new StringField(fieldName, fieldValue, Store.YES));
                }
            }
            writer.addDocument(newDoc);
            writer.commit();
        }
    } finally {
        reader.close();
        writer.close();
    }
}

From source file:org.olat.search.service.indexer.JmsIndexer.java

License:Apache License

public IndexWriterConfig newIndexWriterConfig() {
    Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION);
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer);
    indexWriterConfig.setMergePolicy(newLogMergePolicy());
    indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB); // for better performance, set to 48 MB (see the Lucene documentation on "How to make indexing faster")
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    return indexWriterConfig;
}

From source file:org.open.crs.service.lucene.IndexFiles.java

License:Apache License

public IndexFiles(String indexPath) throws IOException {
    Directory dir = FSDirectory.open(Paths.get(indexPath));

    //        Analyzer analyzer = new StandardAnalyzer();
    //        Analyzer analyzer = new CJKAnalyzer();
    Analyzer analyzer = new SmartChineseAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        // Add new documents to an existing index:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    // Optional: for better indexing performance, if you
    // are indexing many documents, increase the RAM
    // buffer.  But if you do this, increase the max heap
    // size to the JVM (eg add -Xmx512m or -Xmx1g):
    //
    // iwc.setRAMBufferSizeMB(256.0);

    writer = new IndexWriter(dir, iwc);
}

From source file:org.opencms.search.CmsSearchIndex.java

License:Open Source License

/**
 * Creates a new index writer.<p>
 * 
 * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated
 * 
 * @return the created new index writer
 * 
 * @throws CmsIndexException in case the writer could not be created
 * 
 * @see #getIndexWriter(I_CmsReport, boolean)
 */
protected I_CmsIndexWriter indexWriterCreate(boolean create) throws CmsIndexException {

    IndexWriter indexWriter;
    try {
        // check if the target directory already exists
        File f = new File(m_path);
        if (!f.exists()) {
            // index does not exist yet
            f = f.getParentFile();
            if ((f != null) && !f.exists()) {
                // create the parent folders if required
                f.mkdirs();
            }
            // create must be true if the directory does not exist
            create = true;
        }

        // open file directory for Lucene
        FSDirectory dir = FSDirectory.open(new File(m_path));
        // create Lucene merge policy
        LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        if (m_luceneMaxMergeDocs != null) {
            mergePolicy.setMaxMergeDocs(m_luceneMaxMergeDocs.intValue());
        }
        if (m_luceneMergeFactor != null) {
            mergePolicy.setMergeFactor(m_luceneMergeFactor.intValue());
        }
        if (m_luceneUseCompoundFile != null) {
            mergePolicy.setUseCompoundFile(m_luceneUseCompoundFile.booleanValue());
        }
        // create a new Lucene index configuration
        IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, getAnalyzer());
        // set the index configuration parameters if required 
        if (m_luceneRAMBufferSizeMB != null) {
            indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue());
        }
        if (create) {
            indexConfig.setOpenMode(OpenMode.CREATE);
        } else {
            indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        // create the index
        indexWriter = new IndexWriter(dir, indexConfig);
    } catch (Exception e) {
        throw new CmsIndexException(
                Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name), e);
    }
    return new CmsLuceneIndexWriter(indexWriter, this);
}

From source file:org.openeclass.lucene.demo.IndexCourses.java

License:Open Source License

@SuppressWarnings("deprecation")
public static void main(String[] args) {

    String usage = "java org.openeclass.lucene.demo.IndexCourses" + " [-index INDEX_PATH] [-update]\n\n"
            + "This indexes the courses in Eclass DB, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";

    String indexPath = "data/eclass-index";
    boolean create = true;
    boolean help = false;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        } else if ("-help".equals(args[i])) {
            help = true;
        }
    }

    if (help) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    Date start = new Date();
    try {

        System.out.println("Opening Database connection ...");

        Properties props = PropertyLoader.loadProperties("project-properties.xml");
        Connection con = DriverManager.getConnection(props.getProperty("jdbcurl"), props.getProperty("user"),
                props.getProperty("password"));
        con.setAutoCommit(false);

        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_23);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_23, analyzer);

        if (create) {
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexCourses(writer, con);

        writer.close();
        con.commit();
        con.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    } catch (SQLException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        e.printStackTrace();
    }

}

From source file:org.openerproject.targetproperties.svector.indexing.CustomLuceneIndexer.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String indexPath, String docsPath, boolean create) {
    //    String usage = "java org.apache.lucene.demo.IndexFiles"
    //                 + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
    //                 + "This indexes the documents in DOCS_PATH, creating a Lucene index"
    //                 + "in INDEX_PATH that can be searched with SearchFiles";
    //    String indexPath = "index";
    //    String docsPath = null;
    //    boolean create = true;
    //    for(int i=0;i<args.length;i++) {
    //      if ("-index".equals(args[i])) {
    //        indexPath = args[i+1];
    //        i++;
    //      } else if ("-docs".equals(args[i])) {
    //        docsPath = args[i+1];
    //        i++;
    //      } else if ("-update".equals(args[i])) {
    //        create = false;
    //      }
    //    }

    //    if (docsPath == null) {
    //      System.err.println("Usage: " + usage);
    //      System.exit(1);
    //    }

    final File docDir = new File(docsPath);
    //    if (!docDir.exists() || !docDir.canRead()) {
    //      System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path");
    //      System.exit(1);
    //    }

    Date start = new Date();
    try {
        log.info("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION);
        IndexWriterConfig iwc = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:org.opengrok.indexer.index.IndexDatabase.java

License:Open Source License

/**
 * Update the content of this index database
 *
 * @throws IOException if an error occurs
 */
public void update() throws IOException {
    synchronized (lock) {
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /**
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                try {
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings();
            }
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader);//reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (terms != null) {
                    uidIter = terms.iterator();
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().

                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {

                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }

        try {
            finishWriting();
        } catch (IOException e) {
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    if (finishingException != null) {
        throw finishingException;
    }

    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}