Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

This page collects example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode)

Source Link

Document

Specifies the OpenMode of the index.

Usage

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Recreates the index in {@code indexDirectory} and indexes the resources found under
 * {@code resourceDirectory + "/" + Constants.CSV_LOCATION} via {@code indexDocs}.
 *
 * <p>The writer is opened in {@code OpenMode.CREATE}, so an existing index in the
 * directory is replaced rather than appended to. I/O failures raised while indexing
 * are caught and logged here; they are not rethrown.
 *
 * @throws IOException declared in the signature; failures inside the body are logged instead
 */
public void updateIndex() throws IOException {
    try {
        long startTime = System.nanoTime();
        // Get the index directory
        Directory dir = FSDirectory.open(Paths.get(indexDirectory));
        // Get the directory for resources
        String resourcesDir = resourceDirectory + "/" + Constants.CSV_LOCATION;
        // Get PDF Analyzer
        Analyzer pdf_analyzer = new PDFAnalyzer(resourceDirectory + "/" + Constants.STOPWORDS_FILE);
        // Create an index writer config with the analyzer
        IndexWriterConfig iwc = new IndexWriterConfig(pdf_analyzer);
        // CREATE starts a fresh index, overwriting whatever is already in the directory
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        long endTime;
        // try-with-resources guarantees the writer (and its write lock) is released
        // even when indexing or reader initialisation fails.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            // Index the documents
            indexDocs(writer, resourcesDir);
            endTime = System.nanoTime();
            // The reader is initialised from the still-open writer, before it is closed
            LuceneIndexReader.getInstance().initializeIndexReader(writer);
        }
        log.info("Took: " + (endTime - startTime) / Math.pow(10, 6) + " milliseconds to generate the index.");
    } catch (IOException e) {
        log.error("IO Exception Thrown while updating index " + e.getMessage() + "\n");
    }

}

From source file:Application.mediaIndexer.java

/**
 * Runs {@code indexDocs} for {@code docsPath} against the Lucene index stored at
 * {@code index}, reporting progress and timing to {@code results}.
 *
 * @param index          file-system path of the index directory
 * @param docsPath       path of the documents to process
 * @param results        text area that receives progress and error messages
 * @param CreateOrUpdate {@code true} opens the index with {@code OpenMode.CREATE},
 *                       {@code false} with {@code OpenMode.CREATE_OR_APPEND}
 * @param removeFiles    forwarded to {@code indexDocs}; when set, a "Deleting ..." message is shown first
 * @throws IOException if the index cannot be opened, written or closed
 */
public static void IndexFiles(String index, String docsPath, TextArea results, boolean CreateOrUpdate,
        boolean removeFiles) throws IOException {
    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        // Only a warning: processing continues with the unreadable path, as before.
        results.appendText("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path" + "\n");
    }
    Date start = new Date();
    try {
        if (removeFiles) {
            results.appendText("Deleting '" + docsPath + "' from directory '" + index + "'..." + "\n");
        }
        Directory dir = FSDirectory.open(Paths.get(index));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(CreateOrUpdate ? OpenMode.CREATE : OpenMode.CREATE_OR_APPEND);
        // Optional: when processing many documents, a larger RAM buffer
        // (iwc.setRAMBufferSizeMB) can help, but the JVM heap (-Xmx) must grow with it.

        // try-with-resources releases the writer and its write lock even if indexDocs fails.
        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir, results, removeFiles);
        }
        // Measured after the writer is closed so the final commit is included.
        Date end = new Date();
        long diffInSeconds = (end.getTime() - start.getTime()) / 1000;
        results.appendText(diffInSeconds + " total seconds" + "\n");
    } catch (SAXException | TikaException e) {
        e.printStackTrace();
        // Surface the failure in the UI as well; previously it was only visible on stderr.
        results.appendText("Indexing failed: " + e + "\n");
    }
}

From source file:apps.LuceneIndexer.java

License:Apache License

/**
 * Command-line entry point: reads documents from a document source and writes them
 * into a newly created Lucene index, optionally emitting a TREC-format QREL file.
 *
 * <p>Options: {@code -i} input file, {@code -o} output (index) directory, {@code -r}
 * optional QREL file, {@code -source_type} document source type, {@code -bm25_k1} and
 * {@code -bm25_b} BM25 parameters, {@code -bm25fixed} to use {@code BM25SimilarityFix}.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    Options options = new Options();

    options.addOption("i", null, true, "input file");
    options.addOption("o", null, true, "output directory");
    options.addOption("r", null, true, "optional output TREC-format QREL file");

    options.addOption("bm25_b", null, true, "BM25 parameter: b");
    options.addOption("bm25_k1", null, true, "BM25 parameter: k1");
    options.addOption("bm25fixed", null, false, "use the fixed BM25 similarity");

    Joiner commaJoin = Joiner.on(',');
    Joiner spaceJoin = Joiner.on(' ');

    options.addOption("source_type", null, true,
            "document source type: " + commaJoin.join(SourceFactory.getDocSourceList()));

    // If you increase this value, you may need to modify the following line in *.sh file
    // export MAVEN_OPTS="-Xms8192m -server"
    double ramBufferSizeMB = 1024 * 8; // 8 GB

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    IndexWriter indexWriter = null;
    BufferedWriter qrelWriter = null;

    int docNum = 0;

    try {
        CommandLine cmd = parser.parse(options, args);

        String inputFileName = null, outputDirName = null, qrelFileName = null;

        if (cmd.hasOption("i")) {
            inputFileName = cmd.getOptionValue("i");
        } else {
            Usage("Specify 'input file'", options);
        }

        if (cmd.hasOption("o")) {
            outputDirName = cmd.getOptionValue("o");
        } else {
            Usage("Specify 'index directory'", options);
        }

        if (cmd.hasOption("r")) {
            qrelFileName = cmd.getOptionValue("r");
        }

        String sourceName = cmd.getOptionValue("source_type");

        if (sourceName == null)
            Usage("Specify document source type", options);

        if (qrelFileName != null)
            qrelWriter = new BufferedWriter(new FileWriter(qrelFileName));

        // Make sure the output location exists, is a directory and is writable.
        File outputDir = new File(outputDirName);
        if (!outputDir.exists()) {
            if (!outputDir.mkdirs()) {
                System.out.println("couldn't create " + outputDir.getAbsolutePath());
                System.exit(1);
            }
        }
        if (!outputDir.isDirectory()) {
            System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
            System.exit(1);
        }
        if (!outputDir.canWrite()) {
            System.out.println("Can't write to " + outputDir.getAbsolutePath());
            System.exit(1);
        }

        boolean useFixedBM25 = cmd.hasOption("bm25fixed");

        float bm25_k1 = UtilConst.BM25_K1_DEFAULT, bm25_b = UtilConst.BM25_B_DEFAULT;

        if (cmd.hasOption("bm25_k1")) {
            try {
                bm25_k1 = Float.parseFloat(cmd.getOptionValue("bm25_k1"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_k1'", options);
            }
        }

        if (cmd.hasOption("bm25_b")) {
            try {
                bm25_b = Float.parseFloat(cmd.getOptionValue("bm25_b"));
            } catch (NumberFormatException e) {
                Usage("Wrong format for 'bm25_b'", options);
            }
        }

        EnglishAnalyzer analyzer = new EnglishAnalyzer();
        FSDirectory indexDir = FSDirectory.open(Paths.get(outputDirName));
        IndexWriterConfig indexConf = new IndexWriterConfig(analyzer);

        /*
            OpenMode.CREATE creates a new index or overwrites an existing one.
            https://lucene.apache.org/core/6_0_0/core/org/apache/lucene/index/IndexWriterConfig.OpenMode.html#CREATE
        */
        indexConf.setOpenMode(OpenMode.CREATE);
        indexConf.setRAMBufferSizeMB(ramBufferSizeMB);

        System.out.println(String.format("BM25 parameters k1=%f b=%f ", bm25_k1, bm25_b));

        if (useFixedBM25) {
            System.out.println(String.format("Using fixed BM25Simlarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25SimilarityFix(bm25_k1, bm25_b));
        } else {
            System.out.println(String.format("Using Lucene BM25Similarity, k1=%f b=%f", bm25_k1, bm25_b));
            indexConf.setSimilarity(new BM25Similarity(bm25_k1, bm25_b));
        }

        indexWriter = new IndexWriter(indexDir, indexConf);

        DocumentSource inpDocSource = SourceFactory.createDocumentSource(sourceName, inputFileName);
        DocumentEntry inpDoc = null;
        TextCleaner textCleaner = new TextCleaner(null);

        while ((inpDoc = inpDocSource.next()) != null) {
            ++docNum;

            Document luceneDoc = new Document();
            ArrayList<String> cleanedToks = textCleaner.cleanUp(inpDoc.mDocText);
            String cleanText = spaceJoin.join(cleanedToks);

            luceneDoc.add(new StringField(UtilConst.FIELD_ID, inpDoc.mDocId, Field.Store.YES));
            luceneDoc.add(new TextField(UtilConst.FIELD_TEXT, cleanText, Field.Store.YES));
            indexWriter.addDocument(luceneDoc);

            // A QREL line is written only when relevance info is present and -r was given.
            if (inpDoc.mIsRel != null && qrelWriter != null) {
                saveQrelOneEntry(qrelWriter, inpDoc.mQueryId, inpDoc.mDocId, inpDoc.mIsRel ? MAX_GRADE : 0);
            }
            if (docNum % 1000 == 0)
                System.out.println(String.format("Indexed %d documents", docNum));

        }

    } catch (ParseException e) {
        e.printStackTrace();
        Usage("Cannot parse arguments" + e, options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        // Keep the stack trace; the one-line message alone rarely locates the failure.
        e.printStackTrace();
        System.exit(1);
    } finally {
        System.out.println(String.format("Indexed %d documents", docNum));

        // Close each resource in its own try block: a failure while closing the index
        // must not prevent the QREL writer from flushing its buffered output.
        try {
            if (null != indexWriter)
                indexWriter.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
        try {
            if (null != qrelWriter)
                qrelWriter.close();
        } catch (IOException e) {
            System.err.println("IO exception: " + e);
            e.printStackTrace();
        }
    }
}

From source file:au.org.ala.names.search.ALANameIndexer.java

License:Open Source License

/**
 * Creates an index writer in the specified directory.  It will create/recreate
 * the target directory/*  w ww . ja v a  2s.  c  om*/
 *
 * @param directory
 * @param analyzer
 * @return
 * @throws Exception
 */
protected IndexWriter createIndexWriter(File directory, Analyzer analyzer, boolean replace) throws Exception {
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_34, analyzer);
    if (replace)
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    else
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    if (directory.exists() && replace) {
        FileUtils.forceDelete(directory);
    }
    FileUtils.forceMkdir(directory);
    IndexWriter iw = new IndexWriter(FSDirectory.open(directory), conf);
    return iw;
}

From source file:back.Indexer.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments: {@code -index INDEX_PATH} (default {@code .\indexed}),
 * {@code -docs DOCS_PATH} (default {@code .//artigos}) and {@code -update}, which
 * switches from {@code OpenMode.CREATE} to {@code OpenMode.CREATE_OR_APPEND}.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = ".\\indexed";
    String docsPath = ".//artigos";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        boolean takesValue = "-index".equals(args[i]) || "-docs".equals(args[i]);
        if (takesValue && i + 1 >= args.length) {
            // A flag that needs a value was given last: report usage instead of
            // failing with ArrayIndexOutOfBoundsException.
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        // An empty CharArraySet is passed as the stop-word set.
        // NOTE(review): the analyzer uses LUCENE_CURRENT while the config uses LUCENE_40 - confirm this is intended.
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT,
                new CharArraySet(Version.LUCENE_CURRENT, 0, false));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // Always release the writer (and its write lock), even if indexing fails.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:biospectra.index.Indexer.java

License:Apache License

/**
 * Sets up this indexer: prepares the index directory, opens the index writer and
 * pre-populates the free queue with one reusable document per worker thread.
 *
 * @param indexPath     directory that holds the index; created if missing, then passed to {@code cleanUpDirectory}
 * @param kmerSize      k-mer size handed to the {@code KmerIndexAnalyzer}
 * @param minStrandKmer stored on this instance and handed to the analyzer
 * @param similarity    similarity to install on the writer config; skipped when {@code null}
 * @param workerThreads number of workers; sizes the executor and the free queue
 * @param ramBufferSize RAM buffer size in MB; applied only when positive
 * @throws Exception if the directory or the index writer cannot be set up
 */
private void initialize(File indexPath, int kmerSize, boolean minStrandKmer, Similarity similarity,
        int workerThreads, int ramBufferSize) throws Exception {
    final boolean directoryMissing = !indexPath.exists();
    if (directoryMissing) {
        indexPath.mkdirs();
    }

    // Checked again after mkdirs(): a freshly created directory is cleaned up as well.
    if (indexPath.exists()) {
        cleanUpDirectory(indexPath);
    }

    this.indexPath = indexPath;
    this.minStrandKmer = minStrandKmer;
    this.analyzer = new KmerIndexAnalyzer(kmerSize, minStrandKmer);

    final Directory indexDirectory = new MMapDirectory(this.indexPath.toPath());
    final IndexWriterConfig writerConfig = new IndexWriterConfig(this.analyzer);
    if (similarity != null) {
        writerConfig.setSimilarity(similarity);
    }

    this.workerThreads = workerThreads;

    if (ramBufferSize > 0) {
        writerConfig.setRAMBufferSizeMB(ramBufferSize);
    }

    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    this.indexWriter = new IndexWriter(indexDirectory, writerConfig);

    this.executor = new BlockingExecutor(this.workerThreads, this.workerThreads * 2);

    // One template document per worker, each carrying the five fields with empty values.
    for (int remaining = this.workerThreads; remaining > 0; remaining--) {
        final Document template = new Document();
        template.add(new StringField(IndexConstants.FIELD_FILENAME, "", Field.Store.YES));
        template.add(new StringField(IndexConstants.FIELD_HEADER, "", Field.Store.YES));
        template.add(new StringField(IndexConstants.FIELD_SEQUENCE_DIRECTION, "", Field.Store.YES));
        template.add(new StringField(IndexConstants.FIELD_TAXONOMY_TREE, "", Field.Store.YES));
        template.add(new TextField(IndexConstants.FIELD_SEQUENCE, "", Field.Store.NO));

        this.freeQueue.offer(template);
    }
}

From source file:br.andrew.lucene.testing.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>The locations are fixed: documents are read from {@code data} and the index is
 * written to {@code index}, opened with {@code OpenMode.CREATE}. Command-line
 * arguments are not consulted.
 *
 * @param args ignored
 */
public static void main(final String[] args) {
    final String indexPath = "index";

    final File docDir = new File("data");

    final Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        final Directory dir = FSDirectory.open(new File(indexPath));
        final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);
        final IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_40, analyzer);

        iwc.setOpenMode(OpenMode.CREATE);

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        final IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            IndexFiles.indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // Always release the writer (and its write lock), even if indexing fails.
            writer.close();
        }

        final Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (final IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:br.bireme.ngrams.NGIndex.java

/**
 * Opens an {@link IndexWriter} on the index stored at {@code indexPath}.
 *
 * @param indexPath file-system path of the index directory; must not be {@code null}
 * @param analyzer  analyzer for the writer configuration; must not be {@code null}
 * @param append    {@code true} opens with {@code OpenMode.APPEND}; {@code false}
 *                  deletes any {@code write.lock} file in the directory and opens
 *                  with {@code OpenMode.CREATE}
 * @return the opened writer
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(final String indexPath, final Analyzer analyzer, final boolean append)
        throws IOException {
    assert indexPath != null;
    assert analyzer != null;

    final File dir = new File(indexPath);
    final Directory directory = FSDirectory.open(dir.toPath());
    final IndexWriterConfig cfg = new IndexWriterConfig(analyzer);

    if (append) {
        cfg.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    } else {
        // Removes a lock file left in the directory before recreating the index.
        // NOTE(review): this also removes a lock held by a live writer - confirm that cannot happen.
        new File(dir, "write.lock").delete();
        cfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    try {
        return new IndexWriter(directory, cfg);
    } catch (IOException | RuntimeException e) {
        // The writer never took ownership of the directory, so close it here
        // instead of leaking it; the original failure is still what gets thrown.
        try {
            directory.close();
        } catch (IOException closeFailure) {
            e.addSuppressed(closeFailure);
        }
        throw e;
    }
}

From source file:buscador.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments: {@code -index INDEX_PATH} (default {@code Zaguan1}),
 * {@code -docs DOCS_PATH} (required) and {@code -update}, which switches from
 * {@code OpenMode.CREATE} to {@code OpenMode.CREATE_OR_APPEND}.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "Zaguan1";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        boolean takesValue = "-index".equals(args[i]) || "-docs".equals(args[i]);
        if (takesValue && i + 1 >= args.length) {
            // A flag that needs a value was given last: report usage instead of
            // failing with ArrayIndexOutOfBoundsException.
            System.err.println("Usage: " + usage);
            System.exit(1);
        }
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new SpanishAnalyzer(Version.LUCENE_44);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // Always release the writer (and its write lock), even if indexing fails.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:ca.dracode.ais.indexer.FileIndexer.java

License:Open Source License

/**
 * Creates the indexer together with its {@code FileSearcher} and opens an index writer
 * on the root storage directory in {@code OpenMode.CREATE_OR_APPEND}, committing once
 * right after opening.
 *
 * <p>An {@link IOException} while opening or committing is printed and otherwise
 * ignored; in that case the writer field may not have been assigned.
 */
public FileIndexer() {
    super();
    this.searcher = new FileSearcher();
    try {
        final Directory indexDirectory = FSDirectory.open(new File(FileIndexer.getRootStorageDir()));
        final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_47,
                new SimpleAnalyzer(Version.LUCENE_47));
        writerConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        this.writer = new IndexWriter(indexDirectory, writerConfig);
        this.writer.commit();
    } catch (IOException openFailure) {
        openFailure.printStackTrace();
    }
}