Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

This page collects usage examples of the setOpenMode method of org.apache.lucene.index.IndexWriterConfig.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:edu.cmu.geolocator.io.GetWriter.java

License:Apache License

/**
 * Opens an {@link IndexWriter} on the index located in the given directory.
 *
 * <p>The writer uses a {@code StandardAnalyzer}, the {@code CREATE_OR_APPEND} open mode,
 * the supplied RAM buffer size and a {@code LogDocMergePolicy} with a merge factor of 3.
 *
 * <p>NOTE(review): {@code NIOFSDirectory.open(File)} looks like the static method inherited
 * from {@code FSDirectory}, so both platform branches may end up doing the same thing --
 * confirm against the Lucene version in use.
 *
 * @param indexdirectory path of the index directory
 * @param buffersize value handed to {@code setRAMBufferSizeMB}
 * @return the newly created writer
 * @throws IOException propagated from opening the directory or creating the writer
 */
public static IndexWriter getIndexWriter(String indexdirectory, double buffersize) throws IOException {
    final boolean onWindows = OSUtil.isWindows();
    final File indexLocation = new File(indexdirectory);
    final Directory dir = onWindows
            ? FSDirectory.open(indexLocation)
            : NIOFSDirectory.open(indexLocation);

    final IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_45, new StandardAnalyzer(Version.LUCENE_45));
    config.setOpenMode(OpenMode.CREATE_OR_APPEND);
    config.setRAMBufferSizeMB(buffersize);

    // Merge segments in small groups of three.
    final LogDocMergePolicy mergePolicy = new LogDocMergePolicy();
    mergePolicy.setMergeFactor(3);
    config.setMergePolicy(mergePolicy);

    return new IndexWriter(dir, config);
}

From source file:edu.cmu.lti.huiying.ir.rangedsearch.TableIndexer.java

License:Apache License

/**
 * Indexes the exploded XML files under a fixed documents directory into a Lucene index.
 *
 * <p>Command-line parsing ({@code -index}, {@code -docs}, {@code -update}) is currently
 * disabled: the index path is {@code "index"}, the documents path is hard-coded and the
 * index is always re-created. The {@code args} parameter is therefore ignored.
 *
 * <p>If indexing fails, the writer is rolled back so that the write lock is released and no
 * half-built index is committed.
 *
 * @param args ignored
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = "/home/huiying/JavaWorkspace/TableSemantics/web/numericalDemo/public/data";
    boolean create = true;

    // Kept so the check still applies once argument parsing is restored.
    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    TableIndexer tind = new TableIndexer();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_10_0, new StandardAnalyzer());

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        IndexWriter writer = new IndexWriter(dir, iwc);
        boolean indexed = false;
        try {
            tind.indexExplodedXml(writer, docDir);
            writer.close();
            indexed = true;
        } finally {
            if (!indexed) {
                // Release the write lock without committing a partially built index.
                writer.rollback();
            }
        }

        Date end = new Date();
        System.out.println("total doc added:" + tind.totalDocAdded);
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:edu.cmu.lti.oaqa.knn4qa.apps.LuceneIndexer.java

License:Apache License

/**
 * Creates a new Lucene index from XML index-entry files found under a root directory.
 *
 * <p>The options (named by the {@code CommonParams} constants) supply the root directory, a
 * comma-separated list of sub-directories, the input file name, the output index directory
 * and an optional maximum number of records. For each sub-directory the file
 * {@code rootDir/subDir/solrFileName} is read entry by entry. Every entry becomes one
 * document: the {@code UtilConst.TAG_DOCNO} value is added as a {@code StringField} and all
 * other fields as {@code TextField}s.
 *
 * <p>A parse error or an entry without an ID prints a message and exits with status 1, as
 * does any other exception caught at the end of the method.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    Options options = new Options();

    options.addOption(CommonParams.ROOT_DIR_PARAM, null, true, CommonParams.ROOT_DIR_DESC);
    options.addOption(CommonParams.SUB_DIR_TYPE_PARAM, null, true, CommonParams.SUB_DIR_TYPE_DESC);
    options.addOption(CommonParams.MAX_NUM_REC_PARAM, null, true, CommonParams.MAX_NUM_REC_DESC);
    options.addOption(CommonParams.SOLR_FILE_NAME_PARAM, null, true, CommonParams.SOLR_FILE_NAME_DESC);
    options.addOption(CommonParams.OUT_INDEX_PARAM, null, true, CommonParams.OUT_MINDEX_DESC);

    CommandLineParser parser = new org.apache.commons.cli.GnuParser();

    try {
        CommandLine cmd = parser.parse(options, args);

        String rootDir = cmd.getOptionValue(CommonParams.ROOT_DIR_PARAM);
        if (null == rootDir) {
            Usage("Specify: " + CommonParams.ROOT_DIR_DESC, options);
        }

        String outputDirName = cmd.getOptionValue(CommonParams.OUT_INDEX_PARAM);
        if (null == outputDirName) {
            Usage("Specify: " + CommonParams.OUT_MINDEX_DESC, options);
        }

        String subDirTypeList = cmd.getOptionValue(CommonParams.SUB_DIR_TYPE_PARAM);
        if (null == subDirTypeList || subDirTypeList.isEmpty()) {
            Usage("Specify: " + CommonParams.SUB_DIR_TYPE_DESC, options);
        }

        String solrFileName = cmd.getOptionValue(CommonParams.SOLR_FILE_NAME_PARAM);
        if (null == solrFileName) {
            Usage("Specify: " + CommonParams.SOLR_FILE_NAME_DESC, options);
        }

        int maxNumRec = Integer.MAX_VALUE;

        String tmp = cmd.getOptionValue(CommonParams.MAX_NUM_REC_PARAM);

        if (tmp != null) {
            try {
                maxNumRec = Integer.parseInt(tmp);
                if (maxNumRec <= 0) {
                    Usage("The maximum number of records should be a positive integer", options);
                }
            } catch (NumberFormatException e) {
                Usage("The maximum number of records should be a positive integer", options);
            }
        }

        File outputDir = new File(outputDirName);
        if (!outputDir.exists()) {
            if (!outputDir.mkdirs()) {
                System.out.println("couldn't create " + outputDir.getAbsolutePath());
                System.exit(1);
            }
        }
        if (!outputDir.isDirectory()) {
            System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
            System.exit(1);
        }
        if (!outputDir.canWrite()) {
            System.out.println("Can't write to " + outputDir.getAbsolutePath());
            System.exit(1);
        }

        String[] subDirs = subDirTypeList.split(",");

        int docNum = 0;

        // No English analyzer here, all language-related processing is done already,
        // here we simply white-space tokenize and index tokens verbatim.
        Analyzer analyzer = new WhitespaceAnalyzer();
        FSDirectory indexDir = FSDirectory.open(outputDir);
        IndexWriterConfig indexConf = new IndexWriterConfig(analyzer.getVersion(), analyzer);

        System.out.println("Creating a new Lucene index, maximum # of docs to process: " + maxNumRec);
        indexConf.setOpenMode(OpenMode.CREATE);
        IndexWriter indexWriter = new IndexWriter(indexDir, indexConf);

        for (int subDirId = 0; subDirId < subDirs.length && docNum < maxNumRec; ++subDirId) {
            String inputFileName = rootDir + "/" + subDirs[subDirId] + "/" + solrFileName;

            System.out.println("Input file name: " + inputFileName);

            // try-with-resources: each input file is closed before the next one is opened.
            // NOTE(review): the reader uses the platform default charset -- confirm that
            // this matches the encoding of the input files.
            try (BufferedReader inpText = new BufferedReader(
                    new InputStreamReader(CompressUtils.createInputStream(inputFileName)))) {
                String docText = XmlHelper.readNextXMLIndexEntry(inpText);

                for (; docText != null && docNum < maxNumRec;
                        docText = XmlHelper.readNextXMLIndexEntry(inpText)) {
                    ++docNum;
                    Map<String, String> docFields = null;

                    Document luceneDoc = new Document();

                    try {
                        docFields = XmlHelper.parseXMLIndexEntry(docText);
                    } catch (Exception e) {
                        System.err.println(
                                String.format("Parsing error, offending DOC #%d:\n%s", docNum, docText));
                        System.exit(1);
                    }

                    String id = docFields.get(UtilConst.TAG_DOCNO);

                    if (id == null) {
                        System.err.println(String.format("No ID tag '%s', offending DOC #%d:\n%s",
                                UtilConst.TAG_DOCNO, docNum, docText));
                        // A document without an ID cannot be indexed; stop here instead of
                        // passing a null value on to StringField.
                        System.exit(1);
                    }

                    luceneDoc.add(new StringField(UtilConst.TAG_DOCNO, id, Field.Store.YES));

                    for (Map.Entry<String, String> e : docFields.entrySet()) {
                        if (!e.getKey().equals(UtilConst.TAG_DOCNO)) {
                            luceneDoc.add(new TextField(e.getKey(), e.getValue(), Field.Store.YES));
                        }
                    }
                    indexWriter.addDocument(luceneDoc);
                    if (docNum % 1000 == 0) {
                        System.out.println("Indexed " + docNum + " docs");
                    }
                }
            }
            System.out.println("Indexed " + docNum + " docs");
        }

        indexWriter.commit();
        indexWriter.close();

    } catch (ParseException e) {
        Usage("Cannot parse arguments", options);
    } catch (Exception e) {
        System.err.println("Terminating due to an exception: " + e);
        System.exit(1);
    }

}

From source file:edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.plain.PlainCorpusBuilder.java

License:Open Source License

/**
 * Opens the index writer for the index stored at the given path and assigns it to
 * {@code writer}, which is declared outside this method.
 *
 * <p>The index is opened in {@code CREATE} mode with a Spanish analyzer and a 1024 MB RAM
 * buffer. The elapsed time is printed to standard output. An {@link IOException} is reported
 * on standard output and not rethrown.
 *
 * @param index path of the index directory
 */
private void initializeIndex(String index) {
    final File indexLocation = new File(index);
    final Date startedAt = new Date();

    try {
        System.out.println("Indexing to directory '" + indexLocation.getAbsolutePath() + "'...");

        final Directory directory = FSDirectory.open(indexLocation);
        final IndexWriterConfig config =
                new IndexWriterConfig(Version.LUCENE_40, new AnalyzerSpanish(Version.LUCENE_40));

        // CREATE mode: any previously indexed documents in the directory are removed.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // A large RAM buffer speeds up bulk indexing, but the JVM max heap has to be
        // raised accordingly (e.g. with -Xmx).
        config.setRAMBufferSizeMB(1024.0);

        writer = new IndexWriter(directory, config);

        final Date finishedAt = new Date();
        System.out.println(finishedAt.getTime() - startedAt.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.wikipedia.WikiCorpusBuilder.java

License:Open Source License

/**
 * Opens the index writer for the index stored at the given path and assigns it to
 * {@code writer}, which is declared outside this method.
 *
 * <p>The index is opened in {@code CREATE} mode with an English analyzer and a 1024 MB RAM
 * buffer. The elapsed time is printed to standard output. An {@link IOException} is reported
 * on standard output and not rethrown.
 *
 * @param index path of the index directory
 */
private void initializeIndex(String index) {
    final File indexLocation = new File(index);
    final Date startedAt = new Date();

    try {
        System.out.println("Indexing to directory '" + indexLocation.getAbsolutePath() + "'...");

        final Directory directory = FSDirectory.open(indexLocation);
        final IndexWriterConfig config =
                new IndexWriterConfig(Version.LUCENE_40, new AnalyzerEnglish(Version.LUCENE_40));

        // CREATE mode: any previously indexed documents in the directory are removed.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // A large RAM buffer speeds up bulk indexing, but the JVM max heap has to be
        // raised accordingly (e.g. with -Xmx).
        config.setRAMBufferSizeMB(1024.0);

        writer = new IndexWriter(directory, config);

        final Date finishedAt = new Date();
        System.out.println(finishedAt.getTime() - startedAt.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexer.java

License:Open Source License

/**
 * Builds a fresh index in {@code indexDirectory} from the files under
 * {@code sourceDirectory}.
 *
 * <p>The fields {@code related_object}, {@code label} and {@code defaultLabel} are analyzed
 * with an {@code EntropyAnalyzer}; {@code analyzedLabel} and all remaining fields use the
 * default analyzer. The index is opened in {@code CREATE} mode.
 *
 * <p>If indexing fails, the writer is rolled back so that its write lock is released and
 * nothing partial is committed. Exceptions are printed and not rethrown.
 */
public void createIndex() {
    try {
        Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder encoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);

        Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
        myAnalyzerMap.put("related_object", entropyAnalyzer);
        myAnalyzerMap.put("label", entropyAnalyzer);
        myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
        myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        iwc.setRAMBufferSizeMB(4096);
        iwc.setMaxThreadStates(36);
        iwc.setSimilarity(new MySimilarity());

        Directory dir = FSDirectory.open(new File(indexDirectory));
        IndexWriter writer = new IndexWriter(dir, iwc);
        boolean finished = false;
        try {
            System.out.println("Indexing to directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            finished = true;
        } finally {
            if (!finished) {
                // Release the write lock without committing a partially built index.
                writer.rollback();
            }
        }
        System.out.println("Finished Indexing");

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.rpi.tw.linkipedia.search.indexing.EntityIndexUpdater.java

License:Open Source License

/**
 * Adds the files under {@code sourceDirectory} to the index in {@code indexDirectory},
 * creating the index if it does not exist yet ({@code CREATE_OR_APPEND}).
 *
 * <p>The fields {@code related_object}, {@code label} and {@code defaultLabel} are analyzed
 * with an {@code EntropyAnalyzer}; {@code analyzedLabel} and all remaining fields use the
 * default analyzer.
 *
 * <p>If the update fails, the writer is rolled back so that its write lock is released and
 * no partial update is committed. Exceptions are printed and not rethrown.
 */
public void updateIndex() {
    try {
        Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder encoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);

        Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
        myAnalyzerMap.put("related_object", entropyAnalyzer);
        myAnalyzerMap.put("label", entropyAnalyzer);
        myAnalyzerMap.put("defaultLabel", entropyAnalyzer);
        myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(4096);
        iwc.setMaxThreadStates(36);
        iwc.setSimilarity(new MySimilarity());

        Directory dir = FSDirectory.open(new File(indexDirectory));
        IndexWriter writer = new IndexWriter(dir, iwc);
        boolean finished = false;
        try {
            System.out.println("Update directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");
            writer.close();
            finished = true;
        } finally {
            if (!finished) {
                // Release the write lock without committing a partial update.
                writer.rollback();
            }
        }
        System.out.println("Finished Updating");

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.rpi.tw.linkipedia.search.indexing.SurfaceFormIndexUpdater.java

License:Open Source License

/**
 * Adds the files under {@code sourceDirectory} to the index in {@code indexDirectory},
 * creating the index if it does not exist yet ({@code CREATE_OR_APPEND}).
 *
 * <p>The {@code label} field is analyzed with an {@code EntropyAnalyzer};
 * {@code analyzedLabel} and all remaining fields use the default analyzer.
 *
 * <p>If the update fails, the writer is rolled back so that its write lock is released and
 * no partial update is committed. Exceptions are printed and not rethrown.
 */
public void updateIndex() {
    try {
        Analyzer stdAnalyzer = DefaultAnalyzer.getAnalyzer();
        PayloadEncoder encoder = new FloatEncoder();
        EntropyAnalyzer entropyAnalyzer = new EntropyAnalyzer(encoder);

        Map<String, Analyzer> myAnalyzerMap = new HashMap<String, Analyzer>();
        myAnalyzerMap.put("label", entropyAnalyzer);
        myAnalyzerMap.put("analyzedLabel", stdAnalyzer);
        PerFieldAnalyzerWrapper perFieldAnalyzer = new PerFieldAnalyzerWrapper(stdAnalyzer, myAnalyzerMap);

        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, perFieldAnalyzer);
        iwc.setSimilarity(new MySimilarity());
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        Directory dir = FSDirectory.open(new File(indexDirectory));
        IndexWriter writer = new IndexWriter(dir, iwc);
        boolean finished = false;
        try {
            System.out.println("Indexing to directory '" + indexDirectory + "'...");
            indexDocs(writer, new File(sourceDirectory));
            System.out.println("Optimizing...");

            writer.close();
            finished = true;
        } finally {
            if (!finished) {
                // Release the write lock without committing a partial update.
                writer.rollback();
            }
        }
        System.out.println("Finished Indexing");

    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Creates an {@link IndexWriter} on the given directory in {@code CREATE_OR_APPEND} mode,
 * using the {@code LUCENE_VERSION} and {@code analyzer} declared outside this method.
 *
 * @param dir directory holding the index
 * @return the newly created writer
 * @throws IOException propagated from the {@code IndexWriter} constructor
 */
private IndexWriter openIndexWriter(Directory dir) throws IOException {
    return new IndexWriter(dir,
            new IndexWriterConfig(LUCENE_VERSION, analyzer)
                    .setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND));
}

From source file:edu.uci.ics.cs221wiki.IndexFiles.java

License:Apache License

/**
 * Indexes all text files under a directory.
 *
 * <p>Recognized arguments:
 * <ul>
 *   <li>{@code -index INDEX_PATH} -- where the index is written (default {@code "index"})</li>
 *   <li>{@code -docs DOCS_PATH} -- directory containing the documents (required)</li>
 *   <li>{@code -update} -- add to an existing index instead of re-creating it</li>
 * </ul>
 *
 * <p>A missing option value or a missing {@code -docs} argument prints the usage text and
 * exits with status 1. If indexing fails, the writer is rolled back so that its write lock
 * is released and nothing partial is committed.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + " in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options take a value; guard against the option being the last argument.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        boolean indexed = false;
        try {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);

            writer.close();
            indexed = true;
        } finally {
            if (!indexed) {
                // Release the write lock without committing a partially built index.
                writer.rollback();
            }
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}