Example usage of org.apache.lucene.index.IndexWriterConfig.setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexWriterConfig.setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode)

Source Link

Document

Specifies the OpenMode used when opening the index.

Usage

From source file:indexer.RealValuedVecIndexer.java

/**
 * Creates an indexer configured from a properties file and opens its index writer.
 *
 * <p>The constructor loads {@code propFile}, reads the vector dimensionality from the
 * {@code vec.numdimensions} property, initialises the {@code DocVector} range, and then
 * decides where the index lives: when the {@code data.source} property equals
 * {@code "synthetic"} the path is derived from the random-sample file name, otherwise it
 * is read from the property named by {@code indexDirName}. The writer is opened with
 * {@code OpenMode.CREATE}.
 *
 * @param propFile     path of the properties file to load
 * @param indexDirName name of the property that holds the index directory (used only
 *                     for non-synthetic data)
 * @throws IllegalArgumentException if no index path could be determined
 * @throws Exception if the properties cannot be read or the index cannot be opened
 */
public RealValuedVecIndexer(String propFile, String indexDirName) throws Exception {
    prop = new Properties();
    // Close the file handle as soon as the properties are loaded.
    try (FileReader propReader = new FileReader(propFile)) {
        prop.load(propReader);
    }

    numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));
    DocVector.initVectorRange(prop);
    numIntervals = DocVector.numIntervals;

    // Constant-first equals: a missing "data.source" property now selects the
    // non-synthetic branch instead of failing with a NullPointerException.
    boolean syntheticQueries = "synthetic".equals(prop.getProperty("data.source"));
    if (syntheticQueries) {
        rvgen = new RandomVecGen(prop);
        indexPath = rvgen.randomSamplesFileName() + ".index";
    } else {
        indexPath = prop.getProperty(indexDirName);
    }

    if (indexPath == null) {
        // Fail with a readable message rather than an NPE from new File(null).
        throw new IllegalArgumentException(
                "Index directory property '" + indexDirName + "' is not set in " + propFile);
    }

    IndexWriterConfig iwcfg = new IndexWriterConfig(new WhitespaceAnalyzer());
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(FSDirectory.open(new File(indexPath).toPath()), iwcfg);
}

From source file:indexer.SiftSubvecIndexer.java

/**
 * Creates one index writer per sub-space of the vector.
 *
 * <p>The sub-space size is read from the {@code subspace.dimension} property; the number
 * of sub-vectors is {@code numDimensions / subSpaceDimension}. Writer {@code i} is backed
 * by the directory {@code indexPath + i} and is opened with {@code OpenMode.CREATE}.
 *
 * @param propFile path of the properties file, forwarded to the superclass constructor
 * @throws Exception if the configuration cannot be read or a writer cannot be opened
 */
public SiftSubvecIndexer(String propFile) throws Exception {
    super(propFile);

    subSpaceDimension = Integer.parseInt(prop.getProperty("subspace.dimension"));
    numsubVecs = numDimensions / subSpaceDimension;
    // With assertions enabled, reject a dimensionality that does not split evenly.
    assert (numDimensions == numsubVecs * subSpaceDimension);

    subspaceWriters = new IndexWriter[numsubVecs];

    int subspace = 0;
    while (subspace < numsubVecs) {
        IndexWriterConfig writerConfig = new IndexWriterConfig(new WhitespaceAnalyzer());
        writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        // Each sub-space gets its own directory, suffixed with the sub-space number.
        File subspaceDir = new File(this.indexPath + subspace);
        if (!subspaceDir.exists()) {
            subspaceDir.mkdir();
        }

        subspaceWriters[subspace] = new IndexWriter(FSDirectory.open(subspaceDir.toPath()), writerConfig);
        subspace++;
    }
}

From source file:indexer.SpamRemover.java

/**
 * Copies every document that {@code nonSpams} reports as indexable from the index at
 * {@code indexDir} into a freshly created index at {@code outIndexDir}.
 *
 * <p>The output index is opened with {@code OpenMode.CREATE}. Both the reader and the
 * writer are closed when the method returns, including when it fails part-way.
 *
 * @throws Exception if either index cannot be opened, read or written
 */
public void filterIndex() throws Exception {
    System.out.println("Filtering index at " + indexDir.getPath());

    IndexWriterConfig iwcfg = new IndexWriterConfig(Version.LUCENE_4_9,
            new WhitespaceAnalyzer(Version.LUCENE_4_9));
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    writer = new IndexWriter(FSDirectory.open(outIndexDir), iwcfg);
    try {
        reader = DirectoryReader.open(FSDirectory.open(indexDir));
        try {
            // Document ids run from 0 to maxDoc()-1. numDocs() only counts live documents,
            // so using it as the loop bound misses trailing documents and reads deleted
            // slots whenever the source index contains deletions.
            int maxDoc = reader.maxDoc();
            // A null live-docs set means the index has no deletions.
            org.apache.lucene.util.Bits liveDocs = org.apache.lucene.index.MultiFields.getLiveDocs(reader);

            for (int i = 0; i < maxDoc; i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue; // skip deleted documents
                }
                Document doc = reader.document(i);
                String docId = doc.get(ClueWebDocIndexer.FIELD_ID);
                if (nonSpams.isIndexable(docId)) {
                    writer.addDocument(doc);
                }
            }
        } finally {
            reader.close();
        }
    } finally {
        writer.close();
    }
}

From source file:Indexing.TRECDocParser.java

/**
 * Indexes the whole collection into the index at {@code indexDir}.
 *
 * <p>The writer is opened with {@code OpenMode.CREATE_OR_APPEND} and is closed when the
 * method returns, including when {@code indexAll()} throws.
 *
 * @throws Exception if the index cannot be opened or indexing fails
 */
void processAll() throws Exception {
    System.out.println("Indexing TREC collection...");

    IndexWriterConfig iwcfg = new IndexWriterConfig(analyzer);
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    writer = new IndexWriter(FSDirectory.open(indexDir.toPath()), iwcfg);
    try {
        indexAll();
    } finally {
        // Always close the writer so a failed run does not leave it open.
        writer.close();
    }
}

From source file:indextranslator.BOWTranslator.java

/**
 * Creates a translator configured from a properties file.
 *
 * <p>Reads the input index path from the {@code index} property and the output index path
 * from {@code translated.index}, opens a writer on the output (with
 * {@code OpenMode.CREATE}) and a reader on the input, then loads the dictionary named by
 * the {@code dict} property. {@code numtranslated_words} defaults to {@code 3} and
 * {@code translation.threshold_weight} defaults to {@code 0.01}.
 *
 * @param propfile path of the properties file to load
 * @throws Exception if the properties, either index, or the dictionary cannot be loaded
 */
public BOWTranslator(String propfile) throws Exception {
    prop = new Properties();
    // Close the file handle as soon as the properties are loaded.
    try (FileReader propReader = new FileReader(propfile)) {
        prop.load(propReader);
    }

    inIndexPath = prop.getProperty("index");
    outIndexPath = prop.getProperty("translated.index");

    IndexWriterConfig iwcfg = new IndexWriterConfig(new PayloadAnalyzer());
    iwcfg.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(FSDirectory.open(new File(outIndexPath).toPath()), iwcfg);
    reader = DirectoryReader.open(FSDirectory.open(new File(inIndexPath).toPath()));

    // Load the dict in memory
    dict = new Dictionary(Integer.parseInt(prop.getProperty("numtranslated_words", "3")),
            Float.parseFloat(prop.getProperty("translation.threshold_weight", "0.01")));
    dict.load(prop.getProperty("dict"));
}

From source file:info.boytsov.lucene.CreateIndex.java

License:Open Source License

/**
 * Command-line entry point that indexes a content source into a Lucene index.
 *
 * <p>Arguments: {@code args[0]} is the index type, {@code args[1]} the index source,
 * {@code args[2]} the output directory, and the optional {@code args[3]} the number of
 * documents between commits (default 1000000). The index is opened with
 * {@code OpenMode.CREATE}.
 *
 * <p>The content source is validated before the index writer is opened, so a bad source
 * specification does not touch the output index. The document maker and the writer are
 * closed even when indexing fails.
 *
 * @param args command-line arguments as described above
 * @throws Exception if indexing fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 3 && args.length != 4) {
        printUsage();
        System.exit(1);
    }
    String indexType = args[0];
    String indexSource = args[1];
    int commitInterval = 1000000;

    if (args.length >= 4) {
        commitInterval = Integer.parseInt(args[3]);
    }
    if (commitInterval <= 0) {
        // A zero interval would make "count % commitInterval" throw ArithmeticException.
        System.err.println("Commit interval must be positive, got: " + commitInterval);
        printUsage();
        System.exit(1);
    }

    System.out.println("Commiting after indexing " + commitInterval + " docs");

    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        if (!outputDir.mkdirs()) {
            System.out.println("couldn't create " + outputDir.getAbsolutePath());
            return;
        }
    }
    if (!outputDir.isDirectory()) {
        System.out.println(outputDir.getAbsolutePath() + " is not a directory!");
        return;
    }
    if (!outputDir.canWrite()) {
        System.out.println("Can't write to " + outputDir.getAbsolutePath());
        return;
    }

    Properties properties = new Properties();
    // Parse each document only once.
    properties.setProperty("content.source.forever", "false");
    properties.setProperty("doc.index.props", "true");
    // We want to store small-size fields like URL or even title  ...
    properties.setProperty("doc.stored", "true");
    // but not the large one (great savings, 3x reduction in space)!
    properties.setProperty("doc.body.stored", "false");

    // Validate the source before opening the writer.
    ContentSource source = CreateSource(indexType, indexSource, properties);
    if (source == null) {
        System.err.println("Failed to create a source: " + indexType + "(" + indexSource + ")");
        printUsage();
        System.exit(1);
    }

    FSDirectory dir = FSDirectory.open(outputDir);

    // StandardAnalyzer with its default stop words.
    StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    // CREATE overwrites an existing index if needed.
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    IndexWriter indexWriter = new IndexWriter(dir, config);
    try {
        Config c = new Config(properties);
        source.setConfig(c);
        // Though this does not seem needed, it is (gets the file opened?)
        source.resetInputs();
        DocMaker docMaker = new DocMaker();
        docMaker.setConfig(c, source);
        try {
            int count = 0;
            System.out.println("Starting Indexing of " + indexType + " source " + indexSource);

            long start = System.currentTimeMillis();
            Document doc;
            try {
                while ((doc = docMaker.makeDocument()) != null) {
                    indexWriter.addDocument(doc);
                    ++count;
                    if (count % 5000 == 0) {
                        System.out.println(
                                "Indexed " + count + " documents in " + (System.currentTimeMillis() - start) + " ms");
                    }
                    if (count % commitInterval == 0) {
                        indexWriter.commit();
                        System.out.println("Committed");
                    }
                }
            } catch (org.apache.lucene.benchmark.byTask.feeds.NoMoreDataException nmd) {
                // The source signals exhaustion with this exception; it is the normal end.
                System.out.println("Caught NoMoreDataException! -- Finishing"); // All done
            }
            long finish = System.currentTimeMillis();
            System.out.println("Indexing " + count + " documents took " + (finish - start) + " ms");
            System.out.println("Total data processed: " + source.getTotalBytesCount() + " bytes");
            System.out.println("Index should be located at " + dir.getDirectory().getAbsolutePath());
        } finally {
            docMaker.close();
        }
        indexWriter.commit();
    } finally {
        indexWriter.close();
    }
}

From source file:info.johtani.jjug.lucene.sample.IndexerSample.java

License:Apache License

/**
 * Sample program that indexes a few fixed texts into the {@code ./indexdir} directory.
 *
 * <p>The index is opened with {@code OpenMode.CREATE}; {@code OpenMode.CREATE_OR_APPEND}
 * is the alternative when documents should be added to an existing index. The writer and
 * the directory are closed automatically, and a failure while closing is reported like
 * any other I/O failure instead of being ignored.
 *
 * @param args unused
 */
public static void main(String[] args) {

    String indexDirectory = "./indexdir";
    String[] texts = {
            "JJUG?Lucene?????johtani?????",
            "JJUG CCC?Elasticsearch?Kibana????johtani?????",
            "Elasticsearch?Kibana4????johtani?????" };

    // Analyzer and writer configuration.
    StandardAnalyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);
    config.setOpenMode(OpenMode.CREATE);

    // try-with-resources closes the writer first, then the directory.
    try (Directory dir = FSDirectory.open(new File(indexDirectory));
            IndexWriter writer = new IndexWriter(dir, config)) {
        // Add one document per sample text; writer.addDocuments(list) would add a batch.
        for (String text : texts) {
            writer.addDocument(getDocument(text));
        }
        // No explicit commit is issued here; the writer is closed on leaving this block.
    } catch (IOException e) {
        e.printStackTrace();
    }
    System.out.println("Finished!");
}

From source file:intelligentWebAlgorithms.algos.search.lucene.LuceneIndexBuilder.java

License:Apache License

/**
 * Opens an index writer on the given directory.
 *
 * <p>The writer uses a {@code StandardAnalyzer}, the {@code CREATE_OR_APPEND} open mode
 * and a RAM buffer of {@code RamBufferSizeMB} megabytes.
 *
 * @param file the index directory on disk
 * @return a new writer for that directory
 * @throws IOException if the directory or the writer cannot be opened
 */
private IndexWriter getIndexWriter(File file) throws IOException {
    final FSDirectory indexStore = FSDirectory.open(file);

    final StandardAnalyzer standardAnalyzer = new StandardAnalyzer(Version.LUCENE_44);
    final IndexWriterConfig writerSettings = new IndexWriterConfig(Version.LUCENE_44, standardAnalyzer);
    // The setters return the config itself, so they can be chained.
    writerSettings.setOpenMode(OpenMode.CREATE_OR_APPEND).setRAMBufferSizeMB(RamBufferSizeMB);

    return new IndexWriter(indexStore, writerSettings);
}

From source file:invertedindex.IndexCreater.java

/**
 * Opens the index writer on the directory returned by {@code getIndexPath()}.
 *
 * <p>The writer is opened with {@code OpenMode.CREATE}, not in append mode. It is left
 * open; nothing is indexed and the index is not closed here.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public IndexCreater() throws IOException {
    final File indexLocation = new File(getIndexPath());
    final FSDirectory indexStore = FSDirectory.open(indexLocation);

    final IndexWriterConfig writerSettings = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    writerSettings.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(indexStore, writerSettings);
}

From source file:invertedindex.LineIndexing.java

/**
 * Opens the index writer on the directory returned by {@code getLineIndexLocation()},
 * using {@code OpenMode.CREATE}.
 *
 * @throws IOException if the directory or the writer cannot be opened
 */
public LineIndexing() throws IOException {
    final File lineIndexLocation = new File(getLineIndexLocation());
    final FSDirectory lineIndexStore = FSDirectory.open(lineIndexLocation);

    final IndexWriterConfig writerSettings = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    writerSettings.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    writer = new IndexWriter(lineIndexStore, writerSettings);
}