List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:org.abondar.experimental.eventsearch.SearchData.java
public void IndexFiles() { try {//from ww w . ja va 2s . c om Directory dir = FSDirectory.open(new File(indexPath)); // FSDirectory.open(new File(indexPath)); Analyzer an = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, an); if (create) { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } IndexWriter writer = new IndexWriter(dir, iwc); Path docs = Paths.get(indexPath); indexDocs(writer, docs); writer.close(); } catch (IOException ex) { Logger.getLogger(SearchData.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:org.aksw.lucene.extractor.DocumentExtractor.java
License:Apache License
public static void main(String args[]) { if (args.length < 3) { System.err.println("\nUsage: IndexProcessor <NIF(turtle)> <City> <Index Path>\n"); System.exit(1);/*from w w w .j av a2 s . c o m*/ } nifFilename = args[0]; //Eg.: /home/spotlight/storage/nif/boris/output.ttl" city = args[1]; //Eg.: Berlin String indexPath = args[2]; // Eg.: /home/spotlight/storage/nif/nifoggd try { DocumentExtractor documentExtractor = new DocumentExtractor(new File(indexPath), luceneAnalyzer); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config); setDictionary(documentExtractor.getPlaces(city)); readNIF(); writer.close(); } catch (IOException e) { LOG.error("Parameters:"); LOG.error("NIF(turtle): %s".format(nifFilename)); LOG.error("City: %s".format(city)); LOG.error("Index Path: %s".format(indexPath)); e.printStackTrace(); } }
From source file:org.aksw.lucene.index.IndexManager.java
License:Apache License
/** * Create an index using a list of places * * @param places/*from w w w. jav a 2 s.c om*/ */ public void createIndex(List<Place> places) throws IOException { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); processIndex(config, places); }
From source file:org.aksw.lucene.index.IndexManager.java
License:Apache License
/**
 * Appends the given places to an already-existing index.
 *
 * @param places the places to add
 * @throws IOException if the index cannot be written
 */
public void appendIndex(List<Place> places) throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    // APPEND fails unless an index already exists at the target location.
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    processIndex(writerConfig, places);
}
From source file:org.aksw.palmetto.corpus.lucene.creation.PositionStoringLuceneIndexCreator.java
License:Open Source License
/**
 * Creates the index.
 *
 * @param indexPath
 *            The path to the directory in which the Lucene index will be created
 * @param docIterator
 *            Iterator that iterates over the document texts.
 * @return true if the creation was successful, else false.
 */
public boolean createIndex(File indexPath, Iterator<IndexableDocument> docIterator) {
    LOGGER.info("Starting index creation...");
    IndexWriter writer = null;
    indexPath.mkdirs();
    Analyzer analyzer = new SimpleAnalyzer(true);
    try {
        IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
        // CREATE: always start from an empty index, discarding any existing one.
        config.setOpenMode(OpenMode.CREATE);
        // Main text field: contents not stored, but indexed with term vectors
        // and positions (DOCS_AND_FREQS_AND_POSITIONS) — presumably to support
        // position-based co-occurrence queries. Frozen to prevent later mutation.
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
        fieldType.setIndexed(true);
        fieldType.setStoreTermVectors(true);
        fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        fieldType.freeze();
        // Document-length field: stored so it can be retrieved, but not indexed.
        FieldType docLengthFieldType = new FieldType(IntField.TYPE_STORED);
        docLengthFieldType.setIndexed(false);
        docLengthFieldType.freeze();
        writer = new IndexWriter(FSDirectory.open(indexPath), config);
        int count = 0;
        Document indexDocument;
        IndexableDocument currentDocument;
        while (docIterator.hasNext()) {
            currentDocument = docIterator.next();
            // Documents with empty text are silently skipped.
            if (currentDocument.getText().length() > 0) {
                indexDocument = toLuceneDocument(analyzer, currentDocument.getText(), fieldType);
                addDocumentLength(indexDocument, docLengthFieldName, docLengthFieldType,
                        currentDocument.getNumberOfTokens());
                writer.addDocument(indexDocument);
                ++count;
                // Commit every commitInterval documents to bound buffered state;
                // the explicit GC presumably combats memory pressure on very
                // large corpora — TODO confirm it is still needed.
                if (count >= commitInterval) {
                    writer.commit();
                    System.gc();
                    count = 0;
                }
            }
        }
        LOGGER.info("Finished index creation.");
    } catch (IOException e) {
        LOGGER.error("Error while creating Index. Aborting.", e);
        return false;
    } finally {
        // Best-effort close: a failure while closing is deliberately ignored
        // so it cannot mask the success/failure result already decided above.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }
    }
    return true;
}
From source file:org.aksw.palmetto.corpus.lucene.creation.SimpleLuceneIndexCreator.java
License:Open Source License
/**
 * Creates the index at the given location from the supplied document texts.
 *
 * @param indexPath
 *            The path to the directory in which the Lucene index will be created
 * @param docIterator
 *            Iterator that iterates over the document texts.
 * @return true if the creation was successful, else false.
 */
public boolean createIndex(File indexPath, Iterator<String> docIterator) {
    LOGGER.info("Starting index creation...");
    IndexWriter indexWriter = null;
    indexPath.mkdirs();
    Analyzer simpleAnalyzer = new SimpleAnalyzer(true);
    try {
        IndexWriterConfig writerConfig = new IndexWriterConfig(version, simpleAnalyzer);
        // Always build a fresh index, discarding any existing one.
        writerConfig.setOpenMode(OpenMode.CREATE);
        // Text field: indexed with document frequencies only, contents not stored.
        FieldType textFieldType = new FieldType(TextField.TYPE_NOT_STORED);
        textFieldType.setIndexed(true);
        textFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        textFieldType.freeze();
        indexWriter = new IndexWriter(FSDirectory.open(indexPath), writerConfig);
        int docsSinceCommit = 0;
        while (docIterator.hasNext()) {
            String text = docIterator.next();
            if (text.length() > 0) {
                indexWriter.addDocument(toLuceneDocument(simpleAnalyzer, text, textFieldType));
                ++docsSinceCommit;
                // Commit in batches of commitInterval to keep memory bounded.
                if (docsSinceCommit >= commitInterval) {
                    indexWriter.commit();
                    System.gc();
                    docsSinceCommit = 0;
                }
            } else {
                LOGGER.warn("Got a document without content.");
            }
        }
        LOGGER.info("Finished index creation.");
    } catch (IOException e) {
        LOGGER.error("Error while creating Index. Aborting.", e);
        return false;
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception ignored) {
                // best-effort close; must not mask the result decided above
            }
        }
    }
    return true;
}
From source file:org.ala.lucene.Autocompleter.java
License:Open Source License
@SuppressWarnings("unchecked") public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex) throws CorruptIndexException, IOException { // build a dictionary (from the spell package) IndexReader sourceReader = IndexReader.open(sourceDirectory); LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete); // code from//ww w. j av a2 s.com // org.apache.lucene.search.spell.SpellChecker.indexDictionary( // Dictionary) IndexWriter.unlock(autoCompleteDirectory); // use a custom analyzer so we can do EdgeNGramFiltering IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() { protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new CharArraySet(SolrUtils.BIE_LUCENE_VERSION, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true)); result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20); return new TokenStreamComponents(src, result) { @Override protected void setReader(final Reader reader) throws IOException { super.setReader(reader); } }; } // public TokenStream tokenStream(String fieldName, Reader reader) { // TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); // // result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); // result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); // //result = new ISOLatin1AccentFilter(result); // result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS))); // result = new EdgeNGramTokenFilter(result, Side.FRONT,1, 
20); // // return result; // } }); if (createNewIndex) { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } indexWriterConfig.setMaxBufferedDocs(150); IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig); // writer.setMergeFactor(300); // go through every word, storing the original word (incl. n-grams) // and the number of times it occurs Map<String, Integer> wordsMap = new HashMap<String, Integer>(); Iterator<String> iter = (Iterator<String>) dict.getWordsIterator(); while (iter.hasNext()) { String word = iter.next(); int len = word.length(); if (len < 3) { continue; // too short we bail but "too long" is fine... } if (wordsMap.containsKey(word)) { throw new IllegalStateException("This should never happen in Lucene 2.3.2"); // wordsMap.put(word, wordsMap.get(word) + 1); } else { // use the number of documents this word appears in wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word))); } } for (String word : wordsMap.keySet()) { // ok index the word Document doc = new Document(); doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO, Field.Index.NOT_ANALYZED)); // count writer.addDocument(doc); } sourceReader.close(); // close writer writer.forceMerge(1); writer.close(); // re-open our reader reOpenReader(); }
From source file:org.apache.gora.lucene.store.LuceneStore.java
License:Apache License
@Override public void initialize(Class<K> keyClass, Class<T> persistentClass, Properties properties) throws GoraException { try {/* www. ja v a 2 s . c o m*/ super.initialize(keyClass, persistentClass, properties); } catch (GoraException ge) { LOG.error(ge.getMessage(), ge); throw new GoraException(ge); } String mappingFile = null; try { mappingFile = DataStoreFactory.getMappingFile(properties, (DataStore<?, ?>) this, DEFAULT_MAPPING_FILE); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String luceneVersion = properties.getProperty(LUCENE_VERSION_KEY, DEFAULT_LUCENE_VERSION); String ramBuffer = properties.getProperty(LUCENE_RAM_BUFFER_KEY, DEFAULT_LUCENE_RAMBUFFER); LOG.debug("Lucene index version: {}", luceneVersion); LOG.debug("Lucene index writer RAM buffer size: {}", ramBuffer); try { mapping = readMapping(mappingFile); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String persistentClassObject = persistentClass.getCanonicalName(); String dataStoreOutputPath = outputPath + "_" + persistentClassObject .substring(persistentClassObject.lastIndexOf('.') + 1).toLowerCase(Locale.getDefault()); try { dir = FSDirectory.open(FileSystems.getDefault().getPath(dataStoreOutputPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(Double.parseDouble(ramBuffer)); writer = new IndexWriter(dir, iwc); //TODO do we definately want all past deletions to be applied. searcherManager = new SearcherManager(writer, true, true, new SearcherFactory()); } catch (IOException e) { LOG.error("Error opening {} with Lucene FSDirectory.", outputPath, e); } }
From source file:org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.java
License:Apache License
protected IndexWriterConfig createConfig(Analyzer analyzer, boolean dropIndexOnStart) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer); if (dropIndexOnStart) { config.setOpenMode(OpenMode.CREATE); } else {// w ww. j a v a2 s .c om config.setOpenMode(OpenMode.CREATE_OR_APPEND); } return config; }
From source file:org.apache.jena.larq.IndexWriterFactory.java
License:Apache License
/**
 * Opens an {@link IndexWriter} over the given directory with a standard
 * analyzer, reusing an existing index if present (CREATE_OR_APPEND).
 *
 * @param dir directory to open the writer on
 * @return a ready-to-use index writer
 */
public static IndexWriter create(Directory dir)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(LARQ.LUCENE_VERSION);
    IndexWriterConfig config = new IndexWriterConfig(LARQ.LUCENE_VERSION, standardAnalyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, config);
}