List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:org.abondar.experimental.eventsearch.SearchData.java
public void IndexFiles() { try {//from ww w . ja va 2s . c om Directory dir = FSDirectory.open(new File(indexPath)); // FSDirectory.open(new File(indexPath)); Analyzer an = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, an); if (create) { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } IndexWriter writer = new IndexWriter(dir, iwc); Path docs = Paths.get(indexPath); indexDocs(writer, docs); writer.close(); } catch (IOException ex) { Logger.getLogger(SearchData.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:org.aksw.lucene.extractor.DocumentExtractor.java
License:Apache License
public static void main(String args[]) { if (args.length < 3) { System.err.println("\nUsage: IndexProcessor <NIF(turtle)> <City> <Index Path>\n"); System.exit(1);/*from w w w .j av a2 s . c o m*/ } nifFilename = args[0]; //Eg.: /home/spotlight/storage/nif/boris/output.ttl" city = args[1]; //Eg.: Berlin String indexPath = args[2]; // Eg.: /home/spotlight/storage/nif/nifoggd try { DocumentExtractor documentExtractor = new DocumentExtractor(new File(indexPath), luceneAnalyzer); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.APPEND); writer = new IndexWriter(FSDirectory.open(new File(indexPath)), config); setDictionary(documentExtractor.getPlaces(city)); readNIF(); writer.close(); } catch (IOException e) { LOG.error("Parameters:"); LOG.error("NIF(turtle): %s".format(nifFilename)); LOG.error("City: %s".format(city)); LOG.error("Index Path: %s".format(indexPath)); e.printStackTrace(); } }
From source file:org.aksw.lucene.index.IndexManager.java
License:Apache License
/** * Create an index using a list of places * * @param places/*from w w w. jav a 2 s.c om*/ */ public void createIndex(List<Place> places) throws IOException { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE); processIndex(config, places); }
From source file:org.aksw.lucene.index.IndexManager.java
License:Apache License
/**
 * Appends the given places to an already-existing index.
 *
 * @param places the places to add
 * @throws IOException if the index cannot be written
 */
public void appendIndex(List<Place> places) throws IOException {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    // APPEND fails unless an index already exists at the target location.
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    processIndex(writerConfig, places);
}
From source file:org.aksw.palmetto.corpus.lucene.creation.PositionStoringLuceneIndexCreator.java
License:Open Source License
/**
 * Creates the index.
 *
 * @param indexPath
 *            The path to the directory in which the Lucene index will be created
 * @param docIterator
 *            Iterator that iterates over the document texts.
 * @return true if the creation was successful, else false.
 */
public boolean createIndex(File indexPath, Iterator<IndexableDocument> docIterator) {
    LOGGER.info("Starting index creation...");
    IndexWriter writer = null;
    indexPath.mkdirs();
    Analyzer analyzer = new SimpleAnalyzer(true);
    try {
        IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
        // CREATE: always start from an empty index, discarding any existing one.
        config.setOpenMode(OpenMode.CREATE);
        // Main text field: contents not stored, but indexed with term vectors
        // and positions (DOCS_AND_FREQS_AND_POSITIONS) — presumably to support
        // position-based co-occurrence queries. Frozen to prevent later mutation.
        FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
        fieldType.setIndexed(true);
        fieldType.setStoreTermVectors(true);
        fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
        fieldType.freeze();
        // Document-length field: stored so it can be retrieved, but not indexed.
        FieldType docLengthFieldType = new FieldType(IntField.TYPE_STORED);
        docLengthFieldType.setIndexed(false);
        docLengthFieldType.freeze();
        writer = new IndexWriter(FSDirectory.open(indexPath), config);
        int count = 0;
        Document indexDocument;
        IndexableDocument currentDocument;
        while (docIterator.hasNext()) {
            currentDocument = docIterator.next();
            // Documents with empty text are silently skipped.
            if (currentDocument.getText().length() > 0) {
                indexDocument = toLuceneDocument(analyzer, currentDocument.getText(), fieldType);
                addDocumentLength(indexDocument, docLengthFieldName, docLengthFieldType,
                        currentDocument.getNumberOfTokens());
                writer.addDocument(indexDocument);
                ++count;
                // Commit every commitInterval documents to bound buffered state;
                // the explicit GC presumably combats memory pressure on very
                // large corpora — TODO confirm it is still needed.
                if (count >= commitInterval) {
                    writer.commit();
                    System.gc();
                    count = 0;
                }
            }
        }
        LOGGER.info("Finished index creation.");
    } catch (IOException e) {
        LOGGER.error("Error while creating Index. Aborting.", e);
        return false;
    } finally {
        // Best-effort close: a failure while closing is deliberately ignored
        // so it cannot mask the success/failure result already decided above.
        if (writer != null) {
            try {
                writer.close();
            } catch (Exception e) {
            }
        }
    }
    return true;
}
From source file:org.aksw.palmetto.corpus.lucene.creation.SimpleLuceneIndexCreator.java
License:Open Source License
/**
 * Creates the index at the given location from the supplied document texts.
 *
 * @param indexPath
 *            The path to the directory in which the Lucene index will be created
 * @param docIterator
 *            Iterator that iterates over the document texts.
 * @return true if the creation was successful, else false.
 */
public boolean createIndex(File indexPath, Iterator<String> docIterator) {
    LOGGER.info("Starting index creation...");
    IndexWriter indexWriter = null;
    indexPath.mkdirs();
    Analyzer simpleAnalyzer = new SimpleAnalyzer(true);
    try {
        IndexWriterConfig writerConfig = new IndexWriterConfig(version, simpleAnalyzer);
        // Always build a fresh index, discarding any existing one.
        writerConfig.setOpenMode(OpenMode.CREATE);
        // Text field: indexed with document frequencies only, contents not stored.
        FieldType textFieldType = new FieldType(TextField.TYPE_NOT_STORED);
        textFieldType.setIndexed(true);
        textFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);
        textFieldType.freeze();
        indexWriter = new IndexWriter(FSDirectory.open(indexPath), writerConfig);
        int docsSinceCommit = 0;
        while (docIterator.hasNext()) {
            String text = docIterator.next();
            if (text.length() > 0) {
                indexWriter.addDocument(toLuceneDocument(simpleAnalyzer, text, textFieldType));
                ++docsSinceCommit;
                // Commit in batches of commitInterval to keep memory bounded.
                if (docsSinceCommit >= commitInterval) {
                    indexWriter.commit();
                    System.gc();
                    docsSinceCommit = 0;
                }
            } else {
                LOGGER.warn("Got a document without content.");
            }
        }
        LOGGER.info("Finished index creation.");
    } catch (IOException e) {
        LOGGER.error("Error while creating Index. Aborting.", e);
        return false;
    } finally {
        if (indexWriter != null) {
            try {
                indexWriter.close();
            } catch (Exception ignored) {
                // best-effort close; must not mask the result decided above
            }
        }
    }
    return true;
}
From source file:org.ala.lucene.Autocompleter.java
License:Open Source License
@SuppressWarnings("unchecked") public void reIndex(Directory sourceDirectory, String fieldToAutocomplete, boolean createNewIndex) throws CorruptIndexException, IOException { // build a dictionary (from the spell package) IndexReader sourceReader = IndexReader.open(sourceDirectory); LuceneDictionary dict = new LuceneDictionary(sourceReader, fieldToAutocomplete); // code from//ww w. j av a2 s.com // org.apache.lucene.search.spell.SpellChecker.indexDictionary( // Dictionary) IndexWriter.unlock(autoCompleteDirectory); // use a custom analyzer so we can do EdgeNGramFiltering IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SolrUtils.BIE_LUCENE_VERSION, new Analyzer() { protected TokenStreamComponents createComponents(String fieldName, Reader reader) { final StandardTokenizer src = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new CharArraySet(SolrUtils.BIE_LUCENE_VERSION, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS)), true)); result = new EdgeNGramTokenFilter(result, Side.FRONT, 1, 20); return new TokenStreamComponents(src, result) { @Override protected void setReader(final Reader reader) throws IOException { super.setReader(reader); } }; } // public TokenStream tokenStream(String fieldName, Reader reader) { // TokenStream result = new StandardTokenizer(SolrUtils.BIE_LUCENE_VERSION, reader); // // result = new StandardFilter(SolrUtils.BIE_LUCENE_VERSION, result); // result = new LowerCaseFilter(SolrUtils.BIE_LUCENE_VERSION, result); // //result = new ISOLatin1AccentFilter(result); // result = new StopFilter(SolrUtils.BIE_LUCENE_VERSION, result, new HashSet<String>(Arrays.asList(ENGLISH_STOP_WORDS))); // result = new EdgeNGramTokenFilter(result, Side.FRONT,1, 
20); // // return result; // } }); if (createNewIndex) { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } indexWriterConfig.setMaxBufferedDocs(150); IndexWriter writer = new IndexWriter(autoCompleteDirectory, indexWriterConfig); // writer.setMergeFactor(300); // go through every word, storing the original word (incl. n-grams) // and the number of times it occurs Map<String, Integer> wordsMap = new HashMap<String, Integer>(); Iterator<String> iter = (Iterator<String>) dict.getWordsIterator(); while (iter.hasNext()) { String word = iter.next(); int len = word.length(); if (len < 3) { continue; // too short we bail but "too long" is fine... } if (wordsMap.containsKey(word)) { throw new IllegalStateException("This should never happen in Lucene 2.3.2"); // wordsMap.put(word, wordsMap.get(word) + 1); } else { // use the number of documents this word appears in wordsMap.put(word, sourceReader.docFreq(new Term(fieldToAutocomplete, word))); } } for (String word : wordsMap.keySet()) { // ok index the word Document doc = new Document(); doc.add(new Field(SOURCE_WORD_FIELD, word, Field.Store.YES, Field.Index.NOT_ANALYZED)); // orig term doc.add(new Field(GRAMMED_WORDS_FIELD, word, Field.Store.YES, Field.Index.ANALYZED)); // grammed doc.add(new Field(COUNT_FIELD, Integer.toString(wordsMap.get(word)), Field.Store.NO, Field.Index.NOT_ANALYZED)); // count writer.addDocument(doc); } sourceReader.close(); // close writer writer.forceMerge(1); writer.close(); // re-open our reader reOpenReader(); }
From source file:org.apache.gora.lucene.store.LuceneStore.java
License:Apache License
@Override public void initialize(Class<K> keyClass, Class<T> persistentClass, Properties properties) throws GoraException { try {/* www. ja v a 2 s . c o m*/ super.initialize(keyClass, persistentClass, properties); } catch (GoraException ge) { LOG.error(ge.getMessage(), ge); throw new GoraException(ge); } String mappingFile = null; try { mappingFile = DataStoreFactory.getMappingFile(properties, (DataStore<?, ?>) this, DEFAULT_MAPPING_FILE); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String luceneVersion = properties.getProperty(LUCENE_VERSION_KEY, DEFAULT_LUCENE_VERSION); String ramBuffer = properties.getProperty(LUCENE_RAM_BUFFER_KEY, DEFAULT_LUCENE_RAMBUFFER); LOG.debug("Lucene index version: {}", luceneVersion); LOG.debug("Lucene index writer RAM buffer size: {}", ramBuffer); try { mapping = readMapping(mappingFile); } catch (IOException ioe) { LOG.error(ioe.getMessage(), ioe); throw new GoraException(ioe); } String persistentClassObject = persistentClass.getCanonicalName(); String dataStoreOutputPath = outputPath + "_" + persistentClassObject .substring(persistentClassObject.lastIndexOf('.') + 1).toLowerCase(Locale.getDefault()); try { dir = FSDirectory.open(FileSystems.getDefault().getPath(dataStoreOutputPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); iwc.setRAMBufferSizeMB(Double.parseDouble(ramBuffer)); writer = new IndexWriter(dir, iwc); //TODO do we definately want all past deletions to be applied. searcherManager = new SearcherManager(writer, true, true, new SearcherFactory()); } catch (IOException e) { LOG.error("Error opening {} with Lucene FSDirectory.", outputPath, e); } }
From source file:org.apache.james.mailbox.lucene.search.LuceneMessageSearchIndex.java
License:Apache License
protected IndexWriterConfig createConfig(Analyzer analyzer, boolean dropIndexOnStart) { IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_31, analyzer); if (dropIndexOnStart) { config.setOpenMode(OpenMode.CREATE); } else {// w ww. j a v a2 s .c om config.setOpenMode(OpenMode.CREATE_OR_APPEND); } return config; }
From source file:org.apache.jena.larq.IndexWriterFactory.java
License:Apache License
/**
 * Opens an {@link IndexWriter} over the given directory with a standard
 * analyzer, reusing an existing index if present (CREATE_OR_APPEND).
 *
 * @param dir directory to open the writer on
 * @return a ready-to-use index writer
 */
public static IndexWriter create(Directory dir)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    StandardAnalyzer standardAnalyzer = new StandardAnalyzer(LARQ.LUCENE_VERSION);
    IndexWriterConfig config = new IndexWriterConfig(LARQ.LUCENE_VERSION, standardAnalyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(dir, config);
}