List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:org.ohdsi.usagi.tests.TestLucene.java
License:Apache License
public static void main(String[] args) throws IOException, ParseException { Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_4_9); //Analyzer analyzer = new UsagiAnalyzer(); FieldType textVectorField = new FieldType(); textVectorField.setIndexed(true);/*from www. j a v a 2 s . c o m*/ textVectorField.setTokenized(true); textVectorField.setStoreTermVectors(true); textVectorField.setStoreTermVectorPositions(false); textVectorField.setStoreTermVectorPayloads(false); textVectorField.setStoreTermVectorOffsets(false); textVectorField.setStored(true); textVectorField.freeze(); File indexFolder = new File(folder); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, analyzer); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); Document doc = new Document(); doc.add(new Field("F", "word1 word2 w3 word4", textVectorField)); writer.addDocument(doc); doc = new Document(); doc.add(new Field("F", "word1 word2 w3", textVectorField)); writer.addDocument(doc); writer.close(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(folder))); for (int i = 0; i < reader.numDocs(); i++) { TermsEnum termsEnum = reader.getTermVector(i, "F").iterator(null); BytesRef text; while ((text = termsEnum.next()) != null) { System.out.print(text.utf8ToString() + ","); } System.out.println(); } IndexSearcher searcher = new IndexSearcher(reader); // MoreLikeThis mlt = new MoreLikeThis(searcher.getIndexReader()); // mlt.setMinTermFreq(0); // mlt.setMinDocFreq(0); // mlt.setMaxDocFreq(9999); // mlt.setMinWordLen(0); // mlt.setMaxWordLen(9999); // mlt.setMaxDocFreqPct(100); // mlt.setMaxNumTokensParsed(9999); // mlt.setMaxQueryTerms(9999); // mlt.setStopWords(null); // mlt.setFieldNames(new String[] { "F" }); // mlt.setAnalyzer(new UsagiAnalyzer()); // Query query = mlt.like("F", new 
StringReader("Systolic blood pressure")); QueryParser parser = new QueryParser(Version.LUCENE_4_9, "F", analyzer); Query query = parser.parse("word1"); Explanation explanation = searcher.explain(query, 0); print(explanation); System.out.println(); explanation = searcher.explain(query, 1); print(explanation); System.out.println(); TopDocs topDocs = searcher.search(query, 99); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { System.out.println(scoreDoc.score + "\t" + reader.document(scoreDoc.doc).get("F")); } }
From source file:org.ohdsi.usagi.UsagiSearchEngine.java
License:Apache License
public void createNewMainIndex() { try {//w w w . j ava 2 s . co m File indexFolder = new File(folder + "/" + MAIN_INDEX_FOLDER); if (indexFolder.exists()) DirectoryUtilities.deleteDir(indexFolder); Directory dir = FSDirectory.open(indexFolder); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, new UsagiAnalyzer()); iwc.setOpenMode(OpenMode.CREATE); iwc.setRAMBufferSizeMB(256.0); writer = new IndexWriter(dir, iwc); } catch (Exception e) { throw new RuntimeException(e); } }
From source file:org.ojbc.adapters.analyticaldatastore.personid.IndexedIdentifierGenerationStrategy.java
License:RPL License
private void init() throws Exception { Directory indexDirectory = FSDirectory.open(new File(indexDirectoryPath)); log.info("Set Lucene index directory to " + indexDirectory.toString()); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); indexWriter = new IndexWriter(indexDirectory, config); }
From source file:org.ojbc.adapters.analyticaldatastore.util.LuceneUtils.java
License:RPL License
public static void main(String[] args) throws Exception { if (args.length != 2) { System.err.println("Must provide source and target index directories as command line arguments"); System.exit(1);/*w w w .ja v a 2s. c o m*/ } Directory sourceDir = FSDirectory.open(new File(args[0])); DirectoryReader reader = DirectoryReader.open(sourceDir); Directory targetDir = FSDirectory.open(new File(args[1])); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer); config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); config.setIndexDeletionPolicy(new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy())); IndexWriter writer = new IndexWriter(targetDir, config); Set<String> allowedFields = new HashSet<String>(); allowedFields.add(IdentifierGenerationStrategy.FIRST_NAME_FIELD); allowedFields.add(IdentifierGenerationStrategy.LAST_NAME_FIELD); allowedFields.add(IdentifierGenerationStrategy.MIDDLE_NAME_FIELD); allowedFields.add(IdentifierGenerationStrategy.BIRTHDATE_FIELD); allowedFields.add(IdentifierGenerationStrategy.SEX_FIELD); allowedFields.add(IdentifierGenerationStrategy.SSN_FIELD); allowedFields.add(IdentifierGenerationStrategy.ID_FIELD); try { int lastDocumentIndex = reader.maxDoc(); for (int i = 0; i < lastDocumentIndex; i++) { Document d = reader.document(i); Document newDoc = new Document(); List<IndexableField> fields = d.getFields(); for (IndexableField f : fields) { String fieldName = f.name(); String fieldValue = f.stringValue(); if (allowedFields.contains(fieldName)) { newDoc.add(new StringField(fieldName, fieldValue, Store.YES)); } } writer.addDocument(newDoc); writer.commit(); } } finally { reader.close(); writer.close(); } }
From source file:org.olat.search.service.indexer.JmsIndexer.java
License:Apache License
public IndexWriterConfig newIndexWriterConfig() { Analyzer analyzer = new StandardAnalyzer(SearchService.OO_LUCENE_VERSION); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(SearchService.OO_LUCENE_VERSION, analyzer); indexWriterConfig.setMergePolicy(newLogMergePolicy()); indexWriterConfig.setRAMBufferSizeMB(ramBufferSizeMB);// for better performance set to 48MB (see lucene docu 'how to make indexing faster") indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); return indexWriterConfig; }
From source file:org.open.crs.service.lucene.IndexFiles.java
License:Apache License
public IndexFiles(String indexPath) throws IOException { Directory dir = FSDirectory.open(Paths.get(indexPath)); // Analyzer analyzer = new StandardAnalyzer(); // Analyzer analyzer = new CJKAnalyzer(); Analyzer analyzer = new SmartChineseAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else {//from w w w . ja v a2 s . co m // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); writer = new IndexWriter(dir, iwc); }
From source file:org.opencms.search.CmsSearchIndex.java
License:Open Source License
/** * Creates a new index writer.<p>//w w w . j av a 2 s. c o m * * @param create if <code>true</code> a whole new index is created, if <code>false</code> an existing index is updated * * @return the created new index writer * * @throws CmsIndexException in case the writer could not be created * * @see #getIndexWriter(I_CmsReport, boolean) */ protected I_CmsIndexWriter indexWriterCreate(boolean create) throws CmsIndexException { IndexWriter indexWriter; try { // check if the target directory already exists File f = new File(m_path); if (!f.exists()) { // index does not exist yet f = f.getParentFile(); if ((f != null) && !f.exists()) { // create the parent folders if required f.mkdirs(); } // create must be true if the directory does not exist create = true; } // open file directory for Lucene FSDirectory dir = FSDirectory.open(new File(m_path)); // create Lucene merge policy LogMergePolicy mergePolicy = new LogByteSizeMergePolicy(); if (m_luceneMaxMergeDocs != null) { mergePolicy.setMaxMergeDocs(m_luceneMaxMergeDocs.intValue()); } if (m_luceneMergeFactor != null) { mergePolicy.setMergeFactor(m_luceneMergeFactor.intValue()); } if (m_luceneUseCompoundFile != null) { mergePolicy.setUseCompoundFile(m_luceneUseCompoundFile.booleanValue()); } // create a new Lucene index configuration IndexWriterConfig indexConfig = new IndexWriterConfig(LUCENE_VERSION, getAnalyzer()); // set the index configuration parameters if required if (m_luceneRAMBufferSizeMB != null) { indexConfig.setRAMBufferSizeMB(m_luceneRAMBufferSizeMB.doubleValue()); } if (create) { indexConfig.setOpenMode(OpenMode.CREATE); } else { indexConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); } // create the index indexWriter = new IndexWriter(dir, indexConfig); } catch (Exception e) { throw new CmsIndexException( Messages.get().container(Messages.ERR_IO_INDEX_WRITER_OPEN_2, m_path, m_name), e); } return new CmsLuceneIndexWriter(indexWriter, this); }
From source file:org.openeclass.lucene.demo.IndexCourses.java
License:Open Source License
@SuppressWarnings("deprecation") public static void main(String[] args) { String usage = "java org.openeclass.lucene.demo.IndexCourses" + " [-index INDEX_PATH] [-update]\n\n" + "This indexes the courses in Eclass DB, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "data/eclass-index"; boolean create = true; boolean help = false; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1];/*from w w w . j a va 2 s. co m*/ i++; } else if ("-update".equals(args[i])) { create = false; } else if ("-help".equals(args[i])) { help = true; } } if (help) { System.err.println("Usage: " + usage); System.exit(1); } Date start = new Date(); try { System.out.println("Opening Database connection ..."); Properties props = PropertyLoader.loadProperties("project-properties.xml"); Connection con = DriverManager.getConnection(props.getProperty("jdbcurl"), props.getProperty("user"), props.getProperty("password")); con.setAutoCommit(false); System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_23); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_23, analyzer); if (create) { iwc.setOpenMode(OpenMode.CREATE); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } IndexWriter writer = new IndexWriter(dir, iwc); indexCourses(writer, con); writer.close(); con.commit(); con.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } catch (SQLException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); e.printStackTrace(); } }
From source file:org.openerproject.targetproperties.svector.indexing.CustomLuceneIndexer.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String indexPath, String docsPath, boolean create) { // String usage = "java org.apache.lucene.demo.IndexFiles" // + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" // + "This indexes the documents in DOCS_PATH, creating a Lucene index" // + "in INDEX_PATH that can be searched with SearchFiles"; // String indexPath = "index"; // String docsPath = null; // boolean create = true; // for(int i=0;i<args.length;i++) { // if ("-index".equals(args[i])) { // indexPath = args[i+1]; // i++; // } else if ("-docs".equals(args[i])) { // docsPath = args[i+1]; // i++; // } else if ("-update".equals(args[i])) { // create = false; // }//from w w w . j ava 2 s. c om // } // if (docsPath == null) { // System.err.println("Usage: " + usage); // System.exit(1); // } final File docDir = new File(docsPath); // if (!docDir.exists() || !docDir.canRead()) { // System.out.println("Document directory '" +docDir.getAbsolutePath()+ "' does not exist or is not readable, please check the path"); // System.exit(1); // } Date start = new Date(); try { log.info("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new WhitespaceAnalyzer(LuceneUtils.LUCENE_VERSION); IndexWriterConfig iwc = new IndexWriterConfig(LuceneUtils.LUCENE_VERSION, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. 
But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:org.opengrok.indexer.index.IndexDatabase.java
License:Open Source License
/**
 * Update the content of this index database.
 *
 * Only one update may run at a time (guarded by {@code lock}/{@code running}).
 * For each configured directory: ensures the history cache exists (when
 * history is enabled), seeds {@code uidIter} from the existing index's U-field
 * terms, traverses and indexes the sources, then removes index entries for
 * files that no longer exist on disk.
 *
 * @throws IOException if an error occurs
 */
public void update() throws IOException {
    synchronized (lock) {
        // Refuse concurrent runs; flag is cleared in the outer finally below.
        if (running) {
            throw new IOException("Indexer already running!");
        }
        running = true;
        interrupted = false;
    }

    RuntimeEnvironment env = RuntimeEnvironment.getInstance();

    // Reset per-run state so nothing leaks in from a previous invocation.
    reader = null;
    writer = null;
    settings = null;
    uidIter = null;
    postsIter = null;
    acceptedNonlocalSymlinks.clear();

    IOException finishingException = null;
    try {
        Analyzer analyzer = AnalyzerGuru.getAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwc.setRAMBufferSizeMB(env.getRamBufferSize());
        /**
         * Most data in OpenGrok is indexed but not stored, so use the best
         * compression on the minority of data that is stored, since it
         * should not have a detrimental impact on overall throughput.
         */
        iwc.setCodec(new Lucene70Codec(Lucene50StoredFieldsFormat.Mode.BEST_COMPRESSION));
        writer = new IndexWriter(indexDirectory, iwc);
        writer.commit(); // to make sure index exists on the disk
        completer = new PendingFileCompleter();

        // No explicit directories configured: index the whole source root
        // (or the project's path when running per-project).
        if (directories.isEmpty()) {
            if (project == null) {
                directories.add("");
            } else {
                directories.add(project.getPath());
            }
        }

        for (String dir : directories) {
            File sourceRoot;
            if ("".equals(dir)) {
                sourceRoot = env.getSourceRootFile();
            } else {
                sourceRoot = new File(env.getSourceRootFile(), dir);
            }

            if (env.isHistoryEnabled()) {
                try {
                    HistoryGuru.getInstance().ensureHistoryCacheExists(sourceRoot);
                } catch (HistoryException ex) {
                    // Skip this directory rather than aborting the whole run.
                    String exmsg = String.format("Failed to ensureHistoryCacheExists() for %s", sourceRoot);
                    LOGGER.log(Level.SEVERE, exmsg, ex);
                    continue;
                }
            }

            dir = Util.fixPathIfWindows(dir);

            String startuid = Util.path2uid(dir, "");
            reader = DirectoryReader.open(indexDirectory); // open existing index
            settings = readAnalysisSettings();
            if (settings == null) {
                settings = new IndexAnalysisSettings();
            }
            Terms terms = null;
            int numDocs = reader.numDocs();
            if (numDocs > 0) {
                Fields uFields = MultiFields.getFields(reader); //reader.getTermVectors(0);
                terms = uFields.terms(QueryBuilder.U);
            }

            try {
                if (terms != null) {
                    uidIter = terms.iterator();
                    TermsEnum.SeekStatus stat = uidIter.seekCeil(new BytesRef(startuid)); //init uid
                    if (stat == TermsEnum.SeekStatus.END) {
                        uidIter = null;
                        LOGGER.log(Level.WARNING, "Couldn''t find a start term for {0}, empty u field?",
                                startuid);
                    }
                }

                // The actual indexing happens in indexParallel().
                IndexDownArgs args = new IndexDownArgs();
                Statistics elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting traversal of directory {0}", dir);
                indexDown(sourceRoot, dir, args);
                showFileCount(dir, args, elapsed);

                args.cur_count = 0;
                elapsed = new Statistics();
                LOGGER.log(Level.INFO, "Starting indexing of directory {0}", dir);
                indexParallel(dir, args);
                elapsed.report(LOGGER, String.format("Done indexing of directory %s", dir));

                // Remove data for the trailing terms that indexDown()
                // did not traverse. These correspond to files that have been
                // removed and have higher ordering than any present files.
                while (uidIter != null && uidIter.term() != null
                        && uidIter.term().utf8ToString().startsWith(startuid)) {
                    removeFile(true);
                    BytesRef next = uidIter.next();
                    if (next == null) {
                        uidIter = null;
                    }
                }

                markProjectIndexed(project);
            } finally {
                reader.close();
            }
        }

        try {
            finishWriting();
        } catch (IOException e) {
            // Remember the failure but fall through to cleanup; rethrown below.
            finishingException = e;
        }
    } catch (RuntimeException ex) {
        LOGGER.log(Level.SEVERE, "Failed with unexpected RuntimeException", ex);
        throw ex;
    } finally {
        completer = null;
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            // Keep the first failure; a close error must not mask it.
            if (finishingException == null) {
                finishingException = e;
            }
            LOGGER.log(Level.WARNING, "An error occurred while closing writer", e);
        } finally {
            writer = null;
            synchronized (lock) {
                running = false;
            }
        }
    }

    if (finishingException != null) {
        throw finishingException;
    }

    if (!isInterrupted() && isDirty()) {
        if (env.isOptimizeDatabase()) {
            optimize();
        }
        env.setIndexTimestamp();
    }
}