List of usage examples for the org.apache.lucene.index.IndexWriterConfig constructor
public IndexWriterConfig(Analyzer analyzer)
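Before the project-specific examples below, here is a minimal, self-contained sketch of this constructor in use (the class name, index path, and field name are placeholder choices for illustration, not taken from the projects below): it builds an IndexWriterConfig from a StandardAnalyzer, opens an IndexWriter on a temporary FSDirectory, and adds one document.

import java.nio.file.Files;
import java.nio.file.Path;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class IndexWriterConfigBasicUsage {
    public static void main(String[] args) throws Exception {
        // temporary index directory; a real application would use a fixed path
        Path indexPath = Files.createTempDirectory("iwc-demo");

        Analyzer analyzer = new StandardAnalyzer();

        // the constructor documented above: one Analyzer, defaults for everything else
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (Directory dir = FSDirectory.open(indexPath);
             IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("text", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}

Note that an IndexWriterConfig instance should be passed to only one IndexWriter; create a fresh config for each writer you open.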
From source file:com.lucene.index.test.IKAnalyzerdemoMutilField.java
License:Apache License
/**
 * IKAnalyzer indexing and multi-field search demo.
 * @param args
 */
public static void main(String[] args) {
    // Lucene Document field name
    String fieldName = "text";
    // sample texts to index
    String text1 = "oracle?";
    String text2 = "?";
    String text3 = "?";
    // IKAnalyzer instance
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // in-memory index
        directory = new RAMDirectory();

        // configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // index three documents
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField(fieldName, text1, Field.Store.YES));
        iwriter.addDocument(doc1);

        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10000", Field.Store.YES));
        doc2.add(new TextField(fieldName, text2, Field.Store.YES));
        iwriter.addDocument(doc2);

        Document doc3 = new Document();
        doc3.add(new StringField("ID", "10000", Field.Store.YES));
        doc3.add(new TextField(fieldName, text3, Field.Store.YES));
        iwriter.addDocument(doc3);

        iwriter.close();

        // ********************** search **********************
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // build a Query with QueryParser
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // fetch the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);

        // print the returned documents (iterate scoreDocs, not totalHits,
        // since at most 5 hits were requested)
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    long Cbegintime = System.nanoTime(); // start time in nanoseconds
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        long Cendtime = System.nanoTime(); // end time in nanoseconds
        Date end = new Date();
        // elapsed time from System.nanoTime(), computed but not printed
        BigDecimal diff = BigDecimal.valueOf(Cendtime - Cbegintime, 10);
        double time = diff.setScale(4, BigDecimal.ROUND_HALF_UP).doubleValue();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.main.Indexer.java
public void indexing() throws TikaException, SAXException {
    // Input folder
    String docsPath = "C:\\Users\\piyush\\Documents\\NetBeansProjects\\luceneFinal\\indexing\\doc";
    // Output folder
    String indexPath = "C:\\Users\\piyush\\Documents\\NetBeansProjects\\luceneFinal\\indexing\\index";

    // Input Path Variable
    final Path docDir = Paths.get(docsPath);

    try {
        // org.apache.lucene.store.Directory instance
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        // analyzer with the default stop words
        Analyzer analyzer = new StandardAnalyzer();

        // IndexWriter Configuration
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);

        // IndexWriter writes new index files to the directory
        IndexWriter writer = new IndexWriter(dir, iwc);

        // Recursive method that iterates all files and directories
        indexDocs(writer, docDir);

        writer.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java
License:Apache License
/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        // close IWs
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}
From source file:com.mycompany.mavenproject1.Main.java
public static void main(String[] args) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // Directory index = new RAMDirectory();
    Directory index = new SimpleFSDirectory(Paths.get(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data"));
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    // config.setOpenMode(OpenMode.CREATE);
    IndexWriter w = new IndexWriter(index, config);

    try (ItemProvider provider = new ItemProvider(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) {
        while (provider.hasNext()) {
            Item item = provider.next();
            addItem(w, item);
        }
    } catch (XMLStreamException | IOException ex) {
        System.err.println(ex.getMessage());
    }
    // w.commit();
    w.close();

    // sample query strings
    // String queryStr = "id:1* NOT id:19*";
    String a = "id:1* NOT id:19*";
    String b = "name:Dekielek AND description:(ty AND obiektywu)";
    String c = "category:Dek*";
    String ds = "id:1232~2";
    String e = "price:[0.0 TO 100.0]";

    Query q = new QueryParser("name", analyzer).parse(ds);

    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, hitsPerPage);
    ScoreDoc[] hits = docs.scoreDocs;

    System.out.println("Found " + hits.length + " hits.");
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println(d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t" + d.get("category"));
        // + "\t" + d.get("description"));
    }
}
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        // MorphemeAnalyzer ma = new MorphemeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
From source file:com.nuvolect.deepdive.lucene.Index.java
public static JSONObject index(final String volumeId, final String searchPath, final boolean forceIndex) {

    if (m_interrupt[0]) {
        LogUtil.log(LogUtil.LogType.INDEX, "Index canceled post interrupt");
        m_interrupt[0] = false;
        return responseInterruptIndexing();
    }

    OmniFile cacheDir = IndexUtil.getCacheDir(volumeId, searchPath);
    boolean cacheDirCreated = false;
    try {
        cacheDirCreated = OmniUtil.forceMkdir(cacheDir);
    } catch (IOException e) {
        return responseFolderCreateError(searchPath);
    }

    final String luceneDirPath = cacheDir.getAbsolutePath();

    boolean cacheDirExists = !cacheDirCreated;
    boolean indexingOngoing = m_indexThread != null && m_indexThread.isAlive();
    boolean indexingRequired = !cacheDirExists || forceIndex;

    synchronized (m_lock) {
        if (indexingOngoing) {
            if (m_fileTreeActive)
                m_index_state = INDEX_STATE.filetree;
            else
                m_index_state = INDEX_STATE.indexing;
        } else {
            if (indexingRequired)
                m_index_state = INDEX_STATE.indexing;
            else
                m_index_state = INDEX_STATE.complete;
        }
    }

    if (indexingRequired || indexingOngoing) {

        if (indexingOngoing) {
            // Nothing to do, let the background process run. Monitor m_indexedDocs for progress.
        } else {
            synchronized (m_lock) {
                m_index_state = INDEX_STATE.filetree;
                m_totalDocs[0] = 0;
                m_indexedDocs[0] = 0;
                m_error[0] = "";
            }
            m_threadGroup = new ThreadGroup(INDEX_THREAD_GROUP);
            m_indexThread = new Thread(m_threadGroup, new Runnable() {
                @Override
                public void run() {

                    // Analyzer analyzer = new org.apache.lucene.analysis.core.WhitespaceAnalyzer();
                    // Analyzer analyzer = new org.apache.lucene.analysis.core.KeywordAnalyzer();
                    // Analyzer analyzer = new org.apache.lucene.analysis.standard.StandardAnalyzer();
                    Analyzer analyzer = new org.apache.lucene.analysis.core.SimpleAnalyzer();
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    IndexWriter iwriter = null;
                    try {
                        Directory m_directory = FSDirectory.open(Paths.get(luceneDirPath));
                        iwriter = new IndexWriter(m_directory, config);
                        iwriter.deleteAll();
                        iwriter.commit();
                    } catch (IOException e) {
                        LogUtil.logException(LogUtil.LogType.INDEX, e);
                        m_error[0] = "IndexWriter constructor exception";
                    }

                    synchronized (m_lock) {
                        m_fileTreeActive = true;
                        m_index_state = INDEX_STATE.filetree;
                    }
                    Collection<OmniFile> files = IndexUtil.getFilePaths(volumeId, searchPath);

                    synchronized (m_lock) {
                        m_index_state = INDEX_STATE.indexing;
                        m_fileTreeActive = false;
                        m_totalDocs[0] = files.size();
                        m_indexedDocs[0] = 0;
                    }

                    try {
                        for (OmniFile file : files) {

                            if (m_interrupt[0]) {
                                LogUtil.log(LogUtil.LogType.INDEX, "Iterator loop canceled");
                                break;
                            }

                            String path = file.getPath();
                            // LogUtil.log(LogUtil.LogType.INDEX, "indexing: " + path); // this is a bit excessive

                            iwriter.addDocument(makeDoc(volumeId, path));
                            synchronized (m_lock) {
                                ++m_indexedDocs[0];
                            }
                        }

                        iwriter.commit();
                        iwriter.close();

                        synchronized (m_lock) {
                            m_index_state = m_interrupt[0] ? INDEX_STATE.interrupted : INDEX_STATE.complete;
                            m_totalDocs[0] = m_indexedDocs[0];
                        }
                    } catch (Exception e) {
                        LogUtil.logException(LogUtil.LogType.INDEX, e);
                        m_error[0] = "IndexWriter addDocument exception";
                    }
                }
            }, INDEX_THREAD, STACK_SIZE);
            m_indexThread.setPriority(Thread.MAX_PRIORITY);
            m_indexThread.start();
        }
    } else {
        // Indexing is complete
        // Get number of documents indexed
        try {
            Directory directory = FSDirectory.open(Paths.get(luceneDirPath));
            DirectoryReader ireader = DirectoryReader.open(directory);
            synchronized (m_lock) {
                m_indexedDocs[0] = ireader.numDocs();
                m_totalDocs[0] = m_indexedDocs[0];
                m_index_state = INDEX_STATE.complete;
            }
            ireader.close();
            directory.close();
        } catch (IOException e) {
            LogUtil.logException(LogUtil.LogType.INDEX, e);
        }
    }

    JSONObject result = new JSONObject();
    try {
        synchronized (m_lock) {
            result.put("index_state", m_index_state.toString());
            result.put("error", m_error[0]);
            result.put("indexed_docs", m_indexedDocs[0]);
            result.put("total_docs", m_totalDocs[0]);
            // result.put("full_path", cacheDir.getAbsolutePath());
            result.put("search_path", searchPath);
        }
    } catch (JSONException e) {
        e.printStackTrace();
    }
    return result;
}
From source file:com.o19s.es.explore.ExplorerQueryTests.java
License:Apache License
@Before
public void setupIndex() throws Exception {
    dir = new RAMDirectory();

    try (IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER))) {
        for (int i = 0; i < docs.length; i++) {
            Document doc = new Document();
            doc.add(new Field("_id", Integer.toString(i + 1), StoredField.TYPE));
            doc.add(newTextField("text", docs[i], Field.Store.YES));

            indexWriter.addDocument(doc);
        }
    }

    reader = DirectoryReader.open(dir);
    searcher = new IndexSearcher(reader);
}
From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java
License:Apache License
@Override
public IndexWriter openIndexWriter(Directory directory) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(directory, iwc);
}
From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java
License:Apache License
@Override
public IndexWriter createIndexWriter(Directory directory) throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(indexAnalyzer());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    return new IndexWriter(directory, iwc);
}