List of usage examples for the org.apache.lucene.index.IndexWriter constructor
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Parameters:
d: the index directory
conf: the configuration settings according to which the IndexWriter should be initialized
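Before the collected examples, a minimal sketch of the constructor in use. It assumes a recent Lucene release in which IndexWriterConfig takes only an Analyzer (no Version argument); the index path and analyzer choice are placeholders, not taken from any example below:

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MinimalIndexWriterExample {
    public static void main(String[] args) throws IOException {
        // open (or create) an index directory on disk; the path is a placeholder
        Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));

        // the config carries the analyzer and open mode for this writer
        IndexWriterConfig conf = new IndexWriterConfig(new StandardAnalyzer());
        conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        // the two-argument constructor documented above: directory + config
        IndexWriter writer = new IndexWriter(dir, conf);
        writer.close();
    }
}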
From source file:com.mathworks.xzheng.tools.SpatialLuceneExample.java
License:Apache License
SpatialLuceneExample() throws IOException {
    directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    writer = new IndexWriter(directory, config);
}
From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java
License:Apache License
/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex a {@link Directory} used to write the training index
 * @param testIndex a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer {@link Analyzer} used to create the new docs
 * @param fieldNames names of fields that need to be put in the new indexes or <code>null</code> if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
        Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {

    // create IWs for train / test / cv IDXs
    IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
    IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
    IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));

    try {
        int size = originalIndex.maxDoc();

        IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
        TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);

        // set the type to be indexed, stored, with term vectors
        FieldType ft = new FieldType(TextField.TYPE_STORED);
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPositions(true);

        int b = 0;

        // iterate over existing documents
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {

            // create a new document for indexing
            Document doc = new Document();
            if (fieldNames != null && fieldNames.length > 0) {
                for (String fieldName : fieldNames) {
                    doc.add(new Field(fieldName,
                            originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
                }
            } else {
                for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
                    if (storableField.readerValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
                    } else if (storableField.binaryValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
                    } else if (storableField.stringValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
                    } else if (storableField.numericValue() != null) {
                        doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
                    }
                }
            }

            // add it to one of the IDXs
            if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
                testWriter.addDocument(doc);
            } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
                cvWriter.addDocument(doc);
            } else {
                trainingWriter.addDocument(doc);
            }
            b++;
        }
    } catch (Exception e) {
        throw new IOException(e);
    } finally {
        // commit, then close IWs
        testWriter.commit();
        cvWriter.commit();
        trainingWriter.commit();
        testWriter.close();
        cvWriter.close();
        trainingWriter.close();
    }
}
From source file:com.meltmedia.cadmium.search.SearchContentPreprocessor.java
License:Apache License
@Override
public synchronized void processFromDirectory(String metaDir) throws Exception {
    SearchHolder newStagedSearcher = new SearchHolder();
    indexDir = new File(metaDir, "lucene-index");
    dataDir = new File(metaDir).getParentFile();
    newStagedSearcher.directory = new NIOFSDirectory(indexDir);
    IndexWriter iwriter = null;
    try {
        iwriter = new IndexWriter(newStagedSearcher.directory,
                new IndexWriterConfig(Version.LUCENE_43, analyzer).setRAMBufferSizeMB(5));
        iwriter.deleteAll();
        writeIndex(iwriter, dataDir);
    } finally {
        IOUtils.closeQuietly(iwriter);
        iwriter = null;
    }
    newStagedSearcher.indexReader = DirectoryReader.open(newStagedSearcher.directory);
    SearchHolder oldStage = stagedSearch;
    stagedSearch = newStagedSearcher;
    if (oldStage != null) {
        oldStage.close();
    }
    log.info("About to call processSearchPreprocessors()");
    processSearchPreprocessors(newStagedSearcher.indexReader, analyzer, "content");
}
From source file:com.meltwater.elasticsearch.index.RamDirectoryPercolatorIndex.java
License:Apache License
public Directory indexDocuments(List<ParsedDocument> parsedDocuments) {
    try {
        Directory directory = new RAMDirectory();
        IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_4_10_4,
                mapperService.analysisService().defaultIndexAnalyzer());
        IndexWriter iwriter = new IndexWriter(directory, conf);
        for (ParsedDocument document : parsedDocuments) {
            for (ParseContext.Document doc : document.docs()) {
                iwriter.addDocument(doc, document.analyzer());
            }
        }
        iwriter.close();
        return directory;
    } catch (IOException e) {
        throw new ElasticsearchException("Failed to write documents to RAMDirectory", e);
    }
}
From source file:com.miliworks.virgo.test.LuceneIndexAndSearchDemo.java
License:Apache License
/**
 * Index a single document and run a search against it.
 *
 * @param args
 */
public static void main(String[] args) {
    // Lucene Document field name
    String fieldName = "text";
    // content to index (the original Chinese sample sentence was garbled in extraction)
    String text = "IK Analyzer???????";

    // instantiate the IKAnalyzer tokenizer in smart segmentation mode
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // build an in-memory index
        directory = new RAMDirectory();

        // configure the IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_40, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // write the document
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        // search **********************************
        // instantiate the searcher
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        // query keyword (the original Chinese keyword was garbled in extraction)
        String keyword = "?";

        // use QueryParser to build the Query
        QueryParser qp = new QueryParser(Version.LUCENE_40, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // search, returning the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("hits: " + topDocs.totalHits);

        // print the results (bounded by scoreDocs.length, not totalHits)
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("content: " + targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.mycompany.mavenproject1.Main.java
public static void main(String[] args) throws IOException, ParseException {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // Directory index = new RAMDirectory();
    Directory index = new SimpleFSDirectory(Paths.get(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\data"));
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    //config.setOpenMode(OpenMode.CREATE);
    IndexWriter w = new IndexWriter(index, config);

    try (ItemProvider provider = new ItemProvider(
            "C:\\Users\\slete\\Documents\\NetBeansProjects\\mavenproject1\\src\\main\\java\\com\\mycompany\\mavenproject1\\items.xml")) {
        while (provider.hasNext()) {
            Item item = provider.next();
            addItem(w, item);
        }
    } catch (XMLStreamException | IOException ex) {
        System.err.println(ex.getMessage());
    }

    // w.commit();
    w.close();

    // sample query strings
    // String queryStr = "id:1* NOT id:19*";
    String a = "id:1* NOT id:19*";
    String b = "name:Dekielek AND description:(ty AND obiektywu)";
    String c = "category:Dek*";
    String ds = "id:1232~2";
    String e = "price:[0.0 TO 100.0]";

    Query q = new QueryParser("name", analyzer).parse(ds);

    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, hitsPerPage);
    ScoreDoc[] hits = docs.scoreDocs;

    System.out.println("Found " + hits.length + " hits.");
    for (int i = 0; i < hits.length; ++i) {
        int docId = hits[i].doc;
        Document d = searcher.doc(docId);
        System.out.println(d.get("id") + "\t" + d.get("price") + "\t" + d.get("name") + "\t"
                + d.get("category")); //+ "\t" + d.get("description"));
    }
}
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Index all text files under a directory. */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        // MorphemeAnalyzer ma = new MorphemeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer. But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here. This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}
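A closing note on resource handling: most of the examples above close the writer by hand or in a finally block. IndexWriter and Directory both implement Closeable, so a try-with-resources form is a common alternative; a minimal sketch with a placeholder path and field (in recent releases close() commits pending changes by default):

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class TryWithResourcesIndexing {
    public static void main(String[] args) throws IOException {
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new TextField("text", "hello lucene", Field.Store.YES));
            writer.addDocument(doc);
            // writer.close() runs automatically here, committing pending changes
        }
    }
}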