List of usage examples for the org.apache.lucene.index.IndexWriter constructor IndexWriter(Directory, IndexWriterConfig)
public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException
Parameter: conf
From source file:com.bull.aurocontrol.csst.poc.index.interval.BaseIntervalQueryTest.java
License:Apache License
/**
 * Prepares the {@code indexWriter} field with a brand-new index held in memory.
 *
 * <p>The writer is opened in {@code CREATE} mode with a {@link KeywordAnalyzer}.
 *
 * @throws IOException if the index writer cannot be opened
 */
@Before
public void setUp() throws IOException {
    RAMDirectory inMemoryDirectory = new RAMDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_35, new KeywordAnalyzer());
    writerConfig.setOpenMode(OpenMode.CREATE);
    indexWriter = new IndexWriter(inMemoryDirectory, writerConfig);
}
From source file:com.burkeware.search.api.internal.provider.WriterProvider.java
License:Open Source License
/**
 * Builds a new {@link IndexWriter} over the directory supplied by {@code directoryProvider},
 * configured for Lucene 3.6 with this provider's {@code analyzer}.
 *
 * @return a newly opened index writer
 * @throws IOException if the directory cannot be obtained or the writer cannot be opened
 */
@Override
public IndexWriter get() throws IOException {
    return new IndexWriter(
            directoryProvider.get(),
            new IndexWriterConfig(Version.LUCENE_36, analyzer));
}
From source file:com.cep.darkstar.onramp.djnews.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//from w w w. j av a 2s . co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call optimize here. 
This can be // a costly operation, so generally it's only worth // it when your index is relatively static (ie you're // done adding documents to it): // // writer.optimize(); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.chenyi.langeasy.lucene.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void index(String docsPath) { // String indexPath = "index"; String indexPath = "F:/Personal/ws_indigo/lucene/index"; // String docsPath = null; // String docsPath = "E:/langeasy/lucene/podcast"; // boolean create = true; // String docsPath = "E:/langeasy/lucene/tv"; // String docsPath = "E:/langeasy/lucene/srt"; // String docsPath = "E:/langeasy/lucene/podcast/freshair/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/money/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/allthingsconsidered/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/morningedition/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/wait-wait-dont-tell-me/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/invisibilia/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/hidden-brain/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/weekend-edition-saturday/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/weekend-edition-sunday/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/politics/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/ask-me-another/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/ted-talks/transcript"; // String docsPath = // "E:/langeasy/lucene/podcast/freakonomics/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/serial/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/ted-ed/transcript"; // String docsPath = "E:/langeasy/lucene/podcast/yale-courses/transcript"; // String docsPath = "E:/langeasy/lucene/youtube/nasa/caption"; // String docsPath = "E:/langeasy/lucene/youtube/movieclipsTRAILERS/caption"; // String docsPath = "E:/langeasy/lucene/youtube/Vsauce/caption"; // String docsPath = "E:/langeasy/lucene/youtube/vice/caption"; // String docsPath = "E:/langeasy/lucene/youtube/DiscoveryNetworks/caption"; // String docsPath = 
"E:/langeasy/lucene/youtube/collegehumor/caption"; // String docsPath = "E:/langeasy/lucene/youtube/AnimalPlanetTV/caption"; // String docsPath = "E:/langeasy/lucene/youtube/AsapSCIENCE/caption"; // String docsPath = "E:/langeasy/lucene/youtube/latenight/caption"; // String docsPath = "E:/langeasy/lucene/youtube/rhettandlink2/caption"; // String docsPath = "E:/langeasy/lucene/youtube/TheEllenShow/caption"; // String docsPath = "E:/langeasy/lucene/youtube/zoella280390/caption"; // String docsPath = "E:/langeasy/lucene/youtube/cnn-breaking-news/caption"; // String docsPath = "E:/langeasy/lucene/youtube/TEDxTalks"; // String docsPath = "E:/langeasy/lucene/youtube/spotlight"; // String docsPath = "E:/langeasy/lucene/youtube/Howcast"; boolean create = false; final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1);//from w w w .ja va 2s . c om } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [json file]"); // System.exit(1); // }// w w w .ja v a2 s . co m String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json"; IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, new WhitespaceAnalyzer(LUCENE_VERSION)); writerConfig.setOpenMode(OpenMode.APPEND); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); String content = IOUtils.toString(new FileInputStream(jsonFileName)); JSONArray bookArray = new JSONArray(content); Field idField = new IntField("id", 0, Store.YES); Field titleField = new TextField("title", "", Store.YES); Field authorsField = new TextField("authors", "", Store.YES); Field bookCategoryField = new TextField("book_category", "", Store.YES); indexWriter.deleteAll(); FacetFields facetFields = new FacetFields(taxonomyWriter); for (int i = 0; i < bookArray.length(); i++) { Document document = new Document(); JSONObject book = bookArray.getJSONObject(i); int id = book.getInt("id"); String title = book.getString("title"); String bookCategory = book.getString("book_category"); List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>(); String authorsString = ""; JSONArray authors = book.getJSONArray("authors"); for (int j = 0; j < authors.length(); j++) { String author = authors.getString(j); if (j > 0) { authorsString += ", "; } categoryPaths.add(new CategoryPath("author", author)); authorsString += author; } categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/')); 
idField.setIntValue(id); titleField.setStringValue(title); authorsField.setStringValue(authorsString); bookCategoryField.setStringValue(bookCategory); facetFields.addFields(document, categoryPaths); document.add(idField); document.add(titleField); document.add(authorsField); document.add(bookCategoryField); indexWriter.addDocument(document); System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory, authors); } taxonomyWriter.prepareCommit(); try { taxonomyWriter.commit(); } catch (Exception e) { taxonomyWriter.rollback(); } // taxonomyWriter.close(); // // indexWriter.commit(); // indexWriter.close(); String query = "story"; IndexReader indexReader = DirectoryReader.open(indexWriter, false); IndexReader indexReader2 = DirectoryReader.open(indexWriter, false); System.out.println(indexReader == indexReader2); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader); if (newTaxonomyReader != null) { TaxonomyReader tmp = taxonomyReader; taxonomyReader = newTaxonomyReader; tmp.close(); } else { System.out.println("null"); } ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); 
System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); taxonomyWriter.commit(); taxonomyWriter.close(); indexWriter.commit(); indexWriter.close(); }
From source file:com.codenvy.test.lucene.DeleteFilesWithSameName.java
License:Open Source License
public static void main(String[] args) throws Exception { String DOC_DIR_NAME = "files"; filesDirPath = Paths.get(DOC_DIR_NAME).toAbsolutePath().toString(); Path indexPath = Paths.get("index"); Path docDir = Paths.get(DOC_DIR_NAME); Path file1 = Paths.get(DOC_DIR_NAME, "File1"); Path file2 = Paths.get(DOC_DIR_NAME, "File1A"); Analyzer analyzer = new SimpleAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); //iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); if (!Files.isReadable(docDir)) { System.out.println("document folder not found"); return;/*from w ww . ja va2 s .c o m*/ } Directory index = FSDirectory.open(indexPath); IndexWriter writer = new IndexWriter(index, iwc); //add files to index indexDocs(writer, file1); indexDocs(writer, file2); writer.commit(); searchAndPrintResult(indexPath); //delete files System.out.println(); System.out.println("=================================================================="); System.out.println("delete by prefix \"" + filesDirPath + "/File1\""); Query query = new PrefixQuery(new Term(PATH, filesDirPath + "/File1")); writer.deleteDocuments(query); writer.close(); searchAndPrintResult(indexPath); }
From source file:com.codenvy.test.lucene.IndexFiles.java
License:Open Source License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;//w w w .ja va 2s .co m boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.cohesionforce.search.EMFIndex.java
License:Open Source License
/** * Initialize the search using the directory. The directory will be created * if it does not exist./* w w w .j a v a2 s . c o m*/ * * @param directory * - the directory to use for the index * @throws IOException * for problems creating the directory or the index */ public void initialize(String directory) throws IOException { boolean isNew = false; File dir = new File(directory); if (!dir.exists()) { boolean created = dir.mkdir(); if (!created) { logger.error("Could not create directory {}", dir); return; } isNew = true; } fsDir = FSDirectory.open(dir); analyzer = new StandardAnalyzer(version); IndexWriterConfig config = new IndexWriterConfig(version, analyzer); writer = new IndexWriter(fsDir, config); // If this is a new directory, we will need to create an empty commit to // initialize the index if (isNew) { writer.commit(); } }
From source file:com.common.search.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { // Lucene Document String fieldName = "text"; // //from ww w .ja v a 2 s .c o m String text = "IK Analyzer "; // IKAnalyzer Analyzer analyzer = new IKAnalyzer(); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); // IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED)); iwriter.addDocument(doc); iwriter.close(); // ********************************** // ireader = IndexReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = ""; // QueryParserQuery QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); // 5 TopDocs topDocs = isearcher.search(query, 5); System.out.println(" " + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println(" " + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:com.company.Indexer.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "index"; String docsPath = null;/*from w ww.j av a2s . com*/ boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); PrintWriter tempwriter = new PrintWriter("pathToDocs.txt", "UTF-8"); tempwriter.print(docsPath); tempwriter.close(); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }