List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode
public IndexWriterConfig setOpenMode(OpenMode openMode)
From source file:gov.noaa.pfel.erddap.util.EDStatic.java
License:Open Source License
/** * This creates an IndexWriter./*from w ww . ja va 2 s. c o m*/ * Normally, this is created once in RunLoadDatasets. * But if trouble, a new one will be created. * * @throws RuntimeException if trouble */ public static void createLuceneIndexWriter(boolean firstTime) { try { String2.log("createLuceneIndexWriter(" + firstTime + ")"); long tTime = System.currentTimeMillis(); //if this is being called, directory shouldn't be locked //see javaDocs for indexWriter.close() if (IndexWriter.isLocked(luceneDirectory)) IndexWriter.unlock(luceneDirectory); //create indexWriter IndexWriterConfig lucConfig = new IndexWriterConfig(luceneVersion, luceneAnalyzer); lucConfig.setOpenMode( firstTime ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.CREATE_OR_APPEND); luceneIndexWriter = new IndexWriter(luceneDirectory, lucConfig); luceneIndexWriter.setInfoStream(verbose ? new PrintStream(new String2LogOutputStream()) : null); String2.log(" createLuceneIndexWriter finished. time=" + (System.currentTimeMillis() - tTime) + "ms"); } catch (Throwable t) { throw new RuntimeException(t); } }
From source file:gov.ssa.test.lucenedemo.IndexFiles.java
/** * Index all text files under a directory. *///from w w w. j ava 2 s. c om public void doIndexing(String _indexPath, String _docsPath) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = _indexPath; String docsPath = _docsPath; boolean create = true; /* for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } */ if (docsPath == null) { System.err.println("docsPath is null...."); return; } final Path docDir = Paths.get(docsPath); if (!Files.isReadable(docDir)) { System.out.println("Document directory '" + docDir.toAbsolutePath() + "' does not exist or is not readable, please check the path"); return; } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:HW1.generateIndex.java
License:Apache License
/** Index all text files under a directory. */ public static void main(String[] args) { String filePath = "/Users/yangyang/Desktop/lucene/corpus"; File folder = new File(filePath); File[] files = folder.listFiles(); String[] fields = { "DOCNO", "HEAD", "BYLINE", "DATELINE", "TEXT" }; ArrayList<HashMap<String, String>> documents = new ArrayList<HashMap<String, String>>(); int num = 0;/*from w ww . j av a2s .c o m*/ for (File file : files) { // read each file BufferedReader br = null; String line; try { br = new BufferedReader(new FileReader(file)); String xmlRecords = ""; while ((line = br.readLine()) != null) { // change "&" to "&" to avoid bug in parse XML if (line.contains("&")) { line = line.replaceAll("&", "&"); } if (line.startsWith("<DOC>")) { xmlRecords = line; } else if (line.startsWith("</DOC>")) { xmlRecords += line; // use ReadXMLFile.java to parse the XMLfile string num += 1; ReadXMLFile r = new ReadXMLFile(); HashMap<String, String> document = r.parse(xmlRecords, fields); // System.out.println(document.toString()); documents.add(document); } else { xmlRecords += line + " "; } } } catch (Exception e) { e.printStackTrace(); } } System.out.println(num); String[] indexPaths = { "/Users/yangyang/Desktop/lucene/index/index01", "/Users/yangyang/Desktop/lucene/index/index02", "/Users/yangyang/Desktop/lucene/index/index03", "/Users/yangyang/Desktop/lucene/index/index04", }; for (String indexPath : indexPaths) { try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = null; if (indexPath.endsWith("1")) { analyzer = new KeywordAnalyzer(); } else if (indexPath.endsWith("2")) { analyzer = new SimpleAnalyzer(); } else if (indexPath.endsWith("3")) { analyzer = new StopAnalyzer(); } else if (indexPath.endsWith("4")) { analyzer = new StandardAnalyzer(); } IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(OpenMode.CREATE); IndexWriter writer = new 
IndexWriter(dir, iwc); for (HashMap<String, String> doc : documents) { indexDoc(writer, doc); } writer.close(); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } } }
From source file:iac.cnr.it.Indexer.java
License:Apache License
public Indexer(String indexDir, boolean create, boolean fork, boolean ocr) throws IOException { logger.entry();//w w w. ja v a2s. c o m this.fork = fork; numErrors = 0; numFiles = 0; Directory dir = FSDirectory.open(Paths.get(indexDir)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { iwc.setOpenMode(OpenMode.CREATE); logger.info("Configuration specified to create a new index or overwrites an existing one."); } else { iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); logger.info( "Configuration specified to create a new index if one does not exist, otherwise the index will be opened and documents will be appended."); } writer = new IndexWriter(dir, iwc); Parser autoDetectParser = new AutoDetectParser(); context = new ParseContext(); if (ocr) { TesseractOCRConfig ocrConfig = new TesseractOCRConfig(); PDFParserConfig pdfConfig = new PDFParserConfig(); pdfConfig.setExtractInlineImages(true); pdfConfig.setExtractUniqueInlineImagesOnly(false); context.set(Parser.class, autoDetectParser); context.set(TesseractOCRConfig.class, ocrConfig); context.set(PDFParserConfig.class, pdfConfig); } if (fork) { parser = new ForkParser(ForkParser.class.getClassLoader(), autoDetectParser); } else { parser = autoDetectParser; } logger.exit(); }
From source file:ie.cmrc.smtx.etl.index.lucene.LuceneSKOSConceptIndexer.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>The index directory is created when it does not exist; when it exists,
 * the files directly inside it are deleted before indexing starts.
 *
 * @param thesaurus {@inheritDoc}
 * @param indexDirFile Output index directory
 * @return {@inheritDoc}
 * @throws NotDirectoryException {@inheritDoc}
 * @throws IOException {@inheritDoc}
 * @throws IllegalArgumentException if {@code thesaurus} or {@code indexDirFile} is null
 */
@Override
public boolean indexSKOSThesaurus(SKOS thesaurus, File indexDirFile)
        throws NotDirectoryException, IOException {
    if (thesaurus == null) {
        throw new IllegalArgumentException("Thesaurus parameter is null");
    }
    if (indexDirFile == null) {
        throw new IllegalArgumentException("Index directory is null");
    }

    // Check index directory
    if (!indexDirFile.exists()) {
        // Index directory does not exist, create it
        if (verbose)
            System.out.println(" - Directory \"" + indexDirFile.getAbsolutePath()
                    + "\" does not exist. Will create it...");
        if (!indexDirFile.mkdirs()) {
            if (verbose)
                System.out.println(" * ERROR: Could not create directory \""
                        + indexDirFile.getAbsolutePath() + "\"!");
            return false;
        }
        if (verbose)
            System.out.println(" --> Created directory \"" + indexDirFile.getAbsolutePath() + "\".");
    } else if (!indexDirFile.isDirectory()) {
        // A file exists with the same name but is not a directory
        if (verbose)
            System.out.println(" * ERROR: File \"" + indexDirFile.getAbsolutePath()
                    + "\" exists but is not a directory!");
        throw new NotDirectoryException(indexDirFile.getAbsolutePath());
    } else {
        // Index directory exists and is actually a directory: clear content
        File[] existingFiles = indexDirFile.listFiles();
        if (existingFiles == null) {
            // listFiles() returns null on an I/O error; fail with a clear message.
            throw new IOException("Could not list content of directory \""
                    + indexDirFile.getAbsolutePath() + "\"");
        }
        for (File f : existingFiles) {
            if (!f.delete() && verbose)
                System.out.println(" * WARNING: Could not delete file \"" + f.getAbsolutePath() + "\"!");
        }
    }

    // Initialise analyser
    if (verbose)
        System.out.println(" - Initialising index analysers...");
    Analyzer analyser = SKOSAnalyzerFactory.createSmartSKOSMultilingualAnalyser(this.languages);
    if (verbose)
        System.out.println(" --> Analysers intialised.");

    // Configure index writer and connect to index directory
    if (verbose)
        System.out.println(" - Configuring index writer and connecting to index directory...");
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, analyser);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources releases the writer and the directory even when
    // listing or indexing the concepts fails.
    try (SimpleFSDirectory directory = new SimpleFSDirectory(indexDirFile);
            IndexWriter indexWriter = new IndexWriter(directory, iwc)) {
        if (verbose)
            System.out.println(" --> Done configuring and connecting index writer.");

        if (verbose)
            System.out.println(" - Indexing concepts. Please be patient as this may take a few minutes...");
        int counter = 0;
        int errors = 0;
        CloseableIterator<SKOSConcept> iter = thesaurus.listConcepts();
        try {
            while (iter.hasNext()) {
                counter++;
                SKOSConcept concept = iter.next();
                Document doc = this.indexConcept(concept);
                if (doc != null) {
                    try {
                        indexWriter.addDocument(doc);
                    } catch (IOException ex) {
                        // A single failed concept is counted and skipped.
                        errors++;
                        if (verbose)
                            System.out.println(" * ERROR: Could not index concept \""
                                    + concept.getURI() + "\"!");
                    }
                }
            }
        } finally {
            iter.close();
        }
        if (verbose)
            System.out.println(" --> Indexed " + counter + " concepts(s) with " + errors + " error(s).");

        if (verbose)
            System.out.println(" - Closing connection to index directory...");
    }
    if (verbose)
        System.out.println(" --> Closed connection.");

    return true;
}
From source file:ie.cmrc.smtx.etl.index.lucene.LuceneSKOSIndexer.java
License:Apache License
/**
 * {@inheritDoc}
 *
 * <p>The index directory is created when it does not exist; when it exists,
 * the files directly inside it are deleted before indexing starts.
 *
 * @param thesaurus {@inheritDoc}
 * @param indexDirFile Output directory
 * @return {@inheritDoc}
 * @throws NotDirectoryException {@inheritDoc}
 * @throws IOException {@inheritDoc}
 * @throws IllegalArgumentException if {@code thesaurus} or {@code indexDirFile} is null
 */
@Override
public boolean indexSKOSThesaurus(SKOS thesaurus, File indexDirFile)
        throws NotDirectoryException, IOException {
    if (thesaurus == null) {
        throw new IllegalArgumentException("Thesaurus parameter is null");
    }
    if (indexDirFile == null) {
        throw new IllegalArgumentException("Index directory file is null");
    }

    // Check index directory
    if (!indexDirFile.exists()) {
        // Index directory does not exist, create it
        if (verbose)
            System.out.println(" - Directory \"" + indexDirFile.getAbsolutePath()
                    + "\" does not exist. Will create it...");
        if (!indexDirFile.mkdirs()) {
            if (verbose)
                System.out.println(" * ERROR: Could not create directory \""
                        + indexDirFile.getAbsolutePath() + "\"!");
            return false;
        }
        if (verbose)
            System.out.println(" --> Created directory \"" + indexDirFile.getAbsolutePath() + "\".");
    } else if (!indexDirFile.isDirectory()) {
        // A file exists with the same name but is not a directory
        if (verbose)
            System.out.println(" * ERROR: File \"" + indexDirFile.getAbsolutePath()
                    + "\" exists but is not a directory!");
        throw new NotDirectoryException(indexDirFile.getAbsolutePath());
    } else {
        // Index directory exists and is actually a directory: clear content
        File[] existingFiles = indexDirFile.listFiles();
        if (existingFiles == null) {
            // listFiles() returns null on an I/O error; fail with a clear message.
            throw new IOException("Could not list content of directory \""
                    + indexDirFile.getAbsolutePath() + "\"");
        }
        for (File f : existingFiles) {
            if (!f.delete() && verbose)
                System.out.println(" * WARNING: Could not delete file \"" + f.getAbsolutePath() + "\"!");
        }
    }

    // Initialise analyser
    if (verbose)
        System.out.println(" - Initialising index analysers...");
    Analyzer analyser = SKOSAnalyzerFactory.createSmartSKOSMultilingualAnalyser(this.languages);
    if (verbose)
        System.out.println(" --> Analysers intialised.");

    // Configure index writer and connect to index directory
    if (verbose)
        System.out.println(" - Configuring index writer and connecting to index directory...");
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, analyser);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources releases the writer and the directory even when
    // listing or indexing the resources fails.
    try (SimpleFSDirectory directory = new SimpleFSDirectory(indexDirFile);
            IndexWriter indexWriter = new IndexWriter(directory, iwc)) {
        if (verbose)
            System.out.println(" --> Done configuring and connecting index writer.");

        if (verbose)
            System.out.println(" - Indexing resources. Please be patient as this may take a few minutes...");
        int counter = 0;
        int errors = 0;
        CloseableIterator<SKOSResource> iter = thesaurus.listSKOSResources();
        try {
            while (iter.hasNext()) {
                counter++;
                SKOSResource resource = iter.next();
                Document doc = this.indexResource(resource);
                if (doc != null) {
                    try {
                        indexWriter.addDocument(doc);
                    } catch (IOException ex) {
                        // A single failed resource is counted and skipped.
                        errors++;
                        if (verbose)
                            System.out.println(" * ERROR: Could not index resource \""
                                    + resource.getURI() + "\"!");
                    }
                }
            }
        } finally {
            iter.close();
        }
        if (verbose)
            System.out.println(" --> Indexed " + counter + " resources(s) with " + errors + " error(s).");

        if (verbose)
            System.out.println(" - Closing connection to index directory...");
    }
    if (verbose)
        System.out.println(" --> Closed connection.");

    return true;
}
From source file:ikanalyzer.LuceneIndexAndSearchDemo.java
License:Apache License
/** * ???/*from w w w .java2s .c o m*/ * * @param args */ public static void main(String[] args) { // Lucene Document?? String fieldName = "text"; // String text = "IK Analyzer???????"; // IKAnalyzer? Analyzer analyzer = new IKAnalyzer(true); Directory directory = null; IndexWriter iwriter = null; IndexReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); // ?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new StringField("ID", "10000", Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); // ?********************************** // ? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // QueryParser?Query QueryParser qp = new QueryParser(fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); // ?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:in.student.project.index.IndexTrec.java
License:Apache License
/** Indexer for Trec files. */ public static void main(String[] argv) { try {/*from ww w. j av a 2 s . c o m*/ File index = new File("index"); boolean create = false; File root = null; String usage = "IndexTrec [-create] [-index <index>] <root_directory>"; if (argv.length == 0) { System.err.println("Usage: " + usage); return; } for (int i = 0; i < argv.length; i++) { if (argv[i].equals("-index")) { // parse -index option index = new File(argv[++i]); } else if (argv[i].equals("-create")) { // parse -create option create = true; } else if (i != argv.length - 1) { System.err.println("Usage: " + usage); return; } else root = new File(argv[i]); } Date start = new Date(); if (!create) { // delete stale docs deleting = true; indexDocs(root, index, create); } Directory dir = FSDirectory.open(index); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } writer = new IndexWriter(dir, iwc); indexDocs(root, index, create); // add new docs writer.close(); Date end = new Date(); System.out.print(end.getTime() - start.getTime()); System.out.println(" total milliseconds"); } catch (Exception e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:index.Indexcategory.java
public static void main(String[] args) throws IOException { String indexPath = "/Users/smita/Documents/ES/index/abstract/"; String docsPath = null;// w w w . j a va 2s. c om boolean create = true; String path = "/Users/smita/Documents/data/dbpedia/short_abstracts_en.nq"; Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); String ignore = sc.nextLine(); while (sc.hasNextLine()) { linecount++; String line = sc.nextLine(); //System.out.println(line); try { String article = line.split("> ")[0]; String category = line.split("> ")[2]; //System.out.println(article+" ++ "+category); //index row article = article.substring(29, article.length() - 1); //category=category.substring(38,category.length()-1); //System.out.println(article+" "+category); Document doc = new Document(); doc.add(new TextField("article", article, Field.Store.YES)); doc.add(new TextField("category", category, Field.Store.YES)); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { System.out.println("adding " + linecount); writer.addDocument(doc); } else { System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e) { } } if (sc.ioException() != null) { throw sc.ioException(); } } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.close(); }
From source file:index.IndexCoreMeta.java
public static void main(String[] args) throws FileNotFoundException, IOException, JSONException { String indexPath = "/Users/smita/Documents/ES/index/meta"; String path = "/Users/smita/Documents/data/core/meta/"; String docsPath = null;//from w w w . j a v a2 s . c om boolean create = true; Date start = new Date(); System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); //directory of data File folder = new File("/Users/smita/Documents/data/core/meta/"); String[] files = folder.list(); for (int i = 0; i < 870; i++) { String filename = path + files[i]; System.out.println(filename); if (filename.endsWith(".DS_Store")) { } else { readFile(writer, filename); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }