List of usage examples for the org.apache.lucene.index.CheckIndex constructor: CheckIndex(Directory)
public CheckIndex(Directory dir) throws IOException
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
private void upgradeIndex(Directory dir) throws IOException { boolean doUpgrade = false; IndexWriterConfig iwc = new IndexWriterConfig(null); CheckIndex chkIndex = new CheckIndex(dir); try {// w ww.j a v a2s . c o m for (CheckIndex.Status.SegmentInfoStatus segmentInfo : chkIndex.checkIndex().segmentInfos) { if (!segmentInfo.version.equals(Version.LATEST)) { logInfo("Found Index version %s", segmentInfo.version.toString()); doUpgrade = true; break; } } } finally { chkIndex.close(); } if (doUpgrade) { logInfo("Upgrading index to %s", Version.LATEST.toString()); new IndexUpgrader(dir, iwc, false).upgrade(); this.indexUpdateTimeMicros = Utils.getNowMicrosUtc(); } }
From source file:com.zimbra.cs.index.LuceneIndex.java
License:Open Source License
/** * Run a sanity check for the index. Callers are responsible to make sure the index is not opened by any writer. * * @param out info stream where messages should go. If null, no messages are printed. * @return true if no problems were found, otherwise false * @throws IOException failed to verify, but it doesn't necessarily mean the index is corrupted. */// w w w. ja v a2 s . c o m @Override public boolean verify(PrintStream out) throws IOException { if (!IndexReader.indexExists(luceneDirectory)) { out.println("index does not exist or no segments file found: " + luceneDirectory.getDirectory()); return true; } CheckIndex check = new CheckIndex(luceneDirectory); if (out != null) { check.setInfoStream(out); } CheckIndex.Status status = check.checkIndex(); return status.clean; }
From source file:com.zimbra.cs.index.LuceneViewer.java
License:Open Source License
private static void doCheck(CommandLine cl) throws Exception { Console console = new Console(cl.hasOption(CLI.O_VERBOSE)); String indexDir = cl.getOptionValue(CLI.O_INPUT); console.info("Checking index " + indexDir); Directory dir = null;//from ww w .ja va2 s. com try { dir = LuceneDirectory.open(new File(indexDir)); } catch (Throwable t) { console.info("ERROR: could not open directory \"" + indexDir + "\"; exiting"); t.printStackTrace(System.out); System.exit(1); } CheckIndex checker = new CheckIndex(dir); checker.setInfoStream(System.out); Status result = checker.checkIndex(); console.info("Result:" + (result.clean ? "clean" : "not clean")); }
From source file:io.anserini.integration.EndToEndTest.java
License:Apache License
/**
 * Runs Lucene's CheckIndex over the index built by the test and asserts that its
 * statistics (field norms, term index, stored fields) match the expected counts
 * configured on this test instance. Fails the test if the index is not clean.
 */
protected void checkIndex() throws IOException {
    // Capture the checker's diagnostic output so it can be dumped only on failure.
    ByteArrayOutputStream bos = new ByteArrayOutputStream(1024);
    Directory dir = FSDirectory.open(Paths.get(this.indexOutputPrefix + this.collectionClass));
    CheckIndex checker = new CheckIndex(dir);
    checker.setInfoStream(new PrintStream(bos, false, IOUtils.UTF_8));
    if (VERBOSE)
        // In verbose mode, stream diagnostics live instead of buffering them.
        checker.setInfoStream(System.out);
    CheckIndex.Status indexStatus = checker.checkIndex();
    if (!indexStatus.clean) {
        System.out.println("CheckIndex failed");
        System.out.println(bos.toString(IOUtils.UTF_8));
        fail();
    }
    // The test index is expected to consist of a single segment; inspect it.
    final CheckIndex.Status.SegmentInfoStatus seg = indexStatus.segmentInfos.get(0);
    assertTrue(seg.openReaderPassed);
    assertNotNull(seg.diagnostics);
    // Field norms: present, error-free, and with the expected field count.
    assertNotNull(seg.fieldNormStatus);
    assertNull(seg.fieldNormStatus.error);
    assertEquals(this.fieldNormStatusTotalFields, seg.fieldNormStatus.totFields);
    // Term index: expected term count and total term/position frequencies.
    assertNotNull(seg.termIndexStatus);
    assertNull(seg.termIndexStatus.error);
    assertEquals(this.termIndexStatusTermCount, seg.termIndexStatus.termCount);
    assertEquals(this.termIndexStatusTotFreq, seg.termIndexStatus.totFreq);
    assertEquals(this.termIndexStatusTotPos, seg.termIndexStatus.totPos);
    // Stored fields: expected document and field totals.
    assertNotNull(seg.storedFieldStatus);
    assertNull(seg.storedFieldStatus.error);
    assertEquals(this.storedFieldStatusTotalDocCounts, seg.storedFieldStatus.docCount);
    assertEquals(this.storedFieldStatusTotFields, seg.storedFieldStatus.totFields);
    assertTrue(seg.diagnostics.size() > 0);
    // Re-check restricted to the first segment only ("_0") — must also be clean.
    final List<String> onlySegments = new ArrayList<>();
    onlySegments.add("_0");
    assertTrue(checker.checkIndex(onlySegments).clean);
    checker.close();
}
From source file:kbp2013.index.IndexSourceCorpus.java
License:Open Source License
/**
 * Indexes the KBP source corpus into a Lucene index.
 *
 * Reads a list of file names from {@code homelist}, opens each file under
 * {@code home}, extracts the SGML-style documents (&lt;DOC ...&gt; ... &lt;/DOC&gt;)
 * it contains, derives each document's id from its &lt;DOCID&gt; or
 * &lt;DOC id="..."&gt; line, and indexes id + raw text via {@code indexDocs}.
 * Commits periodically, and logs progress to stdout.
 *
 * @param args unused; configuration comes from initializeFromDefault()
 * @throws IOException on any file or index I/O failure
 */
public static void main(String[] args) throws IOException {
    initializeFromDefault();
    int managed = 0; // counter to count idents (docs since last commit)
    int counted = 0; // total docs indexed — when to display
    int tocount = 10; // commit/report every `tocount` documents
    System.out.println("Indexing to directory '" + luceneIndex + "'...");
    INDEX_DIR = new File(luceneIndex);
    if (INDEX_DIR.exists() && create == 1) {
        System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.exit(1);
    }
    Directory dir = FSDirectory.open(new File(luceneIndex));
    // Open lucene stuff
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    // iwc.setRAMBufferSizeMB(1024); // http://wiki.apache.org/lucene-java/ImproveIndexingSpeed
    iwc.setMaxThreadStates(100);
    // manage append mode
    if (create == 0) {
        // add new document to an existing index
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // if appending, run a sanity check on the existing index first
        if (checkindex == 1) {
            System.out.println("Checking index ...");
            CheckIndex ci = new CheckIndex(dir);
            ci.checkIndex();
            System.out.println("End of Checking index");
        }
    } else {
        iwc.setOpenMode(OpenMode.CREATE);
    }
    // build writer
    IndexWriter writer = new IndexWriter(dir, iwc);
    final File docDir = new File(home);
    System.out.println("Indexing directory '" + home + "'...");
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }
    // read all the files: homelist contains one corpus file name per line
    BufferedReader reader = new BufferedReader(new FileReader(homelist));
    // read line by line each file name
    String text = "";
    boolean verbose = true;
    while ((text = reader.readLine()) != null) {
        String filename = home + text;
        final File testFile = new File(filename);
        // verbose - remove from one line files
        if (verbose) {
            System.out.println("---V-->" + "Indexing content of " + filename);
        }
        // NOTE(review): contains() takes a literal string, so this tests for the
        // literal characters `\.gz` (backslash included), which will essentially
        // never match — probably filename.endsWith(".gz") was intended. TODO confirm.
        if (testFile.isFile() && !filename.contains("\\.gz")) {
            // open file and read
            FileReader fread = new FileReader(filename);
            BufferedReader readerDoc = new BufferedReader(fread);
            // initialize variable for loop
            String fileRef = ""; // the line containing the document id
            String fromfile = ""; // the first reader for all the file
            String textdoc = ""; // inside the file the reader for the document
            while ((fromfile = readerDoc.readLine()) != null) {
                if (fromfile.toUpperCase().contains("<DOC ID=") || fromfile.toUpperCase().contains("<DOC>")) {
                    String fromdoc = fromfile;
                    // begin to index the DOCID (to keep good offset for collection of mention)
                    textdoc = fromfile; // initialize variable and keep the first line
                    // accumulate all the content up to the closing </DOC> tag
                    while (!fromdoc.toUpperCase().contains("</DOC>")) {
                        // collect the doc id
                        // store the current file ref
                        // it can come :
                        // - from the last fromfile (first iteration)
                        // - from a current iteration of fromdoc (any iteration)
                        if (fromdoc.toUpperCase().contains("<DOC ID=")
                                || fromdoc.toUpperCase().contains("<DOCID>")) {
                            fileRef = fromdoc;
                        }
                        // accumulate the complete document for later offset reading of mention
                        fromdoc = readerDoc.readLine();
                        textdoc = textdoc + "\n" + fromdoc;
                    }
                    // locate id — the corpus uses 2 forms:
                    // <DOCID> ALHURRA_NEWS13_ARB_20050412_130100-2.LDC2006E92 </DOCID>
                    // <doc id="bolt-eng-DF-183-195681-7948494">
                    // form 1
                    String idStr = fileRef;
                    if (idStr.contains("<DOCID>")) {
                        idStr = idStr.replace("<DOCID>", "");
                        idStr = idStr.replace("</DOCID>", "");
                        idStr = idStr.replace(" ", ""); // strip the padding spaces
                    }
                    // form 2: strip the attribute wrapper, keeping only the id value
                    if (idStr.contains("<DOC id=")) {
                        idStr = idStr.replace("<DOC id=\"", "");
                        idStr = idStr.replaceAll("\".+>$", "");
                        //idStr = idStr.replaceAll("\">$", "");
                    }
                    // lower case ->new corpus of LDC (kept disabled)
                    /* if (idStr.contains("<docid>")){
                        idStr = idStr.replace("<docid>", "");
                        idStr = idStr.replace("</docid>", "");
                        idStr = idStr.replace(" ", ""); // strip the padding spaces
                    }
                    if (idStr.contains("<doc id=")){
                        idStr = idStr.replace("<doc id=\"", "");
                        idStr = idStr.replaceAll("\".+>$", "");
                        // idStr = idStr.replaceAll("\">$", "");
                    } */
                    indexDocs(writer, idStr, textdoc);
                    // display info
                    managed++;
                    counted++;
                    // verbose remove for 1 doc files
                    if (verbose) {
                        System.out.println(
                                "---V-->" + counted + ":" + filename + ":" + idStr + ":" + textdoc.length());
                    }
                    // periodic commit so progress survives a crash
                    if (managed > tocount) {
                        managed = 0;
                        System.out.println(counted + ":" + filename + ":------>" + idStr);
                        // clean the writer
                        //writer.waitForMerges();
                        //writer.forceMergeDeletes();
                        writer.commit();
                    }
                } // end of if
            } // end of while
            readerDoc.close();
            fread.close();
        } else {
            // French: "not readable or not required" — skip this entry
            System.out.println(counted + ":Non lisible ou non requis:" + filename);
        }
    }
    // close properly the index writer
    // !! Caution !! in case of error, if this is not closed, the index is corrupted
    // and has to be regenerated
    writer.close();
    reader.close();
}
From source file:kbp2013.index.IndexWikipediaCorpus.java
License:Open Source License
/**
 * Indexes a plain-text (uncompressed) Wikipedia XML dump into a Lucene index.
 *
 * First reads the &lt;siteinfo&gt; namespace declarations and records every
 * non-main namespace prefix ("Talk:", "File:", ...). Then accumulates each
 * &lt;page&gt;...&lt;/page&gt; element, skips pages whose title matches a recorded
 * namespace prefix and #REDIRECT pages, and indexes the rest via
 * {@code indexDocs}. Commits periodically and logs progress.
 *
 * @param args unused; configuration comes from initializeFromDefault()
 * @throws IOException on any file or index I/O failure
 */
public static void main(String[] args) throws IOException {
    initializeFromDefault();
    int managed = 0; // counter to count idents (pages since last commit)
    int counted = 0; // total pages seen — when to display
    int tocount = 1000; // commit/report every `tocount` pages
    int saved = 0; // pages actually indexed (non-redirect, main namespace)
    System.out.println("Indexing Wikipedia Dump to directory '" + wikiluceneIndex + "'...");
    INDEX_DIR = new File(wikiluceneIndex);
    if (INDEX_DIR.exists() && create == 1) {
        System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.exit(1);
    }
    Directory dir = FSDirectory.open(new File(wikiluceneIndex));
    // Open lucene stuff
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // configure Lucene Stuff
    iwc.setMaxThreadStates(100);
    // manage append mode
    if (create == 0) {
        // add new document to an existing index
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // if appending, run a sanity check on the existing index first
        if (checkindex == 1) {
            System.out.println("Checking index ...");
            CheckIndex ci = new CheckIndex(dir);
            ci.checkIndex();
            System.out.println("End of Checking index");
        }
    } else {
        iwc.setOpenMode(OpenMode.CREATE);
    }
    // build writer
    IndexWriter writer = new IndexWriter(dir, iwc);
    // --------------------------
    //
    // Open the Wikipedia Dump
    //
    //---------------------------
    BufferedReader reader = new BufferedReader(new FileReader(wikidump));
    // read the domains (namespace prefixes) declared in <siteinfo>
    String text = "";
    ArrayList domain = new ArrayList(); // the content retrieved according to the page key
    // NOTE(review): readLine() can return null before </siteinfo> is seen on a
    // truncated dump, which would NPE in text.contains — TODO confirm input is well-formed.
    while (!text.contains("</siteinfo>")) {
        text = reader.readLine();
        // namespace key="0" is the main article namespace — keep only the others
        if (text.contains("<namespace key=") && !text.contains("<namespace key=\"0")) {
            String thisnamespace = text.replaceAll("<namespace key=[^>]+>", "");
            thisnamespace = thisnamespace.replaceAll("</namespace>", "");
            thisnamespace = thisnamespace.replaceAll("^[ ]+", "");
            thisnamespace = thisnamespace + ":";
            if (!thisnamespace.contentEquals("")) {
                domain.add(thisnamespace);
                System.out.println("Registered domain:" + thisnamespace + ";");
            }
        }
    }
    System.out.println("--------------------------------");
    // read the pages
    while ((text = reader.readLine()) != null) {
        String textdoc = ""; // inside the file, the reader for the document
        String pagename = "";
        boolean tosave = true;
        // beginning of a page
        // accumulate lines until </page>
        if (text.contains("<page>")) {
            textdoc = text;
            while (!text.contains("</page>")) {
                text = reader.readLine();
                textdoc = textdoc + text;
                if (text.contains("<title>")) {
                    pagename = text.replaceAll("<title>", "");
                    pagename = pagename.replaceAll("</title>", "");
                    pagename = pagename.replaceAll("[ ]{2,10}", "");
                    //System.out.println("Page:" + pagename);
                }
                // safety
            }
            // after page reading index document
            // verify if document
            // A) is not a redirect
            // B) is not from a domain (non-main namespace)
            for (int a = 0; a < domain.size(); a++) {
                String domaintosearch = domain.get(a).toString();
                if (pagename.toLowerCase().contains(domaintosearch.toLowerCase())) {
                    System.out.println("Specific page:" + pagename);
                    tosave = false;
                }
            }
            /* if (textdoc.contains("[A-Za-z ]+:")){
                System.out.println("Specific page domain:" + pagename);
                tosave = false;
            }*/
            if (textdoc.contains("#REDIRECT")) {
                // System.out.println("Redirect:" + pagename);
                tosave = false;
            }
            if (tosave) {
                saved++;
                indexDocs(writer, pagename, textdoc);
            }
            // display info
            managed++;
            counted++;
            // periodic commit so progress survives a crash
            if (managed > tocount) {
                managed = 0;
                System.out.println(counted + ":" + saved + ":" + pagename + ":------>" + textdoc.length());
                // System.out.println(textdoc);
                writer.commit();
            }
        }
    } // end while
    // close properly the index writer
    // !! Caution !! in case of error, if this is not closed, the index is corrupted
    // and has to be regenerated
    writer.close();
    reader.close();
}
From source file:kbp2013.index.IndexWikipediaCorpus_v2.java
License:Open Source License
/**
 * Indexes a bzip2-compressed Wikipedia XML dump into a Lucene index.
 *
 * Streams the dump through a BZip2 decompressor line by line, buffers each
 * &lt;page&gt; element, skips pages outside the main namespace (&lt;ns&gt; != 0)
 * and #REDIRECT pages, and indexes the rest via {@code indexDocument} keyed by
 * the lower-cased title. Prints total elapsed time at the end.
 *
 * @param args unused; configuration comes from initializeFromDefault()
 * @throws IOException on any file or index I/O failure
 * @throws Exception propagated from helpers
 */
public static void main(String[] args) throws IOException, Exception {
    initializeFromDefault();
    System.out.println("Indexing Wikipedia Dump to directory '" + wikiluceneIndex + "'...");
    INDEX_DIR = new File(wikiluceneIndex);
    if (INDEX_DIR.exists() && create == 1) {
        System.out.println("Cannot save index to '" + INDEX_DIR + "' directory, please delete it first");
        System.exit(1);
    }
    // this variant only accepts a bzip2-compressed dump
    if (wikidump.endsWith(".bzip2") == false) {
        System.out.println("NOTICE: The Wikipedia dump must be in bzip2 format.");
        System.exit(0);
    }
    Directory dir = FSDirectory.open(new File(wikiluceneIndex));
    // Open lucene stuff
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    // configure Lucene Stuff
    iwc.setMaxThreadStates(100);
    // manage append mode
    if (create == 0) {
        // add new document to an existing index
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        // if appending, run a sanity check on the existing index first
        if (checkindex == 1) {
            System.out.println("Checking index ...");
            CheckIndex ci = new CheckIndex(dir);
            ci.checkIndex();
            System.out.println("End of Checking index");
        }
    } else {
        iwc.setOpenMode(OpenMode.CREATE);
    }
    // build writer
    IndexWriter writer = new IndexWriter(dir, iwc);
    // --------------------------
    //
    // Open the Wikipedia Dump
    //
    //---------------------------
    //Processing the large xml file in bzip2 format
    InputStream fileInputStream = new BufferedInputStream(new FileInputStream(wikidump));
    BZip2InputStream inputStream = new BZip2InputStream(fileInputStream, false);
    InputStreamReader isr = new InputStreamReader(inputStream);
    BufferedReader reader = new BufferedReader(isr);
    String line;
    //temporarily stores the content of each page
    StringBuilder pageBuffer = new StringBuilder();
    //contains the title of the current page
    String docTitle = "";
    //contains the content of the current page
    String content = "";
    int docCount = 0; // number of documents that have been stored
    Date start = new Date(); //log the time when the indexing process starts
    while ((line = reader.readLine()) != null) {
        if (StringUtils.contains(line, "</page>") == true) {
            if (pageBuffer.length() > 0) {
                //get the title of the page (7 == "<title>".length())
                int startIndex = pageBuffer.toString().indexOf("<title>") + 7;
                int endIndex = pageBuffer.toString().indexOf("</title>");
                docTitle = pageBuffer.toString().substring(startIndex, endIndex);
                //get the content of the page; the closing tag is on the current
                //line, which is not yet in the buffer, so append it manually
                int startPageIndex = pageBuffer.toString().indexOf("<page>");
                content = pageBuffer.toString().substring(startPageIndex) + "</page>";
                //verify the namespace of the page, it should be 0 (main/article namespace)
                int namespaceValue = Integer
                        .parseInt(content.substring(content.indexOf("<ns>") + 4, content.indexOf("</ns>")));
                if (namespaceValue != 0) {
                    //reset buffer
                    pageBuffer = new StringBuilder();
                    continue;
                }
                //verify that it is not a redirect page
                if (content.indexOf("<text xml:space=\"preserve\">#REDIRECT") != -1) {
                    //reset buffer
                    pageBuffer = new StringBuilder();
                    continue;
                } else {
                    indexDocument(writer, content, docTitle.toLowerCase());
                    System.err.println("Processed " + docCount + " documents");
                }
                docCount++;
            }
            //reset buffer
            pageBuffer = new StringBuilder();
        }
        pageBuffer.append(line);
    }
    fileInputStream.close();
    writer.close();
    Date end = new Date();
    // close properly the index writer
    // !! Caution !! in case of error, if this is not closed, the index is corrupted
    // and have to be regenerated
    reader.close();
    System.err.println(end.getTime() - start.getTime() + " total milliseconds");
}
From source file:net.dataforte.infinispan.amanuensis.ExecutorContext.java
License:Open Source License
/**
 * Runs CheckIndex over this context's directory after force-unlocking it.
 *
 * @param fix when true and the index is not clean, attempt to repair it with
 *            {@link CheckIndex#fixIndex} (which may drop corrupt segments)
 * @return the CheckIndex status, or null if an I/O error occurred
 */
public synchronized Status check(boolean fix) {
    try {
        forceUnlock();
        CheckIndex checker = new CheckIndex(directory);
        Status check = checker.checkIndex();
        if (check.clean) {
            log.info("Index " + AmanuensisManager.getUniqueDirectoryIdentifier(directory) + " is clean");
        } else if (fix) {
            // BUG FIX: the original ignored the `fix` parameter and always repaired
            // a dirty index; repair is destructive, so it must be opt-in.
            log.warn("Index " + AmanuensisManager.getUniqueDirectoryIdentifier(directory)
                    + " is NOT clean, fixing...");
            checker.fixIndex(check);
        } else {
            log.warn("Index " + AmanuensisManager.getUniqueDirectoryIdentifier(directory) + " is NOT clean");
        }
        return check;
    } catch (IOException e) {
        // BUG FIX: was log.error("", e) — an empty message hides the failure context.
        log.error("Error while checking index", e);
        return null;
    }
}
From source file:org.apache.solr.index.hdfs.CheckHdfsIndex.java
License:Apache License
/**
 * Command-line entry logic: parses CheckIndex options, forces the directory
 * implementation to HdfsDirectory, and runs the check against the index at the
 * given path. Returns a process exit code (0 = success, 1 = bad args or
 * unopenable directory, otherwise whatever CheckIndex.doCheck reports).
 */
@SuppressForbidden(reason = "System.out required: command line tool")
protected static int doMain(String[] args) throws IOException, InterruptedException {
    final CheckIndex.Options options;
    try {
        options = CheckIndex.parseOptions(args);
    } catch (IllegalArgumentException e) {
        System.out.println(e.getMessage());
        return 1;
    }

    if (!CheckIndex.assertsOn()) {
        System.out.println(
                "\nNOTE: testing will be more thorough if you run java with '-ea:org.apache.lucene...', so assertions are enabled");
    }
    if (options.getDirImpl() != null) {
        // This tool always reads from HDFS, regardless of what the user asked for.
        System.out.println(
                "\nIgnoring specified -dir-impl, instead using " + HdfsDirectory.class.getSimpleName());
    }

    System.out.println("\nOpening index @ " + options.getIndexPath() + "\n");

    final Directory hdfsDirectory;
    try {
        hdfsDirectory = new HdfsDirectory(new Path(options.getIndexPath()), getConf());
    } catch (IOException e) {
        System.out.println("ERROR: could not open hdfs directory \"" + options.getIndexPath() + "\"; exiting");
        e.printStackTrace(System.out);
        return 1;
    }

    // try-with-resources closes both the directory and the checker.
    try (Directory dir = hdfsDirectory; CheckIndex indexChecker = new CheckIndex(dir)) {
        options.setOut(System.out);
        return indexChecker.doCheck(options);
    }
}
From source file:org.elasticsearch.common.util.MultiDataPathUpgrader.java
License:Apache License
/**
 * Runs check-index on the target shard and throws an exception if it failed.
 */
public void checkIndex(ShardPath targetPath) throws IOException {
    final BytesStreamOutput diagnostics = new BytesStreamOutput();
    final PrintStream infoStream = new PrintStream(diagnostics, false, Charsets.UTF_8.name());
    try (Directory shardDir = new SimpleFSDirectory(targetPath.resolveIndex());
            final CheckIndex indexChecker = new CheckIndex(shardDir)) {
        indexChecker.setInfoStream(infoStream);
        final CheckIndex.Status status = indexChecker.checkIndex();
        infoStream.flush();
        if (status.clean) {
            return;
        }
        // Dump the buffered checker output before failing the upgrade.
        logger.warn("check index [failure]\n{}", new String(diagnostics.bytes().toBytes(), Charsets.UTF_8));
        throw new IllegalStateException("index check failure");
    }
}