Usage examples for the Lucene method org.apache.lucene.index.IndexReader#maxDoc().
public abstract int maxDoc();
From source file:org.sonatype.nexus.index.updater.DefaultIndexUpdater.java
License:Open Source License
/**
 * Copies every live (non-deleted) document from {@code sourcedir} into a freshly
 * created index in {@code targetdir}, passing each document through
 * {@code IndexUtils.updateDocument} so it is rewritten for the given context.
 *
 * @param sourcedir directory holding the existing index to read
 * @param targetdir directory that receives the rebuilt index (recreated from scratch)
 * @param context   indexing context used to update each copied document
 * @throws CorruptIndexException if the source index is corrupt
 * @throws LockObtainFailedException if the target index write lock cannot be obtained
 * @throws IOException on any other index I/O failure
 */
private static void copyUpdatedDocuments(final Directory sourcedir, final Directory targetdir,
        final IndexingContext context)
        throws CorruptIndexException, LockObtainFailedException, IOException {
    IndexWriter w = null;
    IndexReader r = null;
    try {
        r = IndexReader.open(sourcedir);
        // Lucene 2.x constructor; the final 'true' presumably requests creation of a
        // brand-new index in targetdir — TODO confirm against the IndexWriter version in use.
        w = new IndexWriter(targetdir, false, new NexusAnalyzer(), true);
        // maxDoc() is an exclusive upper bound on document ids; ids belonging to
        // deleted documents are skipped via isDeleted().
        for (int i = 0; i < r.maxDoc(); i++) {
            if (!r.isDeleted(i)) {
                w.addDocument(IndexUtils.updateDocument(r.document(i), context));
            }
        }
        w.optimize();
        w.flush();
    } finally {
        // Null-safe closes; run even when the copy above fails part-way.
        IndexUtils.close(w);
        IndexUtils.close(r);
    }
}
From source file:org.sonatype.nexus.ReindexIT.java
License:Open Source License
/**
 * Shifts every live document's LAST_MODIFIED value by {@code shiftDays} days,
 * rewriting each affected document in place, then optimizes/commits the context
 * and shifts the context timestamp by the same amount. No-op when shiftDays == 0.
 */
protected void shiftContextInTime(IndexingContext ctx, int shiftDays) throws IOException {
    if (shiftDays != 0) {
        final IndexWriter iw = ctx.getIndexWriter();
        final IndexSearcher is = ctx.acquireIndexSearcher();
        try {
            final IndexReader ir = is.getIndexReader();
            // Walk all doc ids up to maxDoc(), skipping deleted slots.
            for (int docNum = 0; docNum < ir.maxDoc(); docNum++) {
                if (!ir.isDeleted(docNum)) {
                    Document doc = ir.document(docNum);
                    String lastModified = doc.get(ArtifactInfo.LAST_MODIFIED);
                    if (lastModified != null) {
                        long lm = Long.parseLong(lastModified);
                        lm = lm + (shiftDays * A_DAY_MILLIS);
                        // Replace the field rather than add a second value;
                        // Store.YES + Index.NO keeps it stored but not searchable.
                        doc.removeFields(ArtifactInfo.LAST_MODIFIED);
                        doc.add(new Field(ArtifactInfo.LAST_MODIFIED, Long.toString(lm), Field.Store.YES,
                                Field.Index.NO));
                        // NOTE(review): UINFO appears to be the unique key for an
                        // artifact record, so updateDocument replaces the old version
                        // — confirm against ArtifactInfo's docs.
                        iw.updateDocument(new Term(ArtifactInfo.UINFO, doc.get(ArtifactInfo.UINFO)), doc);
                    }
                }
            }
            ctx.optimize();
            ctx.commit();
            // shift timestamp too
            if (ctx.getTimestamp() != null) {
                ctx.updateTimestamp(true, new Date(ctx.getTimestamp().getTime() + (shiftDays * A_DAY_MILLIS)));
            } else {
                ctx.updateTimestamp(true, new Date(System.currentTimeMillis() + (shiftDays * A_DAY_MILLIS)));
            }
        } finally {
            // Always return the searcher to the context, even on failure.
            ctx.releaseIndexSearcher(is);
        }
    }
}
From source file:org.springmodules.lucene.index.core.DefaultLuceneIndexTemplate.java
License:Apache License
/**
 * Returns the index's {@code maxDoc()} value — one greater than the largest
 * document id, counting deleted documents. The reader is borrowed from the
 * factory and always released, even if the call fails.
 */
public int getMaxDoc() {
    final IndexReader borrowedReader = IndexReaderFactoryUtils.getIndexReader(indexFactory);
    try {
        return borrowedReader.maxDoc();
    } finally {
        IndexReaderFactoryUtils.releaseIndexReader(indexFactory, borrowedReader);
    }
}
From source file:org.talend.dataquality.standardization.migration.FirstNameIndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively.//from w ww .ja va 2s .com * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory indexDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(indexDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getAbsoluteFile()); FSDirectory outputDir = FSDirectory.open(outputFolder); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(indexDir); Document doc = null; // for any other indexes, regenerate with new Analyzer, but no // changes to document. for (int i = 0; i < reader.maxDoc(); i++) { doc = reader.document(i); if (IS_MIGRATING_FIRSTNAME_INDEX) { Document newDoc = generateFirstNameDoc(doc); if (newDoc != null) { writer.addDocument(newDoc); } } else { writer.addDocument(doc); } } System.out.println("count: " + count); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.talend.dataquality.standardization.migration.IndexMigrator.java
License:Open Source License
/** * regenerate all indexes recursively./*from w ww . j a v a 2 s.co m*/ * * @param inputFolder * @param outputFolder * @throws java.io.IOException */ private int regenerate(File inputFolder, File outputFolder) throws IOException { FSDirectory inputDir = FSDirectory.open(inputFolder); CheckIndex check = new CheckIndex(inputDir); Status status = check.checkIndex(); if (status.missingSegments) { for (File f : inputFolder.listFiles()) { if (f.isDirectory()) { File out = new File(outputFolder.getAbsolutePath() + "/" + f.getName()); out.mkdir(); regenerate(f, out); } } } else { System.out.println("REGENERATE: " + inputFolder.getPath()); FSDirectory outputDir = FSDirectory.open(outputFolder); analyzer = new StandardAnalyzer(CharArraySet.EMPTY_SET); IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer); IndexWriter writer = new IndexWriter(outputDir, config); IndexReader reader = DirectoryReader.open(inputDir); // for any other indexes, regenerate with new Analyzer, but no // changes to document. Collection<String> fieldNames = new ArrayList<String>(); int count = 0; Bits liveDocs = MultiFields.getLiveDocs(reader); for (int i = 0; i < reader.maxDoc(); i++) { if (liveDocs != null && !liveDocs.get(i)) { continue; } Document doc = reader.document(i); List<IndexableField> fields = doc.getFields(); for (int k = 0; k < fields.size(); k++) { fieldNames.add(fields.get(k).name()); } if (fieldNames.contains(F_WORD)) { // for "out of the box" indexes, regenerate the index with 2 // extra fields ("SYNTERM" and "WORDTERM") for better scoring. 
String word = doc.getValues(F_WORD)[0]; String[] synonyms = doc.getValues(F_SYN); Set<String> synonymSet = new HashSet<String>(); for (String syn : synonyms) { if (!syn.equals(word)) { synonymSet.add(syn); } } Document newDoc = generateDocument(word, synonymSet); writer.addDocument(newDoc); } else { writer.addDocument(doc); } count++; } System.out.println("count: " + count); reader.close(); writer.commit(); writer.close(); outputDir.close(); // copy all other files such as "readMe.txt" for (File file : inputFolder.listFiles()) { if (file.isFile() && !isLuceneIndexFile(file)) { // copy to destination folder copyFile(file, outputFolder); } } } return 0; }
From source file:org.toubassi.femtozip.lucene.IndexDocumentList.java
License:Apache License
/**
 * Builds a sampled view over the stored values of {@code fieldName}.
 *
 * Scans documents from {@code firstDoc} up to {@code reader.maxDoc()}, skipping
 * deleted documents, and keeps roughly {@code numSamples} of the live documents
 * (chosen by a running sampling-rate comparison). For each kept document the doc
 * id and its field count are recorded; field counts are then converted to a
 * cumulative (prefix-sum) array so the i-th entry is the total number of field
 * values up to and including document i.
 *
 * @param reader     open index reader (retained, not closed here)
 * @param numSamples approximate number of documents to sample
 * @param firstDoc   first document id to consider
 * @param fieldName  stored field whose values this list exposes
 * @throws IOException on index read failure
 */
public IndexDocumentList(IndexReader reader, int numSamples, int firstDoc, String fieldName)
        throws IOException {
    this.reader = reader;
    this.fieldName = fieldName;
    numDocs = reader.numDocs();
    // Fraction of live docs to keep. NOTE(review): if numDocs is 0 this is
    // NaN/Infinity, but the loop below then samples nothing, so it is benign.
    float samplingRate = ((float) numSamples) / numDocs;
    ArrayList<Integer> docIdsList = new ArrayList<Integer>();
    ArrayList<Integer> fieldCountList = new ArrayList<Integer>();
    int numDocsScanned = 0, numDocsSampled = 0;
    for (int i = firstDoc, count = reader.maxDoc(); i < count; i++) {
        numDocsScanned++;
        if (reader.isDeleted(i)) {
            continue;
        }
        // Sample only when the target count (scanned * rate) has pulled ahead
        // of how many we have actually taken so far.
        if (((int) (numDocsScanned * samplingRate)) <= numDocsSampled) {
            continue;
        }
        numDocsSampled++;
        Document doc = reader.document(i);
        Field fields[] = doc.getFields(fieldName);
        if (fields.length > 0) {
            // Only stored fields can be read back later.
            if (fields[0].isStored()) {
                docIdsList.add(i);
                fieldCountList.add(fields.length);
            }
        }
    }
    docIds = new int[docIdsList.size()];
    for (int i = 0, count = docIdsList.size(); i < count; i++) {
        docIds[i] = docIdsList.get(i);
    }
    // Convert per-document counts into a cumulative prefix-sum array.
    fieldCounts = new int[fieldCountList.size()];
    for (int i = 0, count = fieldCountList.size(); i < count; i++) {
        fieldCounts[i] = fieldCountList.get(i);
        if (i > 0) {
            fieldCounts[i] += fieldCounts[i - 1];
        }
    }
}
From source file:org.toubassi.femtozip.lucene.IndexDumper.java
License:Apache License
/**
 * Dumps a sample of stored field values from the index at {@code indexPath}
 * to stdout. Deleted documents are skipped; roughly {@code numSamples} live
 * documents are printed, each as "DOCUMENT: <id>" followed by one line per
 * non-empty stored, uncompressed field value. If {@code fieldsToDump} is set,
 * only those fields are printed.
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);
    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);
    numDocs = reader.numDocs();
    int maxDocId = reader.maxDoc();
    // Fraction of live docs to print; compared against a running tally below.
    float samplingRate = ((float) numSamples) / numDocs;
    int numDocsScanned = 0;
    int numDocsSampled = 0;
    for (int docId = 0; docId < maxDocId; docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        numDocsScanned++;
        // Sample only when the target count has pulled ahead of the taken count.
        if (((int) (numDocsScanned * samplingRate)) <= numDocsSampled) {
            continue;
        }
        numDocsSampled++;
        Document doc = reader.document(docId);
        System.out.println("DOCUMENT: " + docId);
        for (String fieldName : fieldNames) {
            if (fieldsToDump != null && fieldsToDump.indexOf(fieldName) == -1) {
                continue;
            }
            Field[] fields = doc.getFields(fieldName);
            for (Field field : fields) {
                if (!field.isStored() || field.isCompressed()) {
                    // TODO if its compressed, uncompress it and benchmark it.
                    continue;
                }
                byte[] bytes;
                if (field.isBinary()) {
                    // Copy out the binary slice (value may be offset into a shared buffer).
                    bytes = new byte[field.getBinaryLength()];
                    System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), bytes, 0,
                            field.getBinaryLength());
                } else {
                    String value = field.stringValue();
                    bytes = value.getBytes("UTF-8");
                }
                if (bytes.length > 0) {
                    System.out.print("   " + fieldName + " " + bytes.length + " ");
                    System.out.write(bytes);
                    System.out.println();
                }
            }
        }
    }
    reader.close();
}
From source file:org.toubassi.femtozip.lucene.StoredFieldDumper.java
License:Apache License
protected void dump() throws IOException { IndexReader reader = IndexReader.open(indexPath); Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL); String[] fieldNames = new String[allFields.size()]; allFields.toArray(fieldNames);/*ww w. jav a2s .c o m*/ Map<String, OutputStream> output = new HashMap<String, OutputStream>(); long lastStatusTime = 0; for (int docId = 0, count = reader.maxDoc(); docId < count; docId++) { Document doc = reader.document(docId); if (System.currentTimeMillis() - lastStatusTime > 5000) { lastStatusTime = System.currentTimeMillis(); System.out.println("Processing docId " + docId + " of " + count); } for (String fieldName : fieldNames) { Field[] fields = doc.getFields(fieldName); for (Field field : fields) { if (!field.isStored() || field.isCompressed()) { // TODO if its compressed, uncompress it and benchmark it. continue; } byte[] bytes; if (field.isBinary()) { bytes = new byte[field.getBinaryLength()]; System.arraycopy(field.getBinaryValue(), field.getBinaryOffset(), bytes, 0, field.getBinaryLength()); } else { String value = field.stringValue(); bytes = value.getBytes("UTF-8"); } OutputStream out = output.get(fieldName); if (out == null) { FileOutputStream fileOut = new FileOutputStream(outputBasePath + "_" + fieldName); out = new BufferedOutputStream(fileOut); output.put(fieldName, out); } out.write(bytes); } } } reader.close(); for (Map.Entry<String, OutputStream> entry : output.entrySet()) { entry.getValue().close(); } }
From source file:org.toubassi.femtozip.lucene.StoredFieldExploder.java
License:Apache License
/**
 * Explodes up to {@code numSamples} stored values of {@code fieldName} into
 * individual files named "1.<field>", "2.<field>", ... under
 * {@code outputBasePath}. Deleted documents and documents lacking the field
 * are skipped.
 *
 * @throws IOException on index read or file write failure
 */
protected void dump() throws IOException {
    IndexReader reader = IndexReader.open(indexPath);
    Collection<?> allFields = reader.getFieldNames(IndexReader.FieldOption.ALL);
    String[] fieldNames = new String[allFields.size()];
    allFields.toArray(fieldNames);
    int numProcessed = 0;
    // Stop as soon as numSamples documents have been written.
    for (int docId = 0, count = reader.maxDoc(); docId < count && numProcessed < numSamples; docId++) {
        if (reader.isDeleted(docId)) {
            continue;
        }
        Document doc = reader.document(docId);
        Field field = doc.getField(fieldName);
        if (field != null) {
            // NOTE(review): out.close() is not in a finally block, so a write
            // failure leaks the stream — consider try/finally here.
            FileOutputStream out = new FileOutputStream(
                    outputBasePath + File.separator + (numProcessed + 1) + "." + fieldName);
            if (field.isBinary()) {
                out.write(field.getBinaryValue(), field.getBinaryOffset(), field.getBinaryLength());
            } else {
                out.write(field.stringValue().getBytes("UTF-8"));
            }
            out.close();
            numProcessed++;
        }
    }
    reader.close();
}
From source file:org.zanata.hibernate.search.LocaleFilter.java
License:Open Source License
/**
 * Builds the set of document ids whose "locale" field exactly matches this
 * filter's locale, as an {@code OpenBitSet} sized to {@code reader.maxDoc()}.
 *
 * @param reader index reader to enumerate matching documents from
 * @return bit set with one bit set per matching document id
 * @throws IOException on index read failure
 */
@Override
public DocIdSet getDocIdSet(IndexReader reader) throws IOException {
    OpenBitSet bitSet = new OpenBitSet(reader.maxDoc());
    log.debug("getDocIdSet for {}", locale);
    Term term = new Term("locale", locale.toString());
    TermDocs termDocs = reader.termDocs(term);
    try {
        while (termDocs.next()) {
            bitSet.set(termDocs.doc());
        }
    } finally {
        // FIX: TermDocs holds index resources and must be closed; the original
        // enumerated it without ever closing it.
        termDocs.close();
    }
    return bitSet;
}