List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the n-th Document in this index.
From source file:edu.coeia.tasks.CaseDuplicationTask.java
License:Open Source License
private void fillCaseDuplicationMap() throws Exception { String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER; Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); try {//from w w w . jav a2 s.com for (int i = 0; i < indexReader.maxDoc(); i++) { Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.DOCUMENT_HASH); if (field != null && field.stringValue() != null) { String documentHash = field.stringValue(); this.panel.getCaseDuplicationMap().put(documentHash, document.get(IndexingConstant.DOCUMENT_ID)); } } } } finally { indexReader.close(); } }
From source file:edu.coeia.tasks.EmailProcessingTask.java
License:Open Source License
/**
 * Scans the case index for e-mail documents whose {@code constant} field ends with
 * {@code value}, counts them per (from, to, time) entry and pushes the counts to the
 * result table.
 *
 * <p>The scan stops early when the task is cancelled; whatever was counted up to
 * that point is still handed to the table. I/O failures are logged, not rethrown.
 *
 * @param value    suffix the field value must end with; also used as the recipient
 *                 when the item has no (or a blank) "to" value
 * @param constant name of the stored index field to test
 */
private void getAllMessageInEmailSource(final String value, final String constant) {
    IndexReader indexReader = null;
    try {
        Directory dir = FSDirectory.open(new File(this.panel.getCaseFacade().getCaseIndexFolderLocation()));
        indexReader = IndexReader.open(dir);

        Map<Entry, Integer> messageCounter = new HashMap<Entry, Integer>();
        final int maxDoc = indexReader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (this.isCancelledTask()) {
                break;
            }
            // Deleted ids cannot be loaded from the reader; skip them.
            if (indexReader.isDeleted(i)) {
                continue;
            }
            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }
            Field field = document.getField(constant);
            if (field == null || field.stringValue() == null) {
                continue;
            }
            if (!field.stringValue().endsWith(value)) {
                continue;
            }

            EmailItem item = (EmailItem) ItemFactory.newInstance(document, panel.getCaseFacade(), false);
            String to = item.getTo();
            if (to == null || to.trim().isEmpty()) {
                to = value;
            }

            if (checkingItemType(item)) {
                Entry entry = new Entry(item.getFrom(), to, item.getTime());
                Integer seen = messageCounter.get(entry);
                messageCounter.put(entry, seen == null ? 1 : seen + 1);
            }
        }

        if (this.type == EMAIL_PROCESSING_TYPE.INBOX || this.type == EMAIL_PROCESSING_TYPE.SEND_ITEM) {
            addResultToTable(messageCounter);
        } else {
            addDateResultToTable(messageCounter);
        }
        messageCounter.clear();
    } catch (IOException ex) {
        Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // The reader is still null when opening the directory or the index failed.
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException ex) {
                Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}
From source file:edu.coeia.tasks.MultimediaLoadingTask.java
License:Open Source License
/**
 * Walks the case index and adds one table row for every live document whose stored
 * {@code IndexingConstant.FILE_MIME} value matches the media category selected by
 * {@code type} (image, audio, archive or video).
 *
 * <p>Rows are added through {@link EventQueue#invokeLater(Runnable)}, so the table is
 * only touched on the AWT event dispatch thread. The scan stops as soon as the task
 * is cancelled.
 *
 * @throws IOException if the index cannot be opened or read
 */
private void loadItems() throws IOException {
    String indexDir = this.caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);
    try {
        final int maxDoc = indexReader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            if (this.isCancelledTask()) {
                break;
            }
            // Deleted ids cannot be loaded from the reader; skip them.
            if (indexReader.isDeleted(i)) {
                continue;
            }
            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }
            Field field = document.getField(IndexingConstant.FILE_MIME);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentExtension = field.stringValue();
            boolean wanted = (type == MultimediaViewerPanel.TYPE.IMAGE && isImage(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.AUDIO && isAudio(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.ARCHIVE && isArchieve(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.VIDEO && isVideo(documentExtension));
            if (!wanted) {
                continue;
            }

            final String fullPath = this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH));
            if (fullPath == null || fullPath.isEmpty()) {
                continue;
            }

            EventQueue.invokeLater(new Runnable() {
                @Override
                public void run() {
                    File file = new File(fullPath);
                    Object[] data = { file.getAbsolutePath(), FileUtil.getExtension(file), file.lastModified(),
                            file.isHidden(), file.length() };
                    JTableUtil.addRowToJTable(panel.getTable(), data);
                }
            });
        }
    } finally {
        // Release the reader even when loading a document fails part-way through.
        indexReader.close();
    }
}
From source file:edu.harvard.iq.dvn.core.index.Indexer.java
License:Apache License
protected void updateStudyDocument(long studyId, String field, String value) throws IOException { IndexReader reader = IndexReader.open(dir, false); try {//ww w.ja va 2 s. c o m if (reader != null) { TermDocs matchingDocuments = reader.termDocs(); if (matchingDocuments != null) { int c = 1; if (matchingDocuments.next()) { // We only expect 1 document when searching by study id. Document studyDocument = reader.document(matchingDocuments.doc()); logger.fine("processing matching document number " + c++); if (studyDocument != null) { logger.fine("got a non-zero doc;"); reader.close(); reader = null; logger.fine("deleted the document;"); //updateDocument(studyDocument, studyId); IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(), IndexWriter.MaxFieldLength.UNLIMITED); localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument); localWriter.commit(); localWriter.close(); logger.fine("wrote the updated version of the document;"); } } } } } catch (IOException ex) { ex.printStackTrace(); } finally { if (reader != null) { reader.close(); } } }
From source file:edu.illinois.cs.cogcomp.wikifier.utils.examples.LuceneExample.java
License:Open Source License
public static void main(String[] args) throws IOException, ParseException { String pathToIndexDir = "testIndex"; createIndex(pathToIndexDir);// w w w . j a v a 2 s . co m IndexReader reader = Lucene.reader(pathToIndexDir); Map<String, Float> idfs = Lucene.getIdfs(reader, "text"); for (String k : idfs.keySet()) { System.out.println(k + " " + idfs.get(k)); } System.out.println("TFS"); for (int i = 0; i < reader.maxDoc(); i++) { System.out.println(reader.document(i).getField("title").stringValue()); Map<String, Float> tfs = Lucene.getTfs(reader, "text", i); for (String k : tfs.keySet()) { System.out.println(k + " " + tfs.get(k)); } } }
From source file:edu.mit.ll.vizlinc.highlight.TokenSources.java
License:Apache License
/**
 * Loads document {@code docId} from {@code reader} and delegates to the
 * document-based {@code getTokenStream} overload.
 *
 * @param reader   index to load the document from
 * @param docId    id of the document to load
 * @param field    field name passed through to the overload
 * @param analyzer analyzer passed through to the overload
 * @return whatever the document-based overload returns
 * @throws IOException if the document cannot be read
 */
public static TokenStream getTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
        throws IOException {
    return getTokenStream(reader.document(docId), field, analyzer);
}
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
private synchronized Directory copyDirectoryExcludeFields(Directory dir, String out_basedir, String out_name, String... fields_to_be_removed) throws IOException { IndexReader reader = DirectoryReader.open(dir); // IndexReader.open(dir, true); // read-only=true Directory newDir = createDirectory(out_basedir, out_name); IndexWriter writer = openIndexWriter(newDir); //log.info("Removing field(s) " + Util.join(fields_to_be_removed, ", ") + " from index."); for (int i = 0; i < reader.numDocs(); i++) { org.apache.lucene.document.Document doc = reader.document(i); for (String field : fields_to_be_removed) doc.removeFields(field);//from w ww . j av a 2s .c o m writer.addDocument(doc); } writer.close(); reader.close(); return newDir; }
From source file:edu.stanford.muse.index.Indexer.java
License:Apache License
private synchronized Directory copyDirectoryWithDocFilter(Directory dir, String out_basedir, String out_name, FilterFunctor filter_func) throws IOException { long startTime = System.currentTimeMillis(); IndexReader reader = DirectoryReader.open(dir); // IndexReader.open(dir, true); // read-only=true Directory newDir = createDirectory(out_basedir, out_name); IndexWriter writer = openIndexWriter(newDir); //log.info("Removing field(s) " + Util.join(fields_to_be_removed, ", ") + " from index."); int count = 0; for (int i = 0; i < reader.numDocs(); i++) { org.apache.lucene.document.Document doc = reader.document(i); if (filter_func == null || filter_func.filter(doc)) { writer.addDocument(doc);/* w ww.jav a2 s. c o m*/ count++; } } writer.close(); reader.close(); log.info("CopyDirectoryWithtDocFilter to dir:" + out_basedir + " name: " + baseDir + " time: " + (System.currentTimeMillis() - startTime) + " ms docs: " + count); return newDir; }
From source file:edu.uniklu.itec.mosaix.engine.Engine.java
License:Open Source License
/** * <p>Evaluates the search results provided by LIRE and * returns the best available match.</p> * <p>This method takes two aspects into account: First, * it uses the relevancy factor as provided by LIRE; * second, it uses implementation instances of the * <code>WeightingStrategy</code> interface added to this * interface.</p>/*from ww w .j a v a2s.c o m*/ * * @param original a non-<code>null</code> image instance. * @param hits a non-<code>null</code> LIRE search result. * @param scalePercentage value from 1-100d * @return the best match as determined by the relevancy * and the relevancy weighting. * @throws IOException if the image could not be loaded. * @see edu.uniklu.itec.mosaix.engine.WeightingStrategy */ public BufferedImage findBestMatch(final BufferedImage original, final ImageSearchHits hits, double scalePercentage, IndexReader reader) throws IOException { assert original != null; assert hits != null; //BufferedImage bestImage = null; WeightingData bestHit = null; float bestRating = Float.NEGATIVE_INFINITY; for (int i = 0; i < hits.length(); i++) { Document doc = reader.document(hits.documentID(i)); String file = doc.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue(); // BufferedImage repl = ImageIO.read(new File(file)); WeightingData data = weightingDataFactory_.newInstance(doc); data.setRelevancy((float) hits.score(i)); data.setSlice(original); data.setId(file); data.setScalePercentage(scalePercentage); // data.setReplacement(repl); float weight = getWeightedRelevancy(data); if (outweightImageReuse) { if (file2occurence.containsKey(file)) weight *= 1f / (((float) file2occurence.get(file)) + 1f); } // Logging.log(this, "Rated " + file + " with " + Float.toString(weight)); if (bestRating < weight) { bestRating = weight; bestHit = data; } } // Logging.log(this, "Enforcing Garbage Collection."); // System.gc(); // suppose, it's badly needed now for (EngineObserver observer : observer_) observer.notifyState(bestHit, EngineObserver.USED); // 
Logging.log(this, "Evaluation complete"); if (outweightImageReuse) { if (file2occurence.containsKey(bestHit.getId())) file2occurence.put(bestHit.getId(), file2occurence.get(bestHit.getId()) + 1); else file2occurence.put(bestHit.getId(), 1); } return bestHit.getReplacement(); }
From source file:edu.utah.bmi.ibiomes.catalog.MetadataLookup.java
License:Open Source License
/** * Get all standard attributes from the dictionary * @return List of standard metadata attributes * @throws IOException /*from w w w. j a v a 2 s . c o m*/ * @throws CorruptIndexException */ public MetadataAttributeList getAllMetadataAttributes() throws CorruptIndexException, IOException { logger.info("Loading list of standard metadata attributes"); MetadataAttributeList attrs = new MetadataAttributeList(); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile)); for (int d = 0; d < reader.numDocs(); d++) { Document doc = reader.document(d); MetadataAttribute attribute = getAttributeFromDocument(doc); attrs.add(attribute); } return attrs; }