List of usage examples for org.apache.lucene.index IndexReader maxDoc
public abstract int maxDoc();
From source file:de.unihildesheim.iw.lucene.document.FeedbackQuery.java
License:Open Source License
/** * Get the maximum number of documents that can be retrieved. * * @param reader Reader to access the index * @param docCount Number of documents that should be retrieved * @return Actual number of documents possible to retrieve */// w ww .j av a 2s . c om static int getMaxDocs(@NotNull final IndexReader reader, final int docCount) { final int maxRetDocs; // maximum number of documents that can be returned if (docCount == Integer.MAX_VALUE) { return reader.maxDoc(); } final int maxIdxDocs = reader.maxDoc(); if (docCount > maxIdxDocs) { maxRetDocs = Math.min(maxIdxDocs, docCount); LOG.warn("Requested number of feedback documents ({}) " + "is larger than the amount of documents in the index ({}). " + "Returning only {} feedback documents at maximum.", docCount, maxIdxDocs, maxRetDocs); } else { maxRetDocs = docCount; } return maxRetDocs; }
From source file:edu.coeia.reports.IndexUtil.java
License:Open Source License
/**
 * Collects the full paths of all indexed documents whose DOCUMENT_TYPE field
 * equals the FILE document type.
 *
 * <p>The index is opened from {@code caseFacade.getCaseIndexFolderLocation()}.
 * Documents without a stored, non-empty FILE_PATH field are skipped.
 *
 * @param caseFacade facade used to locate the index and to resolve relative paths
 * @return full paths, in document-number order
 * @throws IOException if the index cannot be opened or read
 */
public static List<String> getAllFilePaths(final CaseFacade caseFacade) throws IOException {
    List<String> files = new ArrayList<String>();

    String indexDir = caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // Close the reader even when reading a document fails part-way through.
    try {
        int maxDoc = indexReader.maxDoc();
        for (int i = 0; i < maxDoc; i++) {
            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.DOCUMENT_TYPE);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentType = field.stringValue();
            if (!documentType.equals(IndexingConstant
                    .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) {
                continue;
            }

            // Document.get returns null when the field is not stored for this document.
            String relativePath = document.get(IndexingConstant.FILE_PATH);
            if (relativePath != null && !relativePath.isEmpty()) {
                files.add(caseFacade.getFullPath(relativePath));
            }
        }
    } finally {
        indexReader.close();
    }

    return files;
}
From source file:edu.coeia.reports.IndexUtil.java
License:Open Source License
private static List<String> getAllFilePathsHaveAuther(final CaseFacade caseFacade, final List<String> authers) throws IOException { List<String> files = new ArrayList<String>(); String indexDir = caseFacade.getCaseIndexFolderLocation(); Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); for (int i = 0; i < indexReader.maxDoc(); i++) { Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.DOCUMENT_TYPE); if (field != null && field.stringValue() != null) { String path = field.stringValue(); if (path.equals(IndexingConstant .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) { String relativePath = document.get(IndexingConstant.FILE_PATH); String auther = document.get("Author"); if (!relativePath.isEmpty() && auther != null && !auther.trim().isEmpty() && Utilities.isFound(authers, auther)) { String fullpath = caseFacade.getFullPath(relativePath); files.add(fullpath); }//from ww w . j ava2 s . c o m } } } } indexReader.close(); return files; }
From source file:edu.coeia.reports.IndexUtil.java
License:Open Source License
public static List<String> getAllAuthers(final CaseFacade caseFacade) throws IOException { List<String> files = new ArrayList<String>(); String indexDir = caseFacade.getCaseIndexFolderLocation(); Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); for (int i = 0; i < indexReader.maxDoc(); i++) { Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.DOCUMENT_TYPE); if (field != null && field.stringValue() != null) { String path = field.stringValue(); if (path.equals(IndexingConstant .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) { String relativePath = document.get(IndexingConstant.FILE_PATH); String auther = document.get("Author"); if (!relativePath.isEmpty() && auther != null && !auther.trim().isEmpty()) { files.add(auther); }//from www. j av a2s . co m } } } } indexReader.close(); return files; }
From source file:edu.coeia.tasks.CaseDuplicationTask.java
License:Open Source License
private void fillCaseDuplicationMap() throws Exception { String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER; Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); try {//from w w w. java2s .co m for (int i = 0; i < indexReader.maxDoc(); i++) { Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.DOCUMENT_HASH); if (field != null && field.stringValue() != null) { String documentHash = field.stringValue(); this.panel.getCaseDuplicationMap().put(documentHash, document.get(IndexingConstant.DOCUMENT_ID)); } } } } finally { indexReader.close(); } }
From source file:edu.coeia.tasks.CommonKeywordsTask.java
License:Open Source License
public Map<String, Integer> getAllTermFreqFromItems() throws IOException { Map<String, Integer> map = new HashMap<String, Integer>(); String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER; Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); TermEnum terms = indexReader.terms(); int factor = indexReader.maxDoc() / 100; while (terms.next()) { if (isCancelledTask()) break; Term term = terms.term();//from www . j av a 2 s . c o m if (this.isAllowedFeild(term.field().trim())) { String termText = term.text(); int frequency = indexReader.docFreq(term); if (frequency >= factor) map.put(termText, frequency); } } System.out.println("map size: " + map.size()); indexReader.close(); return map; }
From source file:edu.coeia.tasks.EmailProcessingTask.java
License:Open Source License
private void getAllMessageInEmailSource(final String value, final String constant) { IndexReader indexReader = null; try {// w w w. j ava 2 s . c om List<Integer> ids = new ArrayList<Integer>(); Directory dir = FSDirectory.open(new File(this.panel.getCaseFacade().getCaseIndexFolderLocation())); indexReader = IndexReader.open(dir); Map<Entry, Integer> messageCounter = new HashMap<Entry, Integer>(); for (int i = 0; i < indexReader.maxDoc(); i++) { if (this.isCancelledTask()) { break; } Document document = indexReader.document(i); if (document != null) { Field field = document.getField(constant); if (field != null && field.stringValue() != null) { String tmp = field.stringValue(); if (tmp.endsWith(value)) { EmailItem item = (EmailItem) ItemFactory.newInstance(document, panel.getCaseFacade(), false); String to = ""; if (item.getTo() == null || item.getTo().trim().isEmpty()) to = value; else to = item.getTo(); if (checkingItemType(item)) { Entry entry = new Entry(item.getFrom(), to, item.getTime()); Integer indexNo = messageCounter.get(entry); if (indexNo == null) { messageCounter.put(entry, 1); } else { messageCounter.put(entry, indexNo + 1); } ids.add(Integer.valueOf(item.getDocumentId())); } } } } } if (this.type == EMAIL_PROCESSING_TYPE.INBOX || this.type == EMAIL_PROCESSING_TYPE.SEND_ITEM) addResultToTable(messageCounter); else addDateResultToTable(messageCounter); messageCounter.clear(); messageCounter = null; } catch (IOException ex) { ex.printStackTrace(); Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex); } finally { try { indexReader.close(); } catch (IOException ex) { Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex); } } }
From source file:edu.coeia.tasks.ExtensionFrequencyTask.java
License:Open Source License
/**
 * Opens the case index, reads its {@code maxDoc()} value and returns that
 * value divided by 200 (integer division). The reader is closed before returning.
 *
 * @return {@code maxDoc() / 200}
 * @throws IOException if the index cannot be opened or closed
 */
private int getFactor() throws IOException {
    final File indexFolder = new File(this.caseFacade.getCaseIndexFolderLocation());
    final IndexReader reader = IndexReader.open(FSDirectory.open(indexFolder));

    final int result = reader.maxDoc() / 200;
    reader.close();

    return result;
}
From source file:edu.coeia.tasks.MultimediaLoadingTask.java
License:Open Source License
private void loadItems() throws IOException { String indexDir = this.caseFacade.getCaseIndexFolderLocation(); Directory dir = FSDirectory.open(new File(indexDir)); IndexReader indexReader = IndexReader.open(dir); for (int i = 0; i < indexReader.maxDoc(); i++) { if (this.isCancelledTask()) break; Document document = indexReader.document(i); if (document != null) { Field field = document.getField(IndexingConstant.FILE_MIME); if (field != null && field.stringValue() != null) { String documentExtension = field.stringValue(); final StringBuilder fullpath = new StringBuilder(); if (type == MultimediaViewerPanel.TYPE.IMAGE && isImage(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.AUDIO && isAudio(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.ARCHIVE && isArchieve(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); } else if (type == MultimediaViewerPanel.TYPE.VIDEO && isVideo(documentExtension)) { fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH))); }//w w w .j av a2 s . c o m if (!fullpath.toString().isEmpty()) { EventQueue.invokeLater(new Runnable() { @Override public void run() { File file = new File(fullpath.toString()); Object[] data = { file.getAbsolutePath(), FileUtil.getExtension(file), file.lastModified(), file.isHidden(), file.length() }; JTableUtil.addRowToJTable(panel.getTable(), data); } }); } } } } indexReader.close(); }
From source file:edu.illinois.cs.cogcomp.wikifier.utils.examples.LuceneExample.java
License:Open Source License
public static void main(String[] args) throws IOException, ParseException { String pathToIndexDir = "testIndex"; createIndex(pathToIndexDir);// ww w . j av a2s. c o m IndexReader reader = Lucene.reader(pathToIndexDir); Map<String, Float> idfs = Lucene.getIdfs(reader, "text"); for (String k : idfs.keySet()) { System.out.println(k + " " + idfs.get(k)); } System.out.println("TFS"); for (int i = 0; i < reader.maxDoc(); i++) { System.out.println(reader.document(i).getField("title").stringValue()); Map<String, Float> tfs = Lucene.getTfs(reader, "text", i); for (String k : tfs.keySet()) { System.out.println(k + " " + tfs.get(k)); } } }