Example usage for org.apache.lucene.index IndexReader maxDoc

List of usage examples for org.apache.lucene.index IndexReader maxDoc

Introduction

On this page you can find example usages of org.apache.lucene.index.IndexReader.maxDoc().

Prototype

public abstract int maxDoc();

Source Link

Document

Returns one greater than the largest possible document number.

Usage

From source file:de.unihildesheim.iw.lucene.document.FeedbackQuery.java

License:Open Source License

/**
 * Determines how many feedback documents can actually be retrieved from the index.
 *
 * @param reader Reader to access the index
 * @param docCount Number of documents requested; {@link Integer#MAX_VALUE} is answered
 * with the index size without logging a warning
 * @return {@code docCount}, capped at {@code reader.maxDoc()}
 */
static int getMaxDocs(@NotNull final IndexReader reader, final int docCount) {
    final int indexSize = reader.maxDoc();

    // "Give me everything": hand back the index size silently.
    if (docCount == Integer.MAX_VALUE) {
        return indexSize;
    }

    // The request fits into the index, so there is nothing to cap.
    if (docCount <= indexSize) {
        return docCount;
    }

    // More documents requested than the index can provide: cap and tell the user.
    final int cappedCount = Math.min(indexSize, docCount);
    LOG.warn("Requested number of feedback documents ({}) "
            + "is larger than the amount of documents in the index ({}). "
            + "Returning only {} feedback documents at maximum.", docCount, indexSize, cappedCount);
    return cappedCount;
}

From source file:edu.coeia.reports.IndexUtil.java

License:Open Source License

/**
 * Collects the full paths of all indexed documents whose document type is FILE.
 *
 * <p>Each live document in the case index is inspected; documents whose
 * {@code DOCUMENT_TYPE} field equals the FILE type string and that carry a non-empty
 * {@code FILE_PATH} contribute {@code caseFacade.getFullPath(relativePath)} to the result.
 *
 * @param caseFacade facade used to locate the case index folder and to resolve full paths
 * @return the resolved full paths, in document-number order (possibly empty, never null)
 * @throws IOException if the index cannot be opened or read
 */
public static List<String> getAllFilePaths(final CaseFacade caseFacade) throws IOException {

    List<String> files = new ArrayList<String>();

    String indexDir = caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // try/finally so the reader is released even when reading a document fails.
    try {
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.DOCUMENT_TYPE);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentType = field.stringValue();
            if (!documentType.equals(IndexingConstant
                    .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) {
                continue;
            }

            // Document.get returns null when the field is absent; guard before isEmpty().
            String relativePath = document.get(IndexingConstant.FILE_PATH);
            if (relativePath != null && !relativePath.isEmpty()) {
                files.add(caseFacade.getFullPath(relativePath));
            }
        }
    } finally {
        indexReader.close();
    }

    return files;
}

From source file:edu.coeia.reports.IndexUtil.java

License:Open Source License

/**
 * Collects the full paths of all indexed FILE documents whose "Author" field matches
 * one of the given authors.
 *
 * <p>A document contributes {@code caseFacade.getFullPath(relativePath)} when its
 * {@code DOCUMENT_TYPE} equals the FILE type string, its {@code FILE_PATH} is non-empty,
 * its "Author" value is non-blank and {@code Utilities.isFound(authers, auther)} is true.
 *
 * @param caseFacade facade used to locate the case index folder and to resolve full paths
 * @param authers author names to match against
 * @return the resolved full paths, in document-number order (possibly empty, never null)
 * @throws IOException if the index cannot be opened or read
 */
private static List<String> getAllFilePathsHaveAuther(final CaseFacade caseFacade, final List<String> authers)
        throws IOException {

    List<String> files = new ArrayList<String>();

    String indexDir = caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // try/finally so the reader is released even when reading a document fails.
    try {
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.DOCUMENT_TYPE);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentType = field.stringValue();
            if (!documentType.equals(IndexingConstant
                    .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) {
                continue;
            }

            String relativePath = document.get(IndexingConstant.FILE_PATH);
            String auther = document.get("Author");

            // Document.get returns null when the field is absent; guard before isEmpty().
            if (relativePath != null && !relativePath.isEmpty() && auther != null && !auther.trim().isEmpty()
                    && Utilities.isFound(authers, auther)) {
                files.add(caseFacade.getFullPath(relativePath));
            }
        }
    } finally {
        indexReader.close();
    }

    return files;
}

From source file:edu.coeia.reports.IndexUtil.java

License:Open Source License

/**
 * Collects the "Author" value of every indexed FILE document that has one.
 *
 * <p>A document contributes its author when its {@code DOCUMENT_TYPE} equals the FILE
 * type string, its {@code FILE_PATH} is non-empty and its "Author" value is non-blank.
 * The same author is added once per matching document, so the result may contain
 * duplicates.
 *
 * @param caseFacade facade used to locate the case index folder
 * @return the author values, in document-number order (possibly empty, never null)
 * @throws IOException if the index cannot be opened or read
 */
public static List<String> getAllAuthers(final CaseFacade caseFacade) throws IOException {

    List<String> authors = new ArrayList<String>();

    String indexDir = caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // try/finally so the reader is released even when reading a document fails.
    try {
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.DOCUMENT_TYPE);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentType = field.stringValue();
            if (!documentType.equals(IndexingConstant
                    .fromDocumentTypeToString(IndexingConstant.DOCUMENT_GENERAL_TYPE.FILE))) {
                continue;
            }

            String relativePath = document.get(IndexingConstant.FILE_PATH);
            String auther = document.get("Author");

            // Document.get returns null when the field is absent; guard before isEmpty().
            if (relativePath != null && !relativePath.isEmpty() && auther != null && !auther.trim().isEmpty()) {
                authors.add(auther);
            }
        }
    } finally {
        indexReader.close();
    }

    return authors;
}

From source file:edu.coeia.tasks.CaseDuplicationTask.java

License:Open Source License

/**
 * Fills the panel's case-duplication map with one entry per indexed document that
 * carries a {@code DOCUMENT_HASH} field: the hash is the key, the document's
 * {@code DOCUMENT_ID} value is the value. A later document with the same hash
 * overwrites the earlier entry.
 *
 * @throws Exception if the index cannot be opened or read
 */
private void fillCaseDuplicationMap() throws Exception {
    String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER;
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    try {
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.DOCUMENT_HASH);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentHash = field.stringValue();
            this.panel.getCaseDuplicationMap().put(documentHash,
                    document.get(IndexingConstant.DOCUMENT_ID));
        }
    } finally {
        indexReader.close();
    }
}

From source file:edu.coeia.tasks.CommonKeywordsTask.java

License:Open Source License

/**
 * Builds a map from term text to document frequency for all terms of allowed fields
 * whose document frequency is at least {@code maxDoc() / 100}.
 *
 * <p>Enumeration stops early when {@code isCancelledTask()} reports true; the terms
 * collected so far are still returned. If the same term text occurs in several allowed
 * fields, the entry seen last wins.
 *
 * @return term text mapped to its document frequency (possibly empty, never null)
 * @throws IOException if the index cannot be opened or read
 */
public Map<String, Integer> getAllTermFreqFromItems() throws IOException {
    Map<String, Integer> map = new HashMap<String, Integer>();

    String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER;
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // Nested try/finally: both the term enumeration and the reader must be released
    // even when enumeration fails part-way through.
    try {
        TermEnum terms = indexReader.terms();
        try {
            // Integer division: threshold is 1% of maxDoc(), rounded down.
            int factor = indexReader.maxDoc() / 100;

            while (terms.next()) {
                if (isCancelledTask()) {
                    break;
                }

                Term term = terms.term();

                if (this.isAllowedFeild(term.field().trim())) {
                    String termText = term.text();
                    int frequency = indexReader.docFreq(term);

                    if (frequency >= factor) {
                        map.put(termText, frequency);
                    }
                }
            }
        } finally {
            terms.close();
        }

        System.out.println("map size: " + map.size());
    } finally {
        indexReader.close();
    }

    return map;
}

From source file:edu.coeia.tasks.EmailProcessingTask.java

License:Open Source License

/**
 * Scans the case index for e-mail items whose {@code constant} field ends with
 * {@code value}, counts them per (from, to, time) entry and pushes the counts to the
 * result table.
 *
 * <p>Scanning stops early when {@code isCancelledTask()} reports true; whatever was
 * counted up to that point is still added to the table. I/O failures are logged and
 * not propagated.
 *
 * @param value suffix the field value must end with; also used as the recipient when
 * an item has no (or a blank) "to" value
 * @param constant name of the index field to test
 */
private void getAllMessageInEmailSource(final String value, final String constant) {
    IndexReader indexReader = null;

    try {
        // NOTE(review): ids is filled below but never read in this method; it is kept so
        // that Integer.valueOf(item.getDocumentId()) is still evaluated as before.
        List<Integer> ids = new ArrayList<Integer>();

        Directory dir = FSDirectory.open(new File(this.panel.getCaseFacade().getCaseIndexFolderLocation()));
        indexReader = IndexReader.open(dir);
        Map<Entry, Integer> messageCounter = new HashMap<Entry, Integer>();

        for (int i = 0; i < indexReader.maxDoc(); i++) {
            if (this.isCancelledTask()) {
                break;
            }

            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(constant);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String fieldValue = field.stringValue();
            if (!fieldValue.endsWith(value)) {
                continue;
            }

            EmailItem item = (EmailItem) ItemFactory.newInstance(document, panel.getCaseFacade(),
                    false);

            // Fall back to the searched value when the item has no usable recipient.
            String to;
            if (item.getTo() == null || item.getTo().trim().isEmpty()) {
                to = value;
            } else {
                to = item.getTo();
            }

            if (checkingItemType(item)) {
                Entry entry = new Entry(item.getFrom(), to, item.getTime());

                Integer seen = messageCounter.get(entry);
                messageCounter.put(entry, seen == null ? 1 : seen + 1);

                ids.add(Integer.valueOf(item.getDocumentId()));
            }
        }

        if (this.type == EMAIL_PROCESSING_TYPE.INBOX || this.type == EMAIL_PROCESSING_TYPE.SEND_ITEM) {
            addResultToTable(messageCounter);
        } else {
            addDateResultToTable(messageCounter);
        }

        messageCounter.clear();
    } catch (IOException ex) {
        // Logged once through the logger; the former printStackTrace() duplicated this.
        Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // indexReader is still null when opening the directory or the reader failed.
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException ex) {
                Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}

From source file:edu.coeia.tasks.ExtensionFrequencyTask.java

License:Open Source License

/**
 * Computes the frequency factor for the case index: {@code maxDoc() / 200}
 * using integer division.
 *
 * @return {@code maxDoc()} of the case index divided by 200
 * @throws IOException if the index cannot be opened or closed
 */
private int getFactor() throws IOException {
    final File indexFolder = new File(this.caseFacade.getCaseIndexFolderLocation());
    final IndexReader reader = IndexReader.open(FSDirectory.open(indexFolder));

    final int scaledDocCount = reader.maxDoc() / 200;
    reader.close();

    return scaledDocCount;
}

From source file:edu.coeia.tasks.MultimediaLoadingTask.java

License:Open Source License

/**
 * Walks the case index and queues one table row (absolute path, extension, last
 * modified, hidden flag, length) on the event dispatch thread for every document whose
 * {@code FILE_MIME} value matches the media type selected in {@code type}.
 *
 * <p>Scanning stops early when {@code isCancelledTask()} reports true.
 *
 * @throws IOException if the index cannot be opened or read
 */
private void loadItems() throws IOException {
    String indexDir = this.caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // try/finally so the reader is released even when reading a document fails.
    try {
        for (int i = 0; i < indexReader.maxDoc(); i++) {
            if (this.isCancelledTask()) {
                break;
            }

            // maxDoc() is an upper bound on document numbers, so ids of deleted
            // documents are visited as well; document(int) does not check for them.
            if (indexReader.isDeleted(i)) {
                continue;
            }

            Document document = indexReader.document(i);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.FILE_MIME);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentExtension = field.stringValue();

            // All four media types resolve the path identically; only the predicate differs.
            boolean matchesSelectedType = (type == MultimediaViewerPanel.TYPE.IMAGE && isImage(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.AUDIO && isAudio(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.ARCHIVE && isArchieve(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.VIDEO && isVideo(documentExtension));
            if (!matchesSelectedType) {
                continue;
            }

            // A null path is skipped; appending it to a StringBuilder would have produced
            // the literal text "null" and a bogus table row.
            final String fullpath = this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH));
            if (fullpath == null || fullpath.isEmpty()) {
                continue;
            }

            EventQueue.invokeLater(new Runnable() {
                @Override
                public void run() {
                    File file = new File(fullpath);
                    Object[] data = { file.getAbsolutePath(), FileUtil.getExtension(file),
                            file.lastModified(), file.isHidden(), file.length() };
                    JTableUtil.addRowToJTable(panel.getTable(), data);
                }
            });
        }
    } finally {
        indexReader.close();
    }
}

From source file:edu.illinois.cs.cogcomp.wikifier.utils.examples.LuceneExample.java

License:Open Source License

/**
 * Demo entry point: builds the index under "testIndex", prints the IDF of every term
 * in the "text" field, then prints each document's title followed by its per-term TFs.
 *
 * @param args unused
 * @throws IOException if the index cannot be created, read or closed
 * @throws ParseException declared by the original signature; presumably raised while
 * building the index (not visible in this block)
 */
public static void main(String[] args) throws IOException, ParseException {

    String pathToIndexDir = "testIndex";
    createIndex(pathToIndexDir);
    IndexReader reader = Lucene.reader(pathToIndexDir);

    // Release the reader even when printing a document fails.
    try {
        Map<String, Float> idfs = Lucene.getIdfs(reader, "text");
        for (Map.Entry<String, Float> idf : idfs.entrySet()) {
            System.out.println(idf.getKey() + " " + idf.getValue());
        }

        System.out.println("TFS");
        for (int i = 0; i < reader.maxDoc(); i++) {
            System.out.println(reader.document(i).getField("title").stringValue());
            Map<String, Float> tfs = Lucene.getTfs(reader, "text", i);
            for (Map.Entry<String, Float> tf : tfs.entrySet()) {
                System.out.println(tf.getKey() + " " + tf.getValue());
            }
        }
    } finally {
        reader.close();
    }

}