Example usage for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the document(int) method of org.apache.lucene.index.IndexReader.

Prototype



public final Document document(int docID) throws IOException

Source Link

Document

Returns the stored fields of the nth Document in this index.

Usage

From source file:edu.coeia.tasks.CaseDuplicationTask.java

License:Open Source License

/**
 * Fills the panel's case-duplication map from the case index.
 *
 * <p>For every live document that has a non-null
 * {@code IndexingConstant.DOCUMENT_HASH} field, the hash is stored as key and
 * the document's {@code IndexingConstant.DOCUMENT_ID} value as value.
 *
 * @throws Exception if the index cannot be opened or read
 */
private void fillCaseDuplicationMap() throws Exception {
    String indexDir = this.aCase.getCaseLocation() + File.separator + ApplicationConstants.CASE_INDEX_FOLDER;
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    try {
        final int maxDoc = indexReader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            // Ids below maxDoc() may belong to deleted documents; document(int)
            // does not guarantee a meaningful result for those, so skip them.
            if (indexReader.isDeleted(docId)) {
                continue;
            }

            Document document = indexReader.document(docId);
            if (document == null) {
                continue;
            }

            Field hashField = document.getField(IndexingConstant.DOCUMENT_HASH);
            if (hashField == null) {
                continue;
            }

            String documentHash = hashField.stringValue();
            if (documentHash != null) {
                this.panel.getCaseDuplicationMap().put(documentHash,
                        document.get(IndexingConstant.DOCUMENT_ID));
            }
        }
    } finally {
        indexReader.close();
    }
}

From source file:edu.coeia.tasks.EmailProcessingTask.java

License:Open Source License

/**
 * Scans the case index for e-mail items whose {@code constant} field ends with
 * {@code value}, counts them per (from, to, time) entry, and passes the counts
 * to the result table.
 *
 * <p>The scan stops early when the task is cancelled. I/O failures are logged
 * and not propagated.
 *
 * @param value    suffix the field value must end with; also used as the
 *                 recipient when an item has no (or a blank) "to" value
 * @param constant name of the index field to inspect
 */
private void getAllMessageInEmailSource(final String value, final String constant) {
    IndexReader indexReader = null;

    try {
        // NOTE(review): ids is filled below but never read in this method.
        List<Integer> ids = new ArrayList<Integer>();

        Directory dir = FSDirectory.open(new File(this.panel.getCaseFacade().getCaseIndexFolderLocation()));
        indexReader = IndexReader.open(dir);
        Map<Entry, Integer> messageCounter = new HashMap<Entry, Integer>();

        final int maxDoc = indexReader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            if (this.isCancelledTask()) {
                break;
            }

            // Ids below maxDoc() may belong to deleted documents; skip them.
            if (indexReader.isDeleted(docId)) {
                continue;
            }

            Document document = indexReader.document(docId);
            if (document == null) {
                continue;
            }

            Field field = document.getField(constant);
            if (field == null) {
                continue;
            }

            String fieldValue = field.stringValue();
            if (fieldValue == null || !fieldValue.endsWith(value)) {
                continue;
            }

            EmailItem item = (EmailItem) ItemFactory.newInstance(document, panel.getCaseFacade(),
                    false);

            // Fall back to the searched value when the item has no usable recipient.
            String to = item.getTo();
            if (to == null || to.trim().isEmpty()) {
                to = value;
            }

            if (checkingItemType(item)) {
                Entry entry = new Entry(item.getFrom(), to, item.getTime());

                Integer seen = messageCounter.get(entry);
                if (seen == null) {
                    messageCounter.put(entry, 1);
                } else {
                    messageCounter.put(entry, seen + 1);
                }

                ids.add(Integer.valueOf(item.getDocumentId()));
            }
        }

        if (this.type == EMAIL_PROCESSING_TYPE.INBOX || this.type == EMAIL_PROCESSING_TYPE.SEND_ITEM) {
            addResultToTable(messageCounter);
        } else {
            addDateResultToTable(messageCounter);
        }

        messageCounter.clear();
    } catch (IOException ex) {
        Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
    } finally {
        // The reader is still null when opening the directory or index failed;
        // closing unconditionally would throw a NullPointerException from here.
        if (indexReader != null) {
            try {
                indexReader.close();
            } catch (IOException ex) {
                Logger.getLogger(EmailProcessingTask.class.getName()).log(Level.SEVERE, null, ex);
            }
        }
    }
}

From source file:edu.coeia.tasks.MultimediaLoadingTask.java

License:Open Source License

/**
 * Walks the case index and adds one table row for every document whose
 * {@code IndexingConstant.FILE_MIME} value matches the media type this task
 * was configured with (image, audio, archive or video).
 *
 * <p>Rows are added via {@code EventQueue.invokeLater}. The scan stops early
 * when the task is cancelled.
 *
 * @throws IOException if the index cannot be opened or read
 */
private void loadItems() throws IOException {
    String indexDir = this.caseFacade.getCaseIndexFolderLocation();
    Directory dir = FSDirectory.open(new File(indexDir));
    IndexReader indexReader = IndexReader.open(dir);

    // try/finally so the reader is released even when reading a document fails.
    try {
        final int maxDoc = indexReader.maxDoc();
        for (int docId = 0; docId < maxDoc; docId++) {
            if (this.isCancelledTask()) {
                break;
            }

            // Ids below maxDoc() may belong to deleted documents; skip them.
            if (indexReader.isDeleted(docId)) {
                continue;
            }

            Document document = indexReader.document(docId);
            if (document == null) {
                continue;
            }

            Field field = document.getField(IndexingConstant.FILE_MIME);
            if (field == null || field.stringValue() == null) {
                continue;
            }

            String documentExtension = field.stringValue();

            // Same checks, in the same order, as the former if/else-if chain;
            // every branch appended the same path, so one condition suffices.
            boolean wanted = (type == MultimediaViewerPanel.TYPE.IMAGE && isImage(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.AUDIO && isAudio(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.ARCHIVE && isArchieve(documentExtension))
                    || (type == MultimediaViewerPanel.TYPE.VIDEO && isVideo(documentExtension));

            final StringBuilder fullpath = new StringBuilder();
            if (wanted) {
                fullpath.append(this.caseFacade.getFullPath(document.get(IndexingConstant.FILE_PATH)));
            }

            if (!fullpath.toString().isEmpty()) {
                EventQueue.invokeLater(new Runnable() {
                    @Override
                    public void run() {
                        File file = new File(fullpath.toString());
                        Object[] data = { file.getAbsolutePath(), FileUtil.getExtension(file),
                                file.lastModified(), file.isHidden(), file.length() };
                        JTableUtil.addRowToJTable(panel.getTable(), data);
                    }
                });
            }
        }
    } finally {
        indexReader.close();
    }
}

From source file:edu.harvard.iq.dvn.core.index.Indexer.java

License:Apache License

/**
 * Re-writes the index document of a study through
 * {@code IndexWriter.updateDocument} keyed on the term {@code id = studyId}.
 *
 * <p>I/O failures during the update are logged and not propagated; only a
 * failure to open or close the reader reaches the caller.
 *
 * <p>NOTE(review): {@code field} and {@code value} are not used by this
 * method. The {@code TermDocs} is obtained from the no-argument
 * {@code termDocs()} and is never positioned on a term before {@code next()}
 * is called -- confirm this selects the intended document.
 *
 * @param studyId id of the study whose document is rewritten
 * @param field   currently unused
 * @param value   currently unused
 * @throws IOException if the reader cannot be opened or closed
 */
protected void updateStudyDocument(long studyId, String field, String value) throws IOException {
    IndexReader reader = IndexReader.open(dir, false);

    try {
        TermDocs matchingDocuments = (reader != null) ? reader.termDocs() : null;

        // We only expect 1 document when searching by study id.
        if (matchingDocuments != null && matchingDocuments.next()) {
            Document studyDocument = reader.document(matchingDocuments.doc());

            logger.fine("processing matching document number 1");
            if (studyDocument != null) {
                logger.fine("got a non-zero doc;");

                // Release the reader before the writer is opened on the same directory.
                reader.close();
                reader = null;

                logger.fine("deleted the document;");

                IndexWriter localWriter = new IndexWriter(dir, getAnalyzer(), isIndexEmpty(),
                        IndexWriter.MaxFieldLength.UNLIMITED);
                boolean committed = false;
                try {
                    localWriter.updateDocument(new Term("id", Long.toString(studyId)), studyDocument);
                    localWriter.commit();
                    committed = true;
                } finally {
                    // Always release the writer (and its write lock); discard
                    // pending changes when the update did not complete.
                    if (committed) {
                        localWriter.close();
                    } else {
                        localWriter.rollback();
                    }
                }
                logger.fine("wrote the updated version of the document;");
            }
        }
    } catch (IOException ex) {
        logger.log(java.util.logging.Level.SEVERE,
                "Failed to update the index document for study " + studyId, ex);
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}

From source file:edu.illinois.cs.cogcomp.wikifier.utils.examples.LuceneExample.java

License:Open Source License

/**
 * Builds a small test index, then prints the IDF of every term in the
 * "text" field followed by, for each document, its title and the term
 * frequencies of its "text" field.
 *
 * @param args ignored
 * @throws IOException    if the index cannot be created or read
 * @throws ParseException propagated from the index helpers
 */
public static void main(String[] args) throws IOException, ParseException {

    String pathToIndexDir = "testIndex";
    createIndex(pathToIndexDir);
    IndexReader reader = Lucene.reader(pathToIndexDir);

    // Close the reader even when printing or a lookup fails.
    try {
        Map<String, Float> idfs = Lucene.getIdfs(reader, "text");
        for (Map.Entry<String, Float> idf : idfs.entrySet()) {
            System.out.println(idf.getKey() + " " + idf.getValue());
        }

        System.out.println("TFS");
        for (int docId = 0; docId < reader.maxDoc(); docId++) {
            System.out.println(reader.document(docId).getField("title").stringValue());
            Map<String, Float> tfs = Lucene.getTfs(reader, "text", docId);
            for (Map.Entry<String, Float> tf : tfs.entrySet()) {
                System.out.println(tf.getKey() + " " + tf.getValue());
            }
        }
    } finally {
        reader.close();
    }

}

From source file:edu.mit.ll.vizlinc.highlight.TokenSources.java

License:Apache License

/**
 * Loads the stored fields of document {@code docId} from {@code reader} and
 * delegates to the {@code Document}-based {@code getTokenStream} overload.
 *
 * @param reader   index reader to load the document from
 * @param docId    id of the document within {@code reader}
 * @param field    name of the field to tokenize
 * @param analyzer analyzer handed to the overload
 * @return the token stream produced by the overload
 * @throws IOException if the document cannot be read
 */
public static TokenStream getTokenStream(IndexReader reader, int docId, String field, Analyzer analyzer)
        throws IOException {
    return getTokenStream(reader.document(docId), field, analyzer);
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Copies every live document of {@code dir} into a newly created directory,
 * removing the given fields from each document first.
 *
 * @param dir                  source index directory
 * @param out_basedir          base directory passed to {@code createDirectory}
 * @param out_name             name passed to {@code createDirectory}
 * @param fields_to_be_removed names of the fields to strip from every copied document
 * @return the newly created directory holding the copied documents
 * @throws IOException if reading the source or writing the copy fails
 */
private synchronized Directory copyDirectoryExcludeFields(Directory dir, String out_basedir, String out_name,
        String... fields_to_be_removed) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    try {
        Directory newDir = createDirectory(out_basedir, out_name);
        IndexWriter writer = openIndexWriter(newDir);
        try {
            // Document ids range over [0, maxDoc()); numDocs() only counts live
            // documents, so it is the wrong loop bound once the index has deletions.
            // liveDocs is null when the index has no deletions.
            org.apache.lucene.util.Bits liveDocs = org.apache.lucene.index.MultiFields.getLiveDocs(reader);
            final int maxDoc = reader.maxDoc();

            for (int docId = 0; docId < maxDoc; docId++) {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    continue; // deleted document
                }

                org.apache.lucene.document.Document doc = reader.document(docId);
                for (String field : fields_to_be_removed) {
                    doc.removeFields(field);
                }
                writer.addDocument(doc);
            }
        } finally {
            writer.close();
        }

        return newDir;
    } finally {
        reader.close();
    }
}

From source file:edu.stanford.muse.index.Indexer.java

License:Apache License

/**
 * Copies the live documents of {@code dir} that pass {@code filter_func} into
 * a newly created directory and logs how long the copy took.
 *
 * @param dir         source index directory
 * @param out_basedir base directory passed to {@code createDirectory}
 * @param out_name    name passed to {@code createDirectory}
 * @param filter_func filter deciding which documents are copied; {@code null} copies all
 * @return the newly created directory holding the copied documents
 * @throws IOException if reading the source or writing the copy fails
 */
private synchronized Directory copyDirectoryWithDocFilter(Directory dir, String out_basedir, String out_name,
        FilterFunctor filter_func) throws IOException {
    long startTime = System.currentTimeMillis();
    int count = 0;
    Directory newDir;

    IndexReader reader = DirectoryReader.open(dir);
    try {
        newDir = createDirectory(out_basedir, out_name);
        IndexWriter writer = openIndexWriter(newDir);
        try {
            // Document ids range over [0, maxDoc()); numDocs() only counts live
            // documents, so it is the wrong loop bound once the index has deletions.
            // liveDocs is null when the index has no deletions.
            org.apache.lucene.util.Bits liveDocs = org.apache.lucene.index.MultiFields.getLiveDocs(reader);
            final int maxDoc = reader.maxDoc();

            for (int docId = 0; docId < maxDoc; docId++) {
                if (liveDocs != null && !liveDocs.get(docId)) {
                    continue; // deleted document
                }

                org.apache.lucene.document.Document doc = reader.document(docId);
                if (filter_func == null || filter_func.filter(doc)) {
                    writer.addDocument(doc);
                    count++;
                }
            }
        } finally {
            writer.close();
        }
    } finally {
        reader.close();
    }

    // Report the output name that was actually used for the new directory.
    log.info("CopyDirectoryWithtDocFilter to dir:" + out_basedir + " name: " + out_name + " time: "
            + (System.currentTimeMillis() - startTime) + " ms docs: " + count);
    return newDir;
}

From source file:edu.uniklu.itec.mosaix.engine.Engine.java

License:Open Source License

/**
 * <p>Evaluates the search results provided by LIRE and
 * returns the best available match.</p>
 * <p>This method takes two aspects into account: First,
 * it uses the relevancy factor as provided by LIRE;
 * second, it uses implementation instances of the
 * <code>WeightingStrategy</code> interface added to this
 * interface.</p>/*from ww w .j  a  v a2s.c  o m*/
 *
 * @param original        a non-<code>null</code> image instance.
 * @param hits            a non-<code>null</code> LIRE search result.
 * @param scalePercentage value from 1-100d
 * @return the best match as determined by the relevancy
 *         and the relevancy weighting.
 * @throws IOException if the image could not be loaded.
 * @see edu.uniklu.itec.mosaix.engine.WeightingStrategy
 */
public BufferedImage findBestMatch(final BufferedImage original, final ImageSearchHits hits,
        double scalePercentage, IndexReader reader) throws IOException {
    assert original != null;
    assert hits != null;

    //BufferedImage bestImage = null;
    WeightingData bestHit = null;
    float bestRating = Float.NEGATIVE_INFINITY;

    for (int i = 0; i < hits.length(); i++) {
        Document doc = reader.document(hits.documentID(i));
        String file = doc.getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue();
        //         BufferedImage repl = ImageIO.read(new File(file));
        WeightingData data = weightingDataFactory_.newInstance(doc);
        data.setRelevancy((float) hits.score(i));
        data.setSlice(original);
        data.setId(file);
        data.setScalePercentage(scalePercentage);
        //         data.setReplacement(repl);

        float weight = getWeightedRelevancy(data);
        if (outweightImageReuse) {
            if (file2occurence.containsKey(file))
                weight *= 1f / (((float) file2occurence.get(file)) + 1f);
        }
        //         Logging.log(this, "Rated " + file + " with " + Float.toString(weight));

        if (bestRating < weight) {
            bestRating = weight;
            bestHit = data;
        }
    }

    //      Logging.log(this, "Enforcing Garbage Collection.");
    //      System.gc(); // suppose, it's badly needed now
    for (EngineObserver observer : observer_)
        observer.notifyState(bestHit, EngineObserver.USED);

    //      Logging.log(this, "Evaluation complete");
    if (outweightImageReuse) {
        if (file2occurence.containsKey(bestHit.getId()))
            file2occurence.put(bestHit.getId(), file2occurence.get(bestHit.getId()) + 1);
        else
            file2occurence.put(bestHit.getId(), 1);
    }
    return bestHit.getReplacement();
}

From source file:edu.utah.bmi.ibiomes.catalog.MetadataLookup.java

License:Open Source License

/**
 * Get all standard attributes from the dictionary
 * @return List of standard metadata attributes
 * @throws IOException /*from w  w  w. j a v  a 2  s  . c o  m*/
 * @throws CorruptIndexException 
 */
public MetadataAttributeList getAllMetadataAttributes() throws CorruptIndexException, IOException {
    logger.info("Loading list of standard metadata attributes");
    MetadataAttributeList attrs = new MetadataAttributeList();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile));
    for (int d = 0; d < reader.numDocs(); d++) {
        Document doc = reader.document(d);
        MetadataAttribute attribute = getAttributeFromDocument(doc);
        attrs.add(attribute);
    }
    return attrs;
}