Example usage for org.apache.lucene.index IndexReader document

Introduction

This page collects example usages of the document method of org.apache.lucene.index.IndexReader.

Prototype

public final Document document(int docID) throws IOException

Document

Returns the stored fields of the nth Document in this index.
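
Before the usage examples, a minimal sketch of the call in isolation may help. It opens a reader over an existing index and prints one stored field per document. The index path and the field name "title" are placeholders, and the sketch assumes a Lucene 5.x-or-later API where FSDirectory.open takes a java.nio.file.Path; it also assumes the index has no deletions, since document(int) does not skip deleted documents.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class DocumentExample {
    public static void main(String[] args) throws Exception {
        // open a reader over an existing index; the path is a placeholder
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            for (int i = 0; i < reader.maxDoc(); i++) {
                Document doc = reader.document(i); // stored fields of document i
                System.out.println(doc.get("title")); // "title" is an assumed stored field name
            }
        }
    }
}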

Usage

From source file:liredemo.SearchResultsTableModel.java

License:Open Source License

/**
 * @param hits
 * @param progress
 * @param reader
 */
public void setHits(ImageSearchHits hits, JProgressBar progress, IndexReader reader) {
    this.hits = hits;
    icons = new ArrayList<ImageIcon>(hits.length());
    if (progress != null)
        progress.setString("Searching finished. Loading images for result list.");
    for (int i = 0; i < hits.length(); i++) {
        ImageIcon icon = null;
        try {
            BufferedImage img = null;
            String fileIdentifier = reader.document(hits.documentID(i))
                    .getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue();
            if (!fileIdentifier.startsWith("http:")) {
                // check if it is a jpg file ...
                if (fileIdentifier.toLowerCase().endsWith(".jpg")) {
                    Metadata metadata = new ExifReader(new FileInputStream(fileIdentifier)).extract();
                    if (metadata.containsDirectory(ExifDirectory.class)) {
                        ExifDirectory exifDirectory = (ExifDirectory) metadata
                                .getDirectory(ExifDirectory.class);
                        if (exifDirectory.containsThumbnail()) {
                            img = ImageIO.read(new ByteArrayInputStream(exifDirectory.getThumbnailData()));
                        }
                    }
                }
                if (img == null) {
                    img = ImageIO.read(new FileInputStream(fileIdentifier));
                }
            } else {
                img = ImageIO.read(new URL(fileIdentifier));
            }
            icon = new ImageIcon(ImageUtils.scaleImage(img, 200));
            if (progress != null)
                progress.setValue((i * 100) / hits.length());
        } catch (Exception ex) {
            Logger.getLogger("global").log(Level.SEVERE, null, ex);
        }
        icons.add(icon);
    }
    if (progress != null)
        progress.setValue(100);
    fireTableDataChanged();
}

From source file:lius.lucene.LuceneActions.java

License:Apache License

public synchronized List ListAllDocuments(String indexDir, LiusConfig lc) {
    List documentsList = new ArrayList();
    List fieldList = lc.getBrowseFieldsToDisplay();
    Map values = null;
    LiusHit lh = null;
    try {
        Directory directory = FSDirectory.getDirectory(indexDir, false);
        IndexReader ir = IndexReader.open(directory);
        int num = ir.numDocs();
        for (int i = 0; i < num; i++) {
            lh = new LiusHit();
            values = new HashMap();
            Document luceneDoc = ir.document(i);
            lh.setDocId(i);
            for (int j = 0; j < fieldList.size(); j++) {
                LiusField lf = (LiusField) fieldList.get(j);
                Field f = luceneDoc.getField(lf.getName());
                LiusField nlf = new LiusField();
                nlf.setName(lf.getName());
                nlf.setLabel(lf.getLabel());
                if (f != null) {
                    String content = f.stringValue();
                    nlf.setValue(content);
                    values.put(lf.getName(), nlf);
                }
            }
            lh.setLiusFieldsMap(values);
            documentsList.add(lh);
        }
        if (ir != null) {
            ir.close();
        }
    } catch (IOException e) {
        logger.error(e.getMessage());
    }
    return documentsList;
}

From source file:lucandra.LucandraTests.java

License:Apache License

public void testWildcardQuery() throws Exception {
    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check wildcard
    Query q = qp.parse("+key:anoth*");
    TopDocs docs = searcher.search(q, 10);

    assertEquals(1, docs.totalHits);

    Document d = indexReader.document(1);

    String val = d.get("key");
    assertEquals("this is another example", val);

    // check wildcard
    q = qp.parse("+date:test*");
    docs = searcher.search(q, 10);

    assertEquals(101, docs.totalHits);

}

From source file:lucandra.LucandraTests.java

License:Apache License

public void testSortQuery() throws Exception {

    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check sort
    Sort sort = new Sort(new SortField("date", SortField.STRING));
    Query q = qp.parse("+key:sort");
    TopDocs docs = searcher.search(q, null, 10, sort);

    for (int i = 0; i < 10; i++) {
        Document d = indexReader.document(docs.scoreDocs[i].doc);
        String dval = d.get("date");
        assertEquals("test" + (i + 200), dval);
    }

}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

/**
 * @param id
 * @param keyColumn
 * @return
 * @throws SearchException
 */
protected IndexResult _deleteCustom(String id, QueryColumn keyColumn) throws SearchException {

    int countBefore = 0;
    int countAfter = 0;

    Map<String, Document> docs = new HashMap<String, Document>();

    Set<String> keys = toSet(keyColumn);
    IndexWriter writer = null;
    String key;
    IndexReader reader = null;
    Document doc;

    synchronized (token) {
        try {
            try {
                reader = _getReader(id, false);
                countBefore = reader.maxDoc();
                for (int i = 0; i < countBefore; i++) {
                    doc = reader.document(i);
                    key = doc.getField("key").stringValue();
                    if (!keys.contains(key))
                        docs.put(key, doc);
                }
            } catch (Exception e) {
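                // intentionally ignored: if the index cannot be read, proceed with an empty document map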
            } finally {
                close(reader);
            }
            countAfter = docs.size();

            writer = _getWriter(id, true);
            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
            while (it.hasNext()) {
                writer.addDocument(it.next().getValue());
            }
            optimizeEL(writer);

        } catch (IOException e) {
            throw new SearchException(e);
        } finally {
            close(writer);
        }
        indexSpellCheck(id);
    }
    int removes = countBefore - countAfter;

    return new IndexResultImpl(removes, 0, 0);
}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

/**
 * @param id
 * @param title
 * @param keyColumn
 * @param bodyColumns
 * @param language
 * @param urlpath
 * @param custom1
 * @param custom2
 * @param custom3
 * @param custom4
 * @return
 * @throws SearchException
 */
protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns,
        String language, Object urlpath, Object custom1, Object custom2, Object custom3, Object custom4)
        throws SearchException {
    _checkLanguage(language);
    String t;
    String url;
    String c1;
    String c2;
    String c3;
    String c4;

    int countExisting = 0;
    int countAdd = keyColumn.size();
    int countNew = 0;

    Map<String, Document> docs = new HashMap<String, Document>();
    IndexWriter writer = null;
    synchronized (token) {
        try {
            // read existing reader
            IndexReader reader = null;
            try {
                reader = _getReader(id, false);
                int len = reader.maxDoc();
                Document doc;
                for (int i = 0; i < len; i++) {
                    doc = reader.document(i);
                    docs.put(doc.getField("key").stringValue(), doc);
                }
            } catch (Exception e) {
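                // intentionally ignored: if no index exists yet, start from an empty document map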
            } finally {
                close(reader);
            }

            countExisting = docs.size();
            writer = _getWriter(id, true);
            int len = keyColumn.size();
            String key;
            for (int i = 1; i <= len; i++) {
                key = Caster.toString(keyColumn.get(i, null), null);
                if (key == null)
                    continue;

                StringBuilder body = new StringBuilder();
                for (int y = 0; y < bodyColumns.length; y++) {
                    Object tmp = bodyColumns[y].get(i, null);
                    if (tmp != null) {
                        body.append(tmp.toString());
                        body.append(' ');
                    }
                }
                //t=(title==null)?null:Caster.toString(title.get(i,null),null);
                //url=(urlpath==null)?null:Caster.toString(urlpath.get(i,null),null);

                t = getRow(title, i);
                url = getRow(urlpath, i);
                c1 = getRow(custom1, i);
                c2 = getRow(custom2, i);
                c3 = getRow(custom3, i);
                c4 = getRow(custom4, i);

                docs.put(key, CustomDocument.getDocument(t, key, body.toString(), url, c1, c2, c3, c4));
            }
            countNew = docs.size();
            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
            Entry<String, Document> entry;
            Document doc;
            while (it.hasNext()) {
                entry = it.next();
                doc = entry.getValue();
                writer.addDocument(doc);
            }
            optimizeEL(writer);
            //writer.optimize();

        } catch (IOException ioe) {
            throw new SearchException(ioe);
        } finally {
            close(writer);
        }
        indexSpellCheck(id);
    }
    int inserts = countNew - countExisting;

    return new IndexResultImpl(0, inserts, countAdd - inserts);
}

From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java

License:Open Source License

@Override
public SearchResulItem[] _search(SearchData data, String criteria, String language, short type,
        String categoryTree, String[] category) throws SearchException {
    try {

        if (type != SEARCH_TYPE_SIMPLE)
            throw new SearchException("search type explicit not supported");
        Analyzer analyzer = SearchUtil.getAnalyzer(language);
        Query query = null;
        Op op = null;
        Object highlighter = null;
        lucee.runtime.search.lucene2.query.QueryParser queryParser = new lucee.runtime.search.lucene2.query.QueryParser();
        AddionalAttrs aa = AddionalAttrs.getAddionlAttrs();
        aa.setHasRowHandling(true);
        int startrow = aa.getStartrow();
        int maxrows = aa.getMaxrows();

        if (!criteria.equals("*")) {
            // FUTURE take this data from calling parameters
            op = queryParser.parseOp(criteria);
            if (op == null)
                criteria = "*";
            else
                criteria = op.toString();
            try {

                query = new QueryParser("contents", analyzer).parse(criteria);
                highlighter = Highlight.createHighlighter(query, aa.getContextHighlightBegin(),
                        aa.getContextHighlightEnd());

            } catch (ParseException e) {
                throw new SearchException(e);
            }
        }

        Resource[] files = _getIndexDirectories();

        if (files == null)
            return new SearchResulItem[0];
        ArrayList<SearchResulItem> list = new ArrayList<SearchResulItem>();
        String ct, c;

        ArrayList<String> spellCheckIndex = spellcheck ? new ArrayList<String>() : null;

        int count = 0;
        IndexReader reader = null;
        Searcher searcher = null;
        try {
            outer: for (int i = 0; i < files.length; i++) {
                if (removeCorrupt(files[i]))
                    continue;
                String strFile = files[i].toString();
                SearchIndex si = indexes.get(files[i].getName());

                if (si == null)
                    continue;
                ct = si.getCategoryTree();
                c = ListUtil.arrayToList(si.getCategories(), ",");

                // check category tree
                if (!matchCategoryTree(ct, categoryTree))
                    continue;
                if (!matchCategories(si.getCategories(), category))
                    continue;

                Document doc;
                String id = files[i].getName();
                data.addRecordsSearched(_countDocs(strFile));

                reader = _getReader(id, false);
                if (query == null && "*".equals(criteria)) {
                    int len = reader.numDocs();
                    for (int y = 0; y < len; y++) {
                        if (startrow > ++count)
                            continue;
                        if (maxrows > -1 && list.size() >= maxrows)
                            break outer;
                        doc = reader.document(y);
                        list.add(createSearchResulItem(highlighter, analyzer, doc, id, 1, ct, c,
                                aa.getContextPassages(), aa.getContextBytes()));
                    }
                } else {
                    if (spellcheck)
                        spellCheckIndex.add(id);
                    // search
                    searcher = new IndexSearcher(reader);
                    Hits hits = searcher.search(query);
                    int len = hits.length();
                    for (int y = 0; y < len; y++) {
                        if (startrow > ++count)
                            continue;
                        if (maxrows > -1 && list.size() >= maxrows)
                            break outer;
                        //list.add(new SearchResulItemHits(hits,y,highlighter,analyzer,id,ct,c,aa.getContextPassages(),aa.getContextBytes()));
                        doc = hits.doc(y);
                        list.add(createSearchResulItem(highlighter, analyzer, doc, id, hits.score(y), ct, c,
                                aa.getContextPassages(), aa.getContextBytes()));
                    }

                }

            }
        } finally {
            close(reader);
            close(searcher);
        }

        // spellcheck
        //SearchData data=ThreadLocalSearchData.get();
        if (spellcheck && data != null) {
            if (data.getSuggestionMax() >= list.size()) {

                Map suggestions = data.getSuggestion();
                Iterator it = spellCheckIndex.iterator();
                String id;
                Literal[] literals = queryParser.getLiteralSearchedTerms();
                String[] strLiterals = queryParser.getStringSearchedTerms();
                boolean setSuggestionQuery = false;
                while (it.hasNext()) {
                    id = (String) it.next();
                    // add to set to remove duplicate values
                    SuggestionItem si;
                    SpellChecker sc = getSpellChecker(id);
                    for (int i = 0; i < strLiterals.length; i++) {
                        String[] arr = sc.suggestSimilar(strLiterals[i], 1000);
                        if (arr.length > 0) {
                            literals[i].set("<suggestion>" + arr[0] + "</suggestion>");
                            setSuggestionQuery = true;

                            si = (SuggestionItem) suggestions.get(strLiterals[i]);
                            if (si == null)
                                suggestions.put(strLiterals[i], new SuggestionItem(arr));
                            else
                                si.add(arr);
                        }
                    }
                }
                if (setSuggestionQuery)
                    data.setSuggestionQuery(op.toString());
            }
        }

        return list.toArray(new SearchResulItem[list.size()]);
    } catch (IOException e) {
        throw new SearchException(e);
    }

}

From source file:luceneindexcreator.LuceneIndexCreator.java

public static void main(String[] args) {
    try {
        Comparator<TermStats> comparator = new Comparator<TermStats>() {
            @Override
            public int compare(TermStats t1, TermStats t2) {
                // Long.compare returns 0 for equal frequencies, satisfying the Comparator contract
                return Long.compare(t1.totalTermFreq, t2.totalTermFreq);
            }
        };

        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();

        //Check the index has been created successfully
        Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
        IndexReader indexReader = DirectoryReader.open(indexDirectory);

        int numDocs = indexReader.numDocs();
        /* Keywords SORTED BY DATE
         *      //generation of Date indexes and the associated json files of keyword freq            
         *      ArrayList<String> indexedDates = new ArrayList<String>();
         *      for ( int i = 0; i < numDocs; i++){
         *          Document document = indexReader.document(i);
         *          //indexRader.toString(i);
         *          String date = document.get("Date");
         *          if (!contains(indexedDates, date)) {
         *              LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY);
         *              lwd.createSubindexDate(date);
         *              indexedDates.add(date);
         *          }
         *          Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date));
         *          IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate);
         *          HighFreqTerms hTerms = new HighFreqTerms();
         *          JSONArray termResultJSONArray = new JSONArray();
         *          TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator);
         *          //creating json object
         *          for (int j = 0; j < hTermResult.length; j++) {
         *              JSONObject termResultJSON = new JSONObject();
         *              termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString());
         *              termResultJSON.put("Frequency", hTermResult[j].totalTermFreq);
         *              termResultJSONArray.add(termResultJSON);
         *              //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " +  hTermResult[i].totalTermFreq);
         *          }
         *          //outputting json
         *          try(FileWriter file = new FileWriter("JSONResults/" + date + ".json")) {
         *              file.write(termResultJSONArray.toJSONString());
         *              System.out.println("Successfully Copied JSON Object to File...");
         *              System.out.println("\nJSON Object: " + termResultJSONArray );
         *
         *          }
         *              //date = date.substring(5, 16).trim();
         *              //System.out.println( "d=" + document.get("content"));
         *              //System.out.println("date: " + date + ".");
         *      }
        */

        // keywords sorted by week
        //generation of Date indexes and the associated json files of keyword freq                      
        ArrayList<String> indexedWeeks = new ArrayList<String>();

        //creating subindexes for each week
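        // iterating 0..numDocs-1 assumes the index has no deletions; otherwise maxDoc() and a liveDocs check would be needed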
        for (int i = 0; i < numDocs; i++) {
            Document document = indexReader.document(i);
            //System.out.println(document.get("Week_number"));
            //System.out.println(document.get("Date"));
            String weekNum = document.get("Week_number");
            //System.out.println(weekNum);
            if (!contains(indexedWeeks, weekNum)) {
                LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                        JSON_FILE_PATH_WEEKLY);
                lww.createSubindexWeek(weekNum);
                indexedWeeks.add(weekNum);
            }
        }
        JSONArray json1 = new JSONArray();
        for (String weekNum : indexedWeeks) {
            Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum));
            IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek);
            HighFreqTerms hTerms = new HighFreqTerms();
            TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator);

            //creating json object 
            JSONObject json2 = new JSONObject();
            json2.put("Week", weekNum);
            JSONArray json3 = new JSONArray();
            for (int j = 0; j < hTermResult.length; j++) {
                JSONObject json4 = new JSONObject();
                json4.put("Term", hTermResult[j].termtext.utf8ToString());
                json4.put("Frequency", hTermResult[j].totalTermFreq);
                json3.add(json4);
            }
            json2.put("Terms", json3);
            json1.add(json2);
        }
        //output json
        try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) {
            file.write(json1.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + json1);
        }

        // gets term freq for all docs 
        HighFreqTerms hTerms = new HighFreqTerms();
        JSONArray termResultJSONArray = new JSONArray();

        //array of termStats
        TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator);

        //creating json object
        for (int i = 0; i < hTermResult.length; i++) {
            JSONObject termResultJSON = new JSONObject();
            termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString());
            termResultJSON.put("Frequency", hTermResult[i].totalTermFreq);
            termResultJSONArray.add(termResultJSON);
            //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " +  hTermResult[i].totalTermFreq);
        }
        //outputting json
        try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) {
            file.write(termResultJSONArray.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + termResultJSONArray);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:lucenesearch.Mallet.java

public void getMalletAllOutput() throws IOException {

    String index = new Searcher().getPostIndexPath();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));

    PrintWriter pw = new PrintWriter("./data/mallet_all.txt");

    StringBuilder sb = new StringBuilder();

    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        System.out.println("Doc " + i);

        ArrayList<String> res = LuceneUtils.getAnalyzedRemoveHtml(doc.get("Body"));

        int id = Integer.parseInt(doc.get("SId"));
        sb = new StringBuilder();
        sb.append(id);
        sb.append("\t");
        for (String re : res) {
            re = re.replaceAll("\r\n", " ").replaceAll("\n", " ").replaceAll("<.+?>", "").replaceAll(" +", " ")
                    .replaceAll("[^\\x00-\\x7F]", " ").trim();
            sb.append(re).append(" ");
        }
        sb.append("\n");
        pw.print(sb.toString());

    }
    pw.close();

}

From source file:net.bobah.mail.Indexer.java

License:Apache License

private void runEx() throws Exception {
    final File dir = new File(config.getProperty("net.bobah.mail.local.folder"));
    if (!dir.exists() || !dir.isDirectory()) {
        throw new IllegalArgumentException(String.format("\"%s\" does not exist or is not a directory", dir));
    }

    Collection<File> files = findFiles(dir, new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getName().endsWith(".eml");
        }
    }, new Comparator<File>() {
        @Override
        public int compare(File l, File r) {
            return Long.compare(l.lastModified(), r.lastModified());
        }
    });

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    final File indexDir = new File(dir, "index");

    final boolean indexExisted = indexDir.exists();
    if (!indexExisted)
        indexDir.mkdirs();

    final Directory idx = FSDirectory.open(indexDir);
    final IndexWriter writer = new IndexWriter(idx, iwc);

    final IndexReader reader = indexExisted ? DirectoryReader.open(idx) : null;
    final IndexSearcher searcher = indexExisted ? new IndexSearcher(reader) : null;

    //final AtomicLong counter = new AtomicLong(0l);
    try {
        for (final File file : files) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        index(file, writer, searcher);
                        //if (counter.incrementAndGet() % 100 == 0) writer.commit(); // TODO: VL: make batch size configurable
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        }

        shutdownExecutor(executor, log);

        // TODO: VL: delete stale documents from the index

        writer.commit();
        log.info("committed index updates");

        searcher.search(new MatchAllDocsQuery(), new Collector() {
            @Override
            public void setScorer(Scorer scorer) throws IOException {
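                // no-op: scores are not used by this collector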
            }

            private int docBase;

            @Override
            public void setNextReader(AtomicReaderContext context) throws IOException {
                // remember the segment offset: collect() receives segment-local doc IDs,
                // which must be rebased before calling document() on the top-level reader
                docBase = context.docBase;
            }

            @Override
            public void collect(int docID) throws IOException {
                Document doc = reader.document(docBase + docID);
                final String path = doc.get("path");
                if (path != null) {
                    try {
                        final File file = new File(path);
                        if (!file.exists()) {
                            log.info("deleting index for {}", doc.get("id"));
                            writer.deleteDocuments(new Term("id", doc.get("id")));
                        }
                    } catch (SecurityException e) {
                        log.error("exception", e);
                    }
                }
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });

        writer.commit();
        log.info("committed index deletions");

    } finally {
        try {
            // close writer without commit (see explicit commits above)
            writer.rollback();
        } catch (IOException e) {
            log.error("exception while closing writer", e);
        }
    }
}