List of usage examples for org.apache.lucene.index.IndexReader.document
public final Document document(int docID) throws IOException

Returns the n-th Document in this index.
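Before the per-project examples, here is a minimal, self-contained sketch of the call. It is only a sketch: the ./index path and the "title" stored field are assumptions for illustration, and the Path-based FSDirectory.open matches Lucene 5.x and later.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocumentExample {
    public static void main(String[] args) throws Exception {
        // Open an existing index (the "./index" location is an assumption).
        Directory dir = FSDirectory.open(Paths.get("./index"));
        try (IndexReader reader = DirectoryReader.open(dir)) {
            // docIDs run from 0 to maxDoc()-1; some may refer to deleted documents.
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                Document doc = reader.document(docID); // stored fields of the docID-th document
                System.out.println(doc.get("title"));  // "title" is a hypothetical stored field
            }
        }
    }
}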
From source file:liredemo.SearchResultsTableModel.java
License:Open Source License
/**
 * @param hits
 * @param progress
 * @param reader
 */
public void setHits(ImageSearchHits hits, JProgressBar progress, IndexReader reader) {
    this.hits = hits;
    icons = new ArrayList<ImageIcon>(hits.length());
    if (progress != null)
        progress.setString("Searching finished. Loading images for result list.");
    for (int i = 0; i < hits.length(); i++) {
        ImageIcon icon = null;
        try {
            BufferedImage img = null;
            String fileIdentifier = reader.document(hits.documentID(i))
                    .getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue();
            if (!fileIdentifier.startsWith("http:")) {
                // check if it is a jpg file ...
                if (fileIdentifier.toLowerCase().endsWith(".jpg")) {
                    // try the embedded EXIF thumbnail first
                    Metadata metadata = new ExifReader(new FileInputStream(fileIdentifier)).extract();
                    if (metadata.containsDirectory(ExifDirectory.class)) {
                        ExifDirectory exifDirectory = (ExifDirectory) metadata.getDirectory(ExifDirectory.class);
                        if (exifDirectory.containsThumbnail()) {
                            img = ImageIO.read(new ByteArrayInputStream(exifDirectory.getThumbnailData()));
                        }
                    }
                }
                if (img == null) {
                    img = ImageIO.read(new FileInputStream(fileIdentifier));
                }
            } else {
                img = ImageIO.read(new URL(fileIdentifier));
            }
            icon = new ImageIcon(ImageUtils.scaleImage(img, 200));
            if (progress != null)
                progress.setValue((i * 100) / hits.length());
        } catch (Exception ex) {
            Logger.getLogger("global").log(Level.SEVERE, null, ex);
        }
        icons.add(icon);
    }
    if (progress != null)
        progress.setValue(100);
    fireTableDataChanged();
}
From source file:lius.lucene.LuceneActions.java
License:Apache License
public synchronized List ListAllDocuments(String indexDir, LiusConfig lc) {
    List documentsList = new ArrayList();
    List fieldList = lc.getBrowseFieldsToDisplay();
    Map values = null;
    LiusHit lh = null;
    try {
        Directory directory = FSDirectory.getDirectory(indexDir, false);
        IndexReader ir = IndexReader.open(directory);
        int num = ir.numDocs();
        for (int i = 0; i < num; i++) {
            lh = new LiusHit();
            values = new HashMap();
            Document luceneDoc = ir.document(i);
            lh.setDocId(i);
            for (int j = 0; j < fieldList.size(); j++) {
                LiusField lf = (LiusField) fieldList.get(j);
                Field f = luceneDoc.getField(lf.getName());
                LiusField nlf = new LiusField();
                nlf.setName(lf.getName());
                nlf.setLabel(lf.getLabel());
                if (f != null) {
                    String content = f.stringValue();
                    nlf.setValue(content);
                    values.put(lf.getName(), nlf);
                }
            }
            lh.setLiusFieldsMap(values);
            documentsList.add(lh);
        }
        if (ir != null) {
            ir.close();
        }
    } catch (IOException e) {
        logger.error(e.getMessage());
    }
    return documentsList;
}
From source file:lucandra.LucandraTests.java
License:Apache License
public void testWildcardQuery() throws Exception {
    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check wildcard
    Query q = qp.parse("+key:anoth*");
    TopDocs docs = searcher.search(q, 10);
    assertEquals(1, docs.totalHits);

    Document d = indexReader.document(1);
    String val = d.get("key");
    assertTrue(val.equals("this is another example"));

    // check wildcard
    q = qp.parse("+date:test*");
    docs = searcher.search(q, 10);
    assertEquals(101, docs.totalHits);
}
From source file:lucandra.LucandraTests.java
License:Apache License
public void testSortQuery() throws Exception {
    IndexReader indexReader = new IndexReader(indexName, client);
    IndexSearcher searcher = new IndexSearcher(indexReader);
    QueryParser qp = new QueryParser(Version.LUCENE_CURRENT, "key", analyzer);

    // check sort
    Sort sort = new Sort(new SortField("date", SortField.STRING));
    Query q = qp.parse("+key:sort");
    TopDocs docs = searcher.search(q, null, 10, sort);
    for (int i = 0; i < 10; i++) {
        Document d = indexReader.document(docs.scoreDocs[i].doc);
        String dval = d.get("date");
        assertEquals("test" + (i + 200), dval);
    }
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
/**
 * @param id
 * @param keyColumn
 * @return
 * @throws SearchException
 */
protected IndexResult _deleteCustom(String id, QueryColumn keyColumn) throws SearchException {
    int countBefore = 0;
    int countAfter = 0;
    Map<String, Document> docs = new HashMap<String, Document>();
    Set<String> keys = toSet(keyColumn);
    IndexWriter writer = null;
    String key;
    IndexReader reader = null;
    Document doc;
    synchronized (token) {
        try {
            try {
                reader = _getReader(id, false);
                countBefore = reader.maxDoc();
                // keep every document whose key is not in the delete set
                for (int i = 0; i < countBefore; i++) {
                    doc = reader.document(i);
                    key = doc.getField("key").stringValue();
                    if (!keys.contains(key))
                        docs.put(key, doc);
                }
            } catch (Exception e) {
            } finally {
                close(reader);
            }
            countAfter = docs.size();
            writer = _getWriter(id, true);
            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
            while (it.hasNext()) {
                writer.addDocument(it.next().getValue());
            }
            optimizeEL(writer);
        } catch (IOException e) {
            throw new SearchException(e);
        } finally {
            close(writer);
        }
        indexSpellCheck(id);
    }
    int removes = countBefore - countAfter;
    return new IndexResultImpl(removes, 0, 0);
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
/**
 * @param id
 * @param title
 * @param keyColumn
 * @param bodyColumns
 * @param language
 * @param urlpath
 * @param custom1
 * @param custom2
 * @param custom3
 * @param custom4
 * @return
 * @throws SearchException
 */
protected IndexResult _indexCustom(String id, Object title, QueryColumn keyColumn, QueryColumn[] bodyColumns,
        String language, Object urlpath, Object custom1, Object custom2, Object custom3, Object custom4)
        throws SearchException {
    _checkLanguage(language);
    String t;
    String url;
    String c1;
    String c2;
    String c3;
    String c4;
    int countExisting = 0;
    int countAdd = keyColumn.size();
    int countNew = 0;
    Map<String, Document> docs = new HashMap<String, Document>();
    IndexWriter writer = null;
    synchronized (token) {
        try {
            // read existing reader
            IndexReader reader = null;
            try {
                reader = _getReader(id, false);
                int len = reader.maxDoc();
                Document doc;
                for (int i = 0; i < len; i++) {
                    doc = reader.document(i);
                    docs.put(doc.getField("key").stringValue(), doc);
                }
            } catch (Exception e) {
            } finally {
                close(reader);
            }
            countExisting = docs.size();
            writer = _getWriter(id, true);
            int len = keyColumn.size();
            String key;
            for (int i = 1; i <= len; i++) {
                key = Caster.toString(keyColumn.get(i, null), null);
                if (key == null)
                    continue;
                StringBuilder body = new StringBuilder();
                for (int y = 0; y < bodyColumns.length; y++) {
                    Object tmp = bodyColumns[y].get(i, null);
                    if (tmp != null) {
                        body.append(tmp.toString());
                        body.append(' ');
                    }
                }
                //t=(title==null)?null:Caster.toString(title.get(i,null),null);
                //url=(urlpath==null)?null:Caster.toString(urlpath.get(i,null),null);
                t = getRow(title, i);
                url = getRow(urlpath, i);
                c1 = getRow(custom1, i);
                c2 = getRow(custom2, i);
                c3 = getRow(custom3, i);
                c4 = getRow(custom4, i);
                docs.put(key, CustomDocument.getDocument(t, key, body.toString(), url, c1, c2, c3, c4));
            }
            countNew = docs.size();
            Iterator<Entry<String, Document>> it = docs.entrySet().iterator();
            Entry<String, Document> entry;
            Document doc;
            while (it.hasNext()) {
                entry = it.next();
                doc = entry.getValue();
                writer.addDocument(doc);
            }
            optimizeEL(writer);
            //writer.optimize();
        } catch (IOException ioe) {
            throw new SearchException(ioe);
        } finally {
            close(writer);
        }
        indexSpellCheck(id);
    }
    int inserts = countNew - countExisting;
    return new IndexResultImpl(0, inserts, countAdd - inserts);
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
@Override
public SearchResulItem[] _search(SearchData data, String criteria, String language, short type,
        String categoryTree, String[] category) throws SearchException {
    try {
        if (type != SEARCH_TYPE_SIMPLE)
            throw new SearchException("search type explicit not supported");
        Analyzer analyzer = SearchUtil.getAnalyzer(language);
        Query query = null;
        Op op = null;
        Object highlighter = null;
        lucee.runtime.search.lucene2.query.QueryParser queryParser = new lucee.runtime.search.lucene2.query.QueryParser();
        AddionalAttrs aa = AddionalAttrs.getAddionlAttrs();
        aa.setHasRowHandling(true);
        int startrow = aa.getStartrow();
        int maxrows = aa.getMaxrows();
        if (!criteria.equals("*")) {
            // FUTURE take this data from calling parameters
            op = queryParser.parseOp(criteria);
            if (op == null)
                criteria = "*";
            else
                criteria = op.toString();
            try {
                query = new QueryParser("contents", analyzer).parse(criteria);
                highlighter = Highlight.createHighlighter(query, aa.getContextHighlightBegin(),
                        aa.getContextHighlightEnd());
            } catch (ParseException e) {
                throw new SearchException(e);
            }
        }
        Resource[] files = _getIndexDirectories();
        if (files == null)
            return new SearchResulItem[0];
        ArrayList<SearchResulItem> list = new ArrayList<SearchResulItem>();
        String ct, c;
        ArrayList<String> spellCheckIndex = spellcheck ? new ArrayList<String>() : null;
        int count = 0;
        IndexReader reader = null;
        Searcher searcher = null;
        try {
            outer: for (int i = 0; i < files.length; i++) {
                if (removeCorrupt(files[i]))
                    continue;
                String strFile = files[i].toString();
                SearchIndex si = indexes.get(files[i].getName());
                if (si == null)
                    continue;
                ct = si.getCategoryTree();
                c = ListUtil.arrayToList(si.getCategories(), ",");
                // check category tree
                if (!matchCategoryTree(ct, categoryTree))
                    continue;
                if (!matchCategories(si.getCategories(), category))
                    continue;
                Document doc;
                String id = files[i].getName();
                data.addRecordsSearched(_countDocs(strFile));
                reader = _getReader(id, false);
                if (query == null && "*".equals(criteria)) {
                    int len = reader.numDocs();
                    for (int y = 0; y < len; y++) {
                        if (startrow > ++count)
                            continue;
                        if (maxrows > -1 && list.size() >= maxrows)
                            break outer;
                        doc = reader.document(y);
                        list.add(createSearchResulItem(highlighter, analyzer, doc, id, 1, ct, c,
                                aa.getContextPassages(), aa.getContextBytes()));
                    }
                } else {
                    if (spellcheck)
                        spellCheckIndex.add(id);
                    // search
                    searcher = new IndexSearcher(reader);
                    Hits hits = searcher.search(query);
                    int len = hits.length();
                    for (int y = 0; y < len; y++) {
                        if (startrow > ++count)
                            continue;
                        if (maxrows > -1 && list.size() >= maxrows)
                            break outer;
                        //list.add(new SearchResulItemHits(hits,y,highlighter,analyzer,id,ct,c,aa.getContextPassages(),aa.getContextBytes()));
                        doc = hits.doc(y);
                        list.add(createSearchResulItem(highlighter, analyzer, doc, id, hits.score(y), ct, c,
                                aa.getContextPassages(), aa.getContextBytes()));
                    }
                }
            }
        } finally {
            close(reader);
            close(searcher);
        }
        // spellcheck
        //SearchData data=ThreadLocalSearchData.get();
        if (spellcheck && data != null) {
            if (data.getSuggestionMax() >= list.size()) {
                Map suggestions = data.getSuggestion();
                Iterator it = spellCheckIndex.iterator();
                String id;
                Literal[] literals = queryParser.getLiteralSearchedTerms();
                String[] strLiterals = queryParser.getStringSearchedTerms();
                boolean setSuggestionQuery = false;
                while (it.hasNext()) {
                    id = (String) it.next();
                    // add to set to remove duplicate values
                    SuggestionItem si;
                    SpellChecker sc = getSpellChecker(id);
                    for (int i = 0; i < strLiterals.length; i++) {
                        String[] arr = sc.suggestSimilar(strLiterals[i], 1000);
                        if (arr.length > 0) {
                            literals[i].set("<suggestion>" + arr[0] + "</suggestion>");
                            setSuggestionQuery = true;
                            si = (SuggestionItem) suggestions.get(strLiterals[i]);
                            if (si == null)
                                suggestions.put(strLiterals[i], new SuggestionItem(arr));
                            else
                                si.add(arr);
                        }
                    }
                }
                if (setSuggestionQuery)
                    data.setSuggestionQuery(op.toString());
            }
        }
        return list.toArray(new SearchResulItem[list.size()]);
    } catch (IOException e) {
        throw new SearchException(e);
    }
}
From source file:luceneindexcreator.LuceneIndexCreator.java
public static void main(String[] args) {
    try {
        Comparator<TermStats> comparator = new Comparator<TermStats>() {
            @Override
            public int compare(TermStats t1, TermStats t2) {
                return t1.totalTermFreq < t2.totalTermFreq ? -1 : 1;
            }
        };
        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();
        // Check the index has been created successfully
        Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
        IndexReader indexReader = DirectoryReader.open(indexDirectory);
        int numDocs = indexReader.numDocs();
        /* Keywords SORTED BY DATE
         * generation of Date indexes and the associated json files of keyword freq
         * ArrayList<String> indexedDates = new ArrayList<String>();
         * for (int i = 0; i < numDocs; i++) {
         *     Document document = indexReader.document(i);
         *     //indexReader.toString(i);
         *     String date = document.get("Date");
         *     if (!contains(indexedDates, date)) {
         *         LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY);
         *         lwd.createSubindexDate(date);
         *         indexedDates.add(date);
         *     }
         *     Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date));
         *     IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate);
         *     HighFreqTerms hTerms = new HighFreqTerms();
         *     JSONArray termResultJSONArray = new JSONArray();
         *     TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator);
         *     // creating json object
         *     for (int j = 0; j < hTermResult.length; j++) {
         *         JSONObject termResultJSON = new JSONObject();
         *         termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString());
         *         termResultJSON.put("Frequency", hTermResult[j].totalTermFreq);
         *         termResultJSONArray.add(termResultJSON);
         *         //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " + hTermResult[i].totalTermFreq);
         *     }
         *     // outputting json
         *     try (FileWriter file = new FileWriter("JSONResults/" + date + ".json")) {
         *         file.write(termResultJSONArray.toJSONString());
         *         System.out.println("Successfully Copied JSON Object to File...");
         *         System.out.println("\nJSON Object: " + termResultJSONArray);
         *     }
         *     //date = date.substring(5, 16).trim();
         *     //System.out.println("d=" + document.get("content"));
         *     //System.out.println("date: " + date + ".");
         * }
         */
        // keywords sorted by week
        // generation of Date indexes and the associated json files of keyword freq
        ArrayList<String> indexedWeeks = new ArrayList<String>();
        // creating subindexes for each week
        for (int i = 0; i < numDocs; i++) {
            Document document = indexReader.document(i);
            //System.out.println(document.get("Week_number"));
            //System.out.println(document.get("Date"));
            String weekNum = document.get("Week_number");
            //System.out.println(weekNum);
            if (!contains(indexedWeeks, weekNum)) {
                LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                        JSON_FILE_PATH_WEEKLY);
                lww.createSubindexWeek(weekNum);
                indexedWeeks.add(weekNum);
            }
        }
        JSONArray json1 = new JSONArray();
        for (String weekNum : indexedWeeks) {
            Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum));
            IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek);
            HighFreqTerms hTerms = new HighFreqTerms();
            TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator);
            // creating json object
            JSONObject json2 = new JSONObject();
            json2.put("Week", weekNum);
            JSONArray json3 = new JSONArray();
            for (int j = 0; j < hTermResult.length; j++) {
                JSONObject json4 = new JSONObject();
                json4.put("Term", hTermResult[j].termtext.utf8ToString());
                json4.put("Frequency", hTermResult[j].totalTermFreq);
                json3.add(json4);
            }
            json2.put("Terms", json3);
            json1.add(json2);
        }
        // output json
        try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) {
            file.write(json1.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + json1);
        }
        // gets term freq for all docs
        HighFreqTerms hTerms = new HighFreqTerms();
        JSONArray termResultJSONArray = new JSONArray();
        // array of termStats
        TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator);
        // creating json object
        for (int i = 0; i < hTermResult.length; i++) {
            JSONObject termResultJSON = new JSONObject();
            termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString());
            termResultJSON.put("Frequency", hTermResult[i].totalTermFreq);
            termResultJSONArray.add(termResultJSON);
            //System.out.println("" + hTermResult[i].termtext.utf8ToString() + " " + hTermResult[i].totalTermFreq);
        }
        // outputting json
        try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) {
            file.write(termResultJSONArray.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + termResultJSONArray);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file:lucenesearch.Mallet.java
public void getMalletAllOutput() throws IOException {
    String index = new Searcher().getPostIndexPath();
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    PrintWriter pw = new PrintWriter("./data/mallet_all.txt");
    StringBuilder sb = new StringBuilder();
    for (int i = 0; i < reader.maxDoc(); i++) {
        Document doc = reader.document(i);
        System.out.println("Doc " + i);
        ArrayList<String> res = LuceneUtils.getAnalyzedRemoveHtml(doc.get("Body"));
        int id = Integer.parseInt(doc.get("SId"));
        sb = new StringBuilder();
        sb.append(id);
        sb.append("\t");
        for (String re : res) {
            re = re.replaceAll("\r\n", " ").replaceAll("\n", " ").replaceAll("<.+?>", "")
                    .replaceAll(" +", " ").replaceAll("[^\\x00-\\x7F]", " ").trim();
            sb.append(re).append(" ");
        }
        sb.append("\n");
        pw.print(sb.toString());
    }
    pw.close();
}
From source file:net.bobah.mail.Indexer.java
License:Apache License
private void runEx() throws Exception {
    final File dir = new File(config.getProperty("net.bobah.mail.local.folder"));
    if (!dir.exists() || !dir.isDirectory()) {
        throw new IllegalArgumentException(String.format("\"%s\" does not exist or is not a directory", dir));
    }
    Collection<File> files = findFiles(dir, new FileFilter() {
        @Override
        public boolean accept(File file) {
            return file.getName().endsWith(".eml");
        }
    }, new Comparator<File>() {
        @Override
        public int compare(File l, File r) {
            return Long.compare(l.lastModified(), r.lastModified());
        }
    });
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
    iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
    final File indexDir = new File(dir, "index");
    final boolean indexExisted = indexDir.exists();
    if (!indexExisted)
        indexDir.mkdirs();
    final Directory idx = FSDirectory.open(indexDir);
    final IndexWriter writer = new IndexWriter(idx, iwc);
    final IndexReader reader = indexExisted ? DirectoryReader.open(idx) : null;
    final IndexSearcher searcher = indexExisted ? new IndexSearcher(reader) : null;
    //final AtomicLong counter = new AtomicLong(0l);
    try {
        for (final File file : files) {
            executor.submit(new Runnable() {
                @Override
                public void run() {
                    try {
                        index(file, writer, searcher);
                        //if (counter.incrementAndGet() % 100 == 0) writer.commit(); // TODO: VL: make batch size configurable
                    } catch (Exception e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        }
        shutdownExecutor(executor, log);
        // TODO: VL: delete stale documents from the index
        writer.commit();
        log.info("committed index updates");
        searcher.search(new MatchAllDocsQuery(), new Collector() {
            @Override
            public void setScorer(Scorer scorer) throws IOException {
            }

            @Override
            public void setNextReader(AtomicReaderContext unused) throws IOException {
            }

            @Override
            public void collect(int docID) throws IOException {
                Document doc = reader.document(docID);
                final String path = doc.get("path");
                if (path != null) {
                    try {
                        final File file = new File(path);
                        if (!file.exists()) {
                            log.info("deleting index for {}", doc.get("id"));
                            writer.deleteDocuments(new Term("id", doc.get("id")));
                        }
                    } catch (SecurityException e) {
                        log.error("exception", e);
                    }
                }
            }

            @Override
            public boolean acceptsDocsOutOfOrder() {
                return true;
            }
        });
        writer.commit();
        log.info("committed index deletions");
    } finally {
        try {
            // close writer without commit (see explicit commits above)
            writer.rollback();
        } catch (IOException e) {
            log.error("exception while closing writer", e);
        }
    }
}