List of usage examples for org.apache.lucene.document Document getFields
public final List<IndexableField> getFields()
From source file:br.bireme.mlts.MoreLikeThat.java
License:Open Source License
/**
 * Loads the stored document behind a search hit and exposes it as a
 * name -> values map.
 *
 * @param sdoc the score doc identifying the hit (must not be null)
 * @return map from trimmed field name to the list of trimmed stored values
 *         for that name (multi-valued fields accumulate)
 * @throws IOException if the hit resolves to a null document or reading fails
 * @throws CorruptIndexException if the index is corrupt
 */
private Map<String, List<String>> getDocument(final ScoreDoc sdoc) throws CorruptIndexException, IOException {
    assert sdoc != null;

    final Document hitDoc = is.doc(sdoc.doc);
    if (hitDoc == null) {
        throw new IOException("null hit document");
    }
    final Map<String, List<String>> fieldsByName = new HashMap<String, List<String>>();
    for (final Fieldable fld : hitDoc.getFields()) {
        final String name = fld.name().trim();
        List<String> values = fieldsByName.get(name);
        if (values == null) {
            values = new ArrayList<String>();
            fieldsByName.put(name, values);
        }
        values.add(fld.stringValue().trim());
    }
    return fieldsByName;
}
From source file:br.bireme.mlts.utils.Document2JSON.java
License:Open Source License
public static Map<String, List<Fieldable>> getMap(final Document doc) { if (doc == null) { throw new NullPointerException("doc"); }// www . j a va 2 s .c om final Map<String, List<Fieldable>> ret = new LinkedHashMap<String, List<Fieldable>>(); final List<Fieldable> fields = doc.getFields(); for (Fieldable fld : fields) { List<Fieldable> lfld = ret.get(fld.name()); if (lfld == null) { lfld = new ArrayList<Fieldable>(); ret.put(fld.name(), lfld); } lfld.add(fld); } return ret; }
From source file:br.bireme.ngrams.CompareResults.java
private static void writeDocDifferences(final String similarity, final Document doc1, final Document doc2, final BufferedWriter bwriter) throws IOException { assert similarity != null; assert doc1 != null; assert doc2 != null; assert bwriter != null; final StringBuilder builder = new StringBuilder(); final Set<String> diff = new HashSet<>(); final String id1 = doc1.get("id"); final String id2 = doc2.get("id"); for (IndexableField fld : doc1.getFields()) { final String name = fld.name(); if (name.endsWith("~notnormalized")) { if (!name.startsWith("id~")) { final String value1 = fld.stringValue(); final String value2 = doc2.get(name); if (((value1 == null) && (null != value2)) || !value1.equals(value2)) { final String name2 = name.substring(0, name.lastIndexOf('~')); diff.add("[" + name2 + "]|" + value1 + "|" + value2); }//from w ww .ja v a2 s . c om } } } if (diff.isEmpty()) { builder.append("<identical>|"); builder.append(id1 + "|" + id2 + "\n"); } else { if (similarity.equals("1.0")) { builder.append("<very similar>|"); } else { builder.append("<similar>|"); } builder.append(id1 + "|" + id2 + "\n"); for (String di : diff) { builder.append(di); builder.append("\n"); } } builder.append("\n"); bwriter.append(builder.toString()); }
From source file:ca.gnewton.lusql.core.DocumentFactory.java
License:Apache License
public void passivateObject(Object obj) { ++returned;/*from w w w . j a v a2 s. co m*/ //if(returned%1000 == 0) //System.out.println("DocumentFactory returned: " + returned); Document doc = (Document) obj; List<Fieldable> fields = doc.getFields(); Iterator<Fieldable> it = fields.iterator(); while (it.hasNext()) { doc.removeField(((Field) it.next()).name()); } }
From source file:ca.gnewton.lusql.core.ViewIndex.java
License:Apache License
/**
 * Dumps a document's fields to stdout for debugging/inspection.
 *
 * @param doc the document to print
 * @param i   the document's ordinal, echoed in the header line
 */
static void printDoc(Document doc, int i) {
    System.out.println("\nDocument#: " + i);
    // Enhanced for over the field list replaces the original raw-typed
    // List/Iterator pair; the cast to Field matches the original behavior.
    for (Object o : doc.getFields()) {
        Field field = (Field) o;
        System.out.println("\t" + field.name() + ": [" + field.stringValue() + "]");
    }
}
From source file:cc.osint.graphd.graph.Graph.java
License:Apache License
/**
 * Executes a Lucene query string against the graph index and returns every
 * matching document as a JSONObject mapping each stored field name to its
 * first stored value.
 *
 * @param indexSearcher searcher over the graph index
 * @param queryStr      query in Lucene syntax; default operator is AND and
 *                      leading wildcards are allowed
 * @return one JSONObject per matching document
 * @throws Exception on parse or search failure
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    long start_t = System.currentTimeMillis();
    final List<JSONObject> results = new ArrayList<JSONObject>();
    QueryParser qp = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    qp.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    qp.setAllowLeadingWildcard(true);
    Query query = qp.parse(queryStr);
    // The parsed query is applied as a (cached) filter over MatchAllDocsQuery,
    // so scoring is bypassed entirely; the Collector below only copies fields.
    org.apache.lucene.search.Filter filter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(query));
    indexSearcher.search(new MatchAllDocsQuery(), filter, new Collector() {
        private int docBase;
        IndexReader reader;

        // Scoring is ignored (filter-only search).
        public void setScorer(Scorer scorer) {
        }

        // Out-of-order collection is fine since results are not ranked.
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void collect(int doc) {
            try {
                // 'doc' is relative to the current segment reader installed
                // by setNextReader below.
                Document d = reader.document(doc);
                JSONObject result = new JSONObject();
                for (Fieldable f : d.getFields()) {
                    result.put(f.name(), d.get(f.name()));
                }
                results.add(result);
            } catch (Exception ex) {
                // NOTE(review): failures for a single doc are logged and the
                // doc is silently dropped from the result set.
                ex.printStackTrace();
            }
        }

        public void setNextReader(IndexReader reader, int docBase) {
            this.reader = reader;
            this.docBase = docBase;
        }
    });
    long end_t = System.currentTimeMillis();
    //log.info("query: hits.scoreDocs.length = " + results.size() + " (" + (end_t-start_t) + "ms)");
    return results;
}
From source file:ch.sentric.hbase.prospective.Percolator.java
License:Apache License
/**
 * Tries to find a set of queries that match the given document
 * (reverse search / percolation): the document is indexed into an in-memory
 * index and each registered query is run against it.
 *
 * @param doc     the Lucene document to percolate
 * @param queries the registered queries, keyed by caller-defined handle
 * @return the matching queries wrapped in a {@code Response}
 * @throws IOException if an I/O error occurs
 */
public Response<T> percolate(final Document doc, final Map<T, Query> queries) throws IOException {
    // First, parse the source doc into a MemoryIndex.
    final MemoryIndex memoryIndex = new MemoryIndex();
    for (final Fieldable field : doc.getFields()) {
        // Non-indexed fields can never match a query; skip them.
        if (!field.isIndexed()) {
            continue;
        }
        // Feed the field into the MemoryIndex, preferring the richest
        // representation available: pre-built token stream, then reader,
        // then plain string value (each analyzed on the fly).
        final TokenStream tokenStream = field.tokenStreamValue();
        if (tokenStream != null) {
            memoryIndex.addField(field.name(), tokenStream, field.getBoost());
        } else {
            final Reader reader = field.readerValue();
            if (reader != null) {
                memoryIndex.addField(field.name(), analyzer.reusableTokenStream(field.name(), reader),
                        field.getBoost());
            } else {
                final String value = field.stringValue();
                if (value != null) {
                    memoryIndex.addField(field.name(),
                            analyzer.reusableTokenStream(field.name(), new CharSequenceReader(value)),
                            field.getBoost());
                }
                // Fields with no token stream, reader, or string value
                // (e.g. binary-only) are silently skipped.
            }
        }
    }
    // Then run every registered query against the single-document index;
    // ExistsCollector only records whether at least one hit occurred.
    final IndexSearcher searcher = memoryIndex.createSearcher();
    final Map<T, Query> matches = new HashMap<T, Query>(0);
    if (queries != null && !queries.isEmpty()) {
        final ExistsCollector collector = new ExistsCollector();
        for (final Map.Entry<T, Query> entry : queries.entrySet()) {
            collector.reset();
            searcher.search(entry.getValue(), collector);
            if (collector.exists()) {
                matches.put(entry.getKey(), entry.getValue());
            }
        }
    }
    return new Response<T>(matches);
}
From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java
License:Apache License
private static Map<String, String[]> convert(Document document, SearchRequest.SearchType searchType) { Map<String, String[]> map = new HashMap<String, String[]>(); if (document != null) { List<IndexableField> fields = document.getFields(); Iterator<IndexableField> iter = fields.iterator(); while (iter.hasNext()) { IndexableField fld = iter.next(); String fieldname = fld.name(); if (searchType == SearchRequest.SearchType.SIMPLE_QUERY) { if (fieldname.equals("_path") || fieldname.equals("_name")) { map.put(fieldname, document.getValues(fieldname)); }// w w w . jav a 2 s.co m } else if (searchType == SearchRequest.SearchType.QUERY_AND_FETCH) { map.put(fieldname, document.getValues(fieldname)); } } } return map; }
From source file:com.baidu.rigel.biplatform.tesseract.resultset.isservice.ResultRecord.java
License:Open Source License
/**
 * Builds a result record from a Lucene document: the record's value array
 * holds each field's stored string value, and its metadata holds the
 * corresponding field names, both in document field order.
 *
 * @param doc the source document
 */
public ResultRecord(Document doc) {
    super();
    final List<String> names = new ArrayList<String>();
    final List<String> values = new ArrayList<String>();
    for (final IndexableField field : doc.getFields()) {
        names.add(field.name());
        values.add(field.stringValue());
    }
    this.fieldArray = values.toArray(new String[0]);
    this.meta = new Meta(names.toArray(new String[0]));
}
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
License:Apache License
/**
 * Prints one hit document to {@code out} according to the tool's formatter
 * settings. Optionally prepends the synthetic "&lt;id&gt;" and "&lt;score&gt;"
 * columns; when no explicit field list is configured, all of the document's
 * fields are shown in first-seen order. Increments {@code docsPrinted} for
 * each document actually emitted (the tabular header is written only before
 * the first one).
 *
 * @param doc   the matched document
 * @param id    the Lucene doc id (shown when {@code showId} is set)
 * @param score the hit score (shown when {@code showScore} is set)
 * @param out   destination stream
 */
private void printDocument(Document doc, int id, float score, PrintStream out) {
    // Multimap: a field name may carry several stored values.
    Multimap<String, String> data = ArrayListMultimap.create();
    List<String> orderedFieldNames = Lists.newArrayList();
    if (showId) {
        orderedFieldNames.add("<id>");
        data.put("<id>", Integer.toString(id));
    }
    if (showScore) {
        orderedFieldNames.add("<score>");
        data.put("<score>", Double.toString(score));
    }
    orderedFieldNames.addAll(fieldNames);
    Set<String> setFieldNames = Sets.newHashSet();
    if (fieldNames.isEmpty()) {
        // No field list configured: show every field, de-duplicated but
        // preserving the order in which names first appear in the document.
        for (IndexableField f : doc.getFields()) {
            if (!setFieldNames.contains(f.name())) {
                orderedFieldNames.add(f.name());
            }
            setFieldNames.add(f.name());
        }
    } else {
        setFieldNames.addAll(fieldNames);
    }
    if (sortFields) {
        Collections.sort(orderedFieldNames);
    }
    // Collect values for every selected field; binary values are hex-ish
    // formatted, and a field with neither string nor binary value prints
    // as the literal "null".
    for (IndexableField f : doc.getFields()) {
        if (setFieldNames.contains(f.name())) {
            if (f.stringValue() != null) {
                data.put(f.name(), f.stringValue());
            } else if (f.binaryValue() != null) {
                data.put(f.name(), formatBinary(f.binaryValue().bytes));
            } else {
                data.put(f.name(), "null");
            }
        }
    }
    // Tabular output gets a one-time header row of field names.
    if (docsPrinted == 0 && formatter.getFormat() == Formatter.Format.TABULAR && !formatter.suppressNames()) {
        out.println(Joiner.on('\t').join(orderedFieldNames));
    }
    String formatted = formatter.format(orderedFieldNames, data);
    if (!formatted.isEmpty()) {
        // Multiline format separates documents with a blank line.
        if (docsPrinted > 0 && formatter.getFormat() == Formatter.Format.MULTILINE) {
            out.println();
        }
        out.println(formatted);
        ++docsPrinted;
    }
}