Example usage for org.apache.lucene.document Document getFields

List of usage examples for org.apache.lucene.document Document getFields

Introduction

On this page you can find example usages of org.apache.lucene.document Document getFields.

Prototype

public final List<IndexableField> getFields() 

Source Link

Document

Returns a List of all the fields in a document.

Usage

From source file:br.bireme.mlts.MoreLikeThat.java

License:Open Source License

/**
 * Loads the document referenced by a search hit and groups its field values
 * by field name.
 *
 * Field names and values are trimmed before being stored. A field whose
 * {@code stringValue()} is {@code null} (presumably a binary-only field --
 * confirm against the index schema) is skipped; previously such a field
 * caused a {@link NullPointerException}.
 *
 * @param sdoc the hit whose document is loaded; must not be null
 * @return a map from trimmed field name to that field's trimmed string
 *         values, in the order the document returns its fields
 * @throws CorruptIndexException propagated from the {@code is.doc} lookup
 * @throws IOException propagated from the {@code is.doc} lookup, or thrown
 *         when the lookup yields no document
 */
private Map<String, List<String>> getDocument(final ScoreDoc sdoc) throws CorruptIndexException, IOException {
    assert sdoc != null;

    final Document hitDoc = is.doc(sdoc.doc);
    if (hitDoc == null) {
        throw new IOException("null hit document");
    }

    final Map<String, List<String>> ret = new HashMap<String, List<String>>();
    for (Fieldable fld : hitDoc.getFields()) {
        final String text = fld.stringValue();
        if (text == null) {
            // No string representation for this field: nothing to collect.
            continue;
        }

        final String name = fld.name().trim();
        List<String> values = ret.get(name);
        if (values == null) {
            values = new ArrayList<String>();
            ret.put(name, values);
        }
        values.add(text.trim());
    }
    return ret;
}

From source file:br.bireme.mlts.utils.Document2JSON.java

License:Open Source License

/**
 * Groups the fields of a document by field name.
 *
 * The returned map keeps the order in which each name is first seen, and
 * each list keeps the order in which the document returns its fields.
 *
 * @param doc the document to read; must not be null
 * @return a map from field name to every field carrying that name
 * @throws NullPointerException if {@code doc} is null
 */
public static Map<String, List<Fieldable>> getMap(final Document doc) {
    if (doc == null) {
        throw new NullPointerException("doc");
    }

    final Map<String, List<Fieldable>> byName = new LinkedHashMap<String, List<Fieldable>>();
    for (final Fieldable field : doc.getFields()) {
        final String key = field.name();
        if (!byName.containsKey(key)) {
            // First field with this name: open a new group for it.
            byName.put(key, new ArrayList<Fieldable>());
        }
        byName.get(key).add(field);
    }
    return byName;
}

From source file:br.bireme.ngrams.CompareResults.java

/**
 * Writes a short report comparing the "~notnormalized" fields of two documents.
 *
 * Every field of {@code doc1} whose name ends with "~notnormalized" (except
 * names starting with "id~") is compared with the value {@code doc2} holds
 * under the same name. The report starts with a header line
 * ("&lt;identical&gt;", "&lt;very similar&gt;" or "&lt;similar&gt;") followed by both ids,
 * then one line per differing field, then a blank line.
 *
 * The comparison is null-safe: two missing values count as equal. The
 * previous expression dereferenced a null value in that case.
 *
 * @param similarity similarity score as text; "1.0" selects the "very similar" header
 * @param doc1 first document, whose fields drive the comparison
 * @param doc2 second document, looked up by field name
 * @param bwriter destination of the report
 * @throws IOException if writing to {@code bwriter} fails
 */
private static void writeDocDifferences(final String similarity, final Document doc1, final Document doc2,
        final BufferedWriter bwriter) throws IOException {
    assert similarity != null;
    assert doc1 != null;
    assert doc2 != null;
    assert bwriter != null;

    final String id1 = doc1.get("id");
    final String id2 = doc2.get("id");

    final Set<String> diff = new HashSet<>();
    for (IndexableField fld : doc1.getFields()) {
        final String name = fld.name();
        if (!name.endsWith("~notnormalized") || name.startsWith("id~")) {
            continue;
        }

        final String value1 = fld.stringValue();
        final String value2 = doc2.get(name);
        // Null-safe equality: both absent means "no difference".
        final boolean same = (value1 == null) ? (value2 == null) : value1.equals(value2);
        if (!same) {
            final String name2 = name.substring(0, name.lastIndexOf('~'));
            diff.add("[" + name2 + "]|" + value1 + "|" + value2);
        }
    }

    final String header;
    if (diff.isEmpty()) {
        header = "<identical>|";
    } else if (similarity.equals("1.0")) {
        header = "<very similar>|";
    } else {
        header = "<similar>|";
    }

    final StringBuilder builder = new StringBuilder();
    builder.append(header).append(id1).append('|').append(id2).append('\n');
    for (String di : diff) {
        builder.append(di).append('\n');
    }
    builder.append('\n');
    bwriter.append(builder.toString());
}

From source file:ca.gnewton.lusql.core.DocumentFactory.java

License:Apache License

/**
 * Strips every field from the given document and counts the call in
 * {@code returned}. Presumably an object-pool "passivate" callback that
 * resets a document before reuse -- confirm against the enclosing class.
 *
 * The fields are removed by iterating over a snapshot. The previous code
 * iterated {@code doc.getFields()} directly while {@code removeField}
 * modified the document; when {@code getFields()} exposes the live list
 * (as Lucene 3.x does) that either throws
 * {@link java.util.ConcurrentModificationException} or stops early and
 * leaves fields behind. The snapshot also avoids the former cast to
 * {@code Field}, which was not needed just to read the name.
 *
 * @param obj the document to clear; must be a {@code Document}
 */
public void passivateObject(Object obj) {
    ++returned;
    final Document doc = (Document) obj;
    final List<Fieldable> fields = doc.getFields();
    // Copy first so that removals cannot invalidate the iteration.
    final Fieldable[] snapshot = fields.toArray(new Fieldable[fields.size()]);
    for (Fieldable field : snapshot) {
        // removeField drops one field per call; one call per snapshot entry
        // therefore clears duplicates of the same name as well.
        doc.removeField(field.name());
    }
}

From source file:ca.gnewton.lusql.core.ViewIndex.java

License:Apache License

/**
 * Prints a document to standard output: a "Document#" header line with the
 * given number, then one tab-indented "name: [value]" line per field.
 *
 * @param doc the document to print
 * @param i the number shown in the header line
 */
static void printDoc(Document doc, int i) {
    System.out.println("\nDocument#: " + i);
    for (Object entry : doc.getFields()) {
        // Every entry is expected to be a Field; anything else fails the cast.
        final Field current = (Field) entry;
        final String line = "\t" + current.name() + ": [" + current.stringValue() + "]";
        System.out.println(line);
    }
}

From source file:cc.osint.graphd.graph.Graph.java

License:Apache License

/**
 * Runs a query string against the given searcher and returns every matching
 * document as a JSON object.
 *
 * The query is parsed against {@code KEY_FIELD} with AND as the default
 * operator and leading wildcards allowed. It is applied as a cached filter
 * over a match-all query, so hits are collected without scoring. For each
 * field of a hit, the JSON object maps the field name to
 * {@code Document.get(name)}.
 *
 * A failure while converting a single hit is printed to standard error and
 * that hit is left out of the result.
 *
 * @param indexSearcher the searcher to run against
 * @param queryStr the query in query-parser syntax
 * @return one JSON object per collected document
 * @throws Exception if parsing or searching fails
 */
public List<JSONObject> query(IndexSearcher indexSearcher, String queryStr) throws Exception {
    long start_t = System.currentTimeMillis();

    QueryParser parser = new QueryParser(Version.LUCENE_31, KEY_FIELD, analyzer);
    parser.setDefaultOperator(org.apache.lucene.queryParser.QueryParser.Operator.AND);
    parser.setAllowLeadingWildcard(true);
    Query parsed = parser.parse(queryStr);
    org.apache.lucene.search.Filter cachedFilter = new org.apache.lucene.search.CachingWrapperFilter(
            new QueryWrapperFilter(parsed));

    final List<JSONObject> results = new ArrayList<JSONObject>();
    Collector jsonCollector = new Collector() {
        private int segmentBase;
        IndexReader segmentReader;

        // Scores are never read, so the scorer is ignored.
        public void setScorer(Scorer scorer) {
        }

        // Collection order is irrelevant here.
        public boolean acceptsDocsOutOfOrder() {
            return true;
        }

        public void setNextReader(IndexReader reader, int docBase) {
            this.segmentReader = reader;
            this.segmentBase = docBase;
        }

        public void collect(int doc) {
            try {
                Document hit = segmentReader.document(doc);
                JSONObject json = new JSONObject();
                for (Fieldable field : hit.getFields()) {
                    String key = field.name();
                    json.put(key, hit.get(key));
                }
                results.add(json);
            } catch (Exception ex) {
                ex.printStackTrace();
            }
        }
    };
    indexSearcher.search(new MatchAllDocsQuery(), cachedFilter, jsonCollector);

    long end_t = System.currentTimeMillis();
    //log.info("query: hits.scoreDocs.length = " + results.size() + " (" + (end_t-start_t) + "ms)");
    return results;
}

From source file:ch.sentric.hbase.prospective.Percolator.java

License:Apache License

/**
* Tries to find a set of queries that match the given document.
* 
* @param doc/*from  w  w  w.  j  a  v a2s .  c om*/
*            the Lucene document
* @return the matching queries
* @throws IOException
*             if an I/O error occurs
*/
public Response<T> percolate(final Document doc, final Map<T, Query> queries) throws IOException {
    // first, parse the source doc into a MemoryIndex
    final MemoryIndex memoryIndex = new MemoryIndex();

    for (final Fieldable field : doc.getFields()) {
        if (!field.isIndexed()) {
            continue;
        }

        final TokenStream tokenStream = field.tokenStreamValue();
        if (tokenStream != null) {
            memoryIndex.addField(field.name(), tokenStream, field.getBoost());
        } else {
            final Reader reader = field.readerValue();
            if (reader != null) {
                memoryIndex.addField(field.name(), analyzer.reusableTokenStream(field.name(), reader),
                        field.getBoost());
            } else {
                final String value = field.stringValue();
                if (value != null) {
                    memoryIndex.addField(field.name(),
                            analyzer.reusableTokenStream(field.name(), new CharSequenceReader(value)),
                            field.getBoost());
                }
            }
        }
    }

    // do the search
    final IndexSearcher searcher = memoryIndex.createSearcher();
    final Map<T, Query> matches = new HashMap<T, Query>(0);

    if (queries != null && !queries.isEmpty()) {
        final ExistsCollector collector = new ExistsCollector();
        for (final Map.Entry<T, Query> entry : queries.entrySet()) {
            collector.reset();
            searcher.search(entry.getValue(), collector);
            if (collector.exists()) {
                matches.put(entry.getKey(), entry.getValue());
            }
        }
    }

    return new Response<T>(matches);
}

From source file:cn.hbu.cs.esearch.service.impl.EsearchSearchServiceImpl.java

License:Apache License

/**
 * Copies field values of a document into a map keyed by field name.
 *
 * For {@code SIMPLE_QUERY} only the "_path" and "_name" fields are copied;
 * for {@code QUERY_AND_FETCH} every field is copied; for any other search
 * type nothing is copied.
 *
 * @param document the document to read; may be null
 * @param searchType selects which fields are copied
 * @return a map from field name to all values stored under that name;
 *         empty when {@code document} is null
 */
private static Map<String, String[]> convert(Document document, SearchRequest.SearchType searchType) {
    final Map<String, String[]> valuesByField = new HashMap<String, String[]>();
    if (document == null) {
        return valuesByField;
    }

    for (IndexableField field : document.getFields()) {
        final String fieldname = field.name();
        final boolean wanted = searchType == SearchRequest.SearchType.QUERY_AND_FETCH
                || (searchType == SearchRequest.SearchType.SIMPLE_QUERY
                        && (fieldname.equals("_path") || fieldname.equals("_name")));
        if (wanted) {
            valuesByField.put(fieldname, document.getValues(fieldname));
        }
    }
    return valuesByField;
}

From source file:com.baidu.rigel.biplatform.tesseract.resultset.isservice.ResultRecord.java

License:Open Source License

/**
 * Builds a record from a Lucene document.
 *
 * The string value of each field goes into {@code fieldArray}, and the
 * field names, in the same order, go into a new {@code Meta}.
 *
 * @param doc
 *            the document whose fields populate the record
 */
public ResultRecord(Document doc) {
    super();
    final List<IndexableField> docFields = doc.getFields();
    final int count = docFields.size();
    final String[] names = new String[count];
    final String[] values = new String[count];

    int pos = 0;
    for (IndexableField docField : docFields) {
        names[pos] = docField.name();
        values[pos] = docField.stringValue();
        pos++;
    }

    this.fieldArray = values;
    this.meta = new Meta(names);
}

From source file:com.basistech.lucene.tools.LuceneQueryTool.java

License:Apache License

/**
 * Formats one document with {@code formatter} and prints it.
 *
 * Columns are, in order: "&lt;id&gt;" (when {@code showId}), "&lt;score&gt;" (when
 * {@code showScore}), then the configured {@code fieldNames}, or, when none
 * are configured, every distinct field name of the document in first-seen
 * order. The columns are sorted when {@code sortFields} is set. Before the
 * first printed document in TABULAR format, a tab-separated header of
 * column names is printed unless the formatter suppresses names.
 *
 * @param doc the document to print
 * @param id the value shown in the "&lt;id&gt;" column
 * @param score the value shown in the "&lt;score&gt;" column
 * @param out the stream to print to
 */
private void printDocument(Document doc, int id, float score, PrintStream out) {
    List<String> columns = Lists.newArrayList();
    Multimap<String, String> values = ArrayListMultimap.create();

    if (showId) {
        columns.add("<id>");
        values.put("<id>", Integer.toString(id));
    }
    if (showScore) {
        columns.add("<score>");
        values.put("<score>", Double.toString(score));
    }
    columns.addAll(fieldNames);

    // Decide which document fields are selected for output.
    Set<String> selected = Sets.newHashSet();
    if (!fieldNames.isEmpty()) {
        selected.addAll(fieldNames);
    } else {
        for (IndexableField field : doc.getFields()) {
            // add() reports whether the name is new, i.e. seen for the first time.
            if (selected.add(field.name())) {
                columns.add(field.name());
            }
        }
    }
    if (sortFields) {
        Collections.sort(columns);
    }

    for (IndexableField field : doc.getFields()) {
        String fieldName = field.name();
        if (!selected.contains(fieldName)) {
            continue;
        }
        String text = field.stringValue();
        if (text == null) {
            // NOTE(review): the whole backing array is passed on; if the returned
            // value can carry an offset/length window, this ignores it -- confirm.
            text = (field.binaryValue() != null) ? formatBinary(field.binaryValue().bytes) : "null";
        }
        values.put(fieldName, text);
    }

    boolean needsHeader = docsPrinted == 0 && formatter.getFormat() == Formatter.Format.TABULAR
            && !formatter.suppressNames();
    if (needsHeader) {
        out.println(Joiner.on('\t').join(columns));
    }

    String formatted = formatter.format(columns, values);
    if (formatted.isEmpty()) {
        return;
    }
    // MULTILINE output separates consecutive documents with a blank line.
    if (docsPrinted > 0 && formatter.getFormat() == Formatter.Format.MULTILINE) {
        out.println();
    }
    out.println(formatted);
    ++docsPrinted;
}