Example usage for org.apache.lucene.index IndexableField stringValue

List of usage examples for org.apache.lucene.index IndexableField stringValue

Introduction

On this page you can find example usages of org.apache.lucene.index IndexableField stringValue.

Prototype

public String stringValue();

Source Link

Document

Non-null if this field has a string value

Usage

From source file:KNearestNeighborClassifier.java

License:Apache License

/**
 * Builds the list of classification results from a set of search results.
 * <p>
 * Every hit that carries the class field adds one vote to its class, plus a boost equal to
 * the hit's score divided by the maximum score of the result set. Each class is then scored
 * as {@code count * (boostSum / count) / k}. When fewer than {@code k} hits carried a class
 * value, every score is rescaled by {@code k / sumdoc}.
 *
 * @param topDocs the search results as a {@link TopDocs} object
 * @return a {@link List} of {@link ClassificationResult}, one for each class found in the hits
 * @throws IOException if it's not possible to get the stored value of class field
 */
protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException {
    final Map<BytesRef, Integer> hitsPerClass = new HashMap<>();
    // sum of the score-relative boosts of all hits, keyed by class
    final Map<BytesRef, Double> boostPerClass = new HashMap<>();
    final float bestScore = topDocs.getMaxScore();

    for (ScoreDoc hit : topDocs.scoreDocs) {
        final IndexableField classField = indexSearcher.doc(hit.doc).getField(classFieldName);
        if (classField == null) {
            // this hit has no class field, so it does not vote
            continue;
        }
        final BytesRef classValue = new BytesRef(classField.stringValue());

        // one more vote for this class
        final Integer votesSoFar = hitsPerClass.get(classValue);
        hitsPerClass.put(classValue, votesSoFar == null ? 1 : votesSoFar + 1);

        // the boost of a hit is its score relative to the best score
        final Double boostSoFar = boostPerClass.get(classValue);
        final double hitBoost = hit.score / bestScore;
        boostPerClass.put(classValue, boostSoFar == null ? hitBoost : boostSoFar + hitBoost);
    }

    final List<ClassificationResult<BytesRef>> scored = new ArrayList<>();
    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> votes : hitsPerClass.entrySet()) {
        final BytesRef classValue = votes.getKey();
        final Integer count = votes.getValue();
        // average boost of the class
        final Double normBoost = boostPerClass.get(classValue) / count;
        scored.add(new ClassificationResult<>(classValue.clone(), (count * normBoost) / (double) k));
        sumdoc += count;
    }

    if (sumdoc >= k) {
        return scored;
    }

    // correction: fewer than k hits voted, so rescale the scores accordingly
    final List<ClassificationResult<BytesRef>> corrected = new ArrayList<>();
    for (ClassificationResult<BytesRef> result : scored) {
        corrected.add(
                new ClassificationResult<>(result.getAssignedClass(), result.getScore() * k / (double) sumdoc));
    }
    return corrected;
}

From source file:alix.lucene.MoreLikeThis.java

License:Apache License

/**
 * Collects the term frequencies of one document and turns them into the queue used to form
 * a more-like-this query.
 * <p>
 * For each configured field name the term vector is used when the reader returns one;
 * otherwise the string values of the document's fields with that name are fed to
 * {@code addTermFrequencies} through a reader.
 *
 * @param docNum the id of the lucene document from which to find terms
 * @return the result of {@code createQueue} applied to the collected term frequencies
 * @throws IOException if one of the index or analysis calls fails
 */
private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException {
    final Map<String, Int> frequencies = new HashMap<>();

    for (String fieldName : fieldNames) {
        final Terms termVector = ir.getTermVector(docNum, fieldName);
        if (termVector != null) {
            addTermFrequencies(frequencies, termVector);
            continue;
        }

        // no term vector for this field: fall back to the string values of the document
        for (IndexableField field : ir.document(docNum).getFields(fieldName)) {
            final String text = field.stringValue();
            if (text == null) {
                continue;
            }
            addTermFrequencies(new StringReader(text), frequencies, fieldName);
        }
    }

    return createQueue(frequencies);
}

From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.utils.NewsItemLuceneDocConverter.java

License:Apache License

/**
 * Converts a Lucene Document to a NewsItem.
 * <p>
 * Each property is read from the document field of the same purpose. When a field is
 * missing, a default is used: a placeholder text for description, source and full text,
 * an empty string for id and title, the default locale, and the current time for the
 * timestamp. A URL value that cannot be parsed is stored as {@code null}. The image URL
 * and the terms are left untouched when their field is missing.
 *
 * @param d the Lucene document to convert
 * @return a new {@link RecommendedNewsItem} populated from {@code d}
 */
public static RecommendedNewsItem documentToNewsItem(Document d) {
    RecommendedNewsItem item = new RecommendedNewsItem();
    IndexableField field;

    item.setDescription(stringValueOrDefault(d, "description", "No description available"));
    item.setSource(stringValueOrDefault(d, "source", "No source available"));
    item.setFulltext(stringValueOrDefault(d, "text", "No text available"));
    item.setId(stringValueOrDefault(d, "id", ""));

    field = d.getField("imageUrl");
    if (field != null) {
        item.setImageUrl(toUrlOrNull(field.stringValue()));
    }

    field = d.getField("locale");
    if (field != null) {
        item.setLocale(Locale.forLanguageTag(field.stringValue()));
    } else {
        item.setLocale(Locale.getDefault());
    }

    field = d.getField("timestamp");
    if (field != null) {
        item.setTimestamp(new Date(field.numericValue().longValue()));
    } else {
        item.setTimestamp(new Date());
    }

    item.setTitle(stringValueOrDefault(d, "title", ""));

    field = d.getField("url");
    if (field != null) {
        item.setUrl(toUrlOrNull(field.stringValue()));
    } else {
        // A missing URL must not touch the title: the previous code reset the title to ""
        // here, discarding the value read just above.
        item.setUrl(null);
    }

    field = d.getField("terms");
    if (field != null) {
        Map<String, Double> terms = gson.fromJson(field.stringValue(), HashMap.class);
        item.addTerms(terms);
    }

    return item;
}

/**
 * Returns the string value of the field {@code name} as given by {@code Document.getField},
 * or {@code fallback} when the document has no such field.
 */
private static String stringValueOrDefault(Document d, String name, String fallback) {
    IndexableField field = d.getField(name);
    return field != null ? field.stringValue() : fallback;
}

/** Parses {@code spec} as a URL, returning {@code null} when it is malformed. */
private static URL toUrlOrNull(String spec) {
    try {
        return new URL(spec);
    } catch (MalformedURLException ex) {
        return null;
    }
}

From source file:br.bireme.ngrams.CompareResults.java

/**
 * Writes a textual report of the differences between two documents.
 * <p>
 * Only fields of {@code doc1} whose name ends with {@code ~notnormalized} and does not start
 * with {@code id~} are compared, each against the value {@code doc2} returns for the same
 * name. The report starts with {@code <identical>}, {@code <very similar>} (when
 * {@code similarity} is {@code "1.0"}) or {@code <similar>}, followed by both ids and one
 * line per differing field.
 *
 * @param similarity the similarity of the two documents, as text
 * @param doc1 the document whose fields drive the comparison
 * @param doc2 the document compared against
 * @param bwriter the writer the report is appended to
 * @throws IOException if appending to {@code bwriter} fails
 */
private static void writeDocDifferences(final String similarity, final Document doc1, final Document doc2,
        final BufferedWriter bwriter) throws IOException {
    assert similarity != null;
    assert doc1 != null;
    assert doc2 != null;
    assert bwriter != null;

    final StringBuilder builder = new StringBuilder();
    final Set<String> diff = new HashSet<>();
    final String id1 = doc1.get("id");
    final String id2 = doc2.get("id");

    for (IndexableField fld : doc1.getFields()) {
        final String name = fld.name();

        if (name.endsWith("~notnormalized") && !name.startsWith("id~")) {
            final String value1 = fld.stringValue();
            final String value2 = doc2.get(name);
            // Null-safe comparison: two missing values are equal. The previous condition
            // called value1.equals(...) when both values were null and threw a
            // NullPointerException.
            final boolean differs = (value1 == null) ? (value2 != null) : !value1.equals(value2);
            if (differs) {
                final String name2 = name.substring(0, name.lastIndexOf('~'));
                diff.add("[" + name2 + "]|" + value1 + "|" + value2);
            }
        }
    }
    if (diff.isEmpty()) {
        builder.append("<identical>|");
        builder.append(id1 + "|" + id2 + "\n");
    } else {
        if (similarity.equals("1.0")) {
            builder.append("<very similar>|");
        } else {
            builder.append("<similar>|");
        }
        builder.append(id1 + "|" + id2 + "\n");
        for (String di : diff) {
            builder.append(di);
            builder.append("\n");
        }
    }
    builder.append("\n");
    bwriter.append(builder.toString());
}

From source file:com.b2international.index.lucene.BooleanIndexField.java

License:Apache License

/**
 * Reads the field's string value and converts it with {@code convertFromString}.
 *
 * @param field the field to read
 * @return the converted value
 */
@Override
protected Boolean getValue(IndexableField field) {
    final String rawValue = field.stringValue();
    return convertFromString(rawValue);
}

From source file:com.b2international.index.lucene.StringIndexFieldBase.java

License:Apache License

/**
 * Returns the field's string value unchanged.
 *
 * @param field the field to read
 * @return whatever {@code field.stringValue()} returns
 */
@Override
public String getValue(IndexableField field) {
    final String storedValue = field.stringValue();
    return storedValue;
}

From source file:com.baidu.rigel.biplatform.tesseract.resultset.isservice.ResultRecord.java

License:Open Source License

/**
 * Builds a record from every field of a Lucene document: the string values become the
 * record's field array and the field names, in the same order, become its {@code Meta}.
 *
 * @param doc
 *            the document whose fields are copied
 */
public ResultRecord(Document doc) {
    super();
    final List<IndexableField> docFields = doc.getFields();
    final int fieldCount = docFields.size();
    final String[] names = new String[fieldCount];
    final String[] values = new String[fieldCount];

    int position = 0;
    for (IndexableField docField : docFields) {
        names[position] = docField.name();
        values[position] = docField.stringValue();
        position++;
    }

    this.fieldArray = values;
    this.meta = new Meta(names);
}

From source file:com.basistech.lucene.tools.LuceneQueryTool.java

License:Apache License

/**
 * Prints one document through the configured formatter.
 * <p>
 * The column order is: the optional {@code <id>} and {@code <score>} pseudo-fields, then
 * the configured field names. When no field names are configured, every distinct field name
 * of the document is used in order of first appearance. The names are sorted when
 * {@code sortFields} is set. A field value is its string value, else its formatted binary
 * value, else the text {@code "null"}. In tabular format a header line with the field names
 * precedes the first printed document unless names are suppressed; in multi-line format
 * documents are separated by a blank line.
 *
 * @param doc the document to print
 * @param id the document id shown in the {@code <id>} column
 * @param score the score shown in the {@code <score>} column
 * @param out the stream to print to
 */
private void printDocument(Document doc, int id, float score, PrintStream out) {
    Multimap<String, String> data = ArrayListMultimap.create();
    List<String> orderedFieldNames = Lists.newArrayList();
    if (showId) {
        orderedFieldNames.add("<id>");
        data.put("<id>", Integer.toString(id));
    }
    if (showScore) {
        orderedFieldNames.add("<score>");
        data.put("<score>", Double.toString(score));
    }
    orderedFieldNames.addAll(fieldNames);

    Set<String> setFieldNames = Sets.newHashSet();
    if (fieldNames.isEmpty()) {
        for (IndexableField f : doc.getFields()) {
            // Set.add returns true only for a name not seen before, so each name is
            // listed once, in order of first appearance
            if (setFieldNames.add(f.name())) {
                orderedFieldNames.add(f.name());
            }
        }
    } else {
        setFieldNames.addAll(fieldNames);
    }
    if (sortFields) {
        Collections.sort(orderedFieldNames);
    }

    for (IndexableField f : doc.getFields()) {
        if (!setFieldNames.contains(f.name())) {
            continue;
        }
        final String text = f.stringValue();
        if (text != null) {
            data.put(f.name(), text);
            continue;
        }
        final org.apache.lucene.util.BytesRef binary = f.binaryValue();
        if (binary != null) {
            // Only bytes[offset, offset + length) belong to the value; the backing array
            // may be larger or shared, so format a copy of the valid slice only.
            data.put(f.name(), formatBinary(java.util.Arrays.copyOfRange(binary.bytes, binary.offset,
                    binary.offset + binary.length)));
        } else {
            data.put(f.name(), "null");
        }
    }

    if (docsPrinted == 0 && formatter.getFormat() == Formatter.Format.TABULAR && !formatter.suppressNames()) {
        out.println(Joiner.on('\t').join(orderedFieldNames));
    }

    String formatted = formatter.format(orderedFieldNames, data);
    if (!formatted.isEmpty()) {
        if (docsPrinted > 0 && formatter.getFormat() == Formatter.Format.MULTILINE) {
            out.println();
        }
        out.println(formatted);
        ++docsPrinted;
    }
}

From source file:com.bericotech.clavin.index.IndexField.java

License:Apache License

/**
 * Get the value of this field as set in the given document or <code>null</code>
 * if the field is not set or cannot be retrieved.  If a field has multiple values,
 * the value that is returned may be arbitrarily selected from one of the values. In
 * this instance, use the methods in Document directly to retrieve multiple values.
 * @param <T> the expected return type
 * @param doc the input document/* w  w  w.  j av a2s .  co  m*/
 * @return the value of this field in the input document, if it has been set, or <code>null</code>
 */
@SuppressWarnings("unchecked")
public <T> T getValue(final Document doc) {
    IndexableField field = doc.getField(key);
    Object value = null;
    if (field != null) {
        switch (this) {
        case INDEX_NAME:
        case GEONAME:
        case PREFERRED_NAME:
            value = field.stringValue();
            break;
        case GEONAME_ID:
        case PARENT_ID:
        case ANCESTOR_IDS:
            value = field.numericValue().intValue();
            break;
        case POPULATION:
            value = field.numericValue().longValue();
            break;
        case SORT_POP:
            value = field.numericValue().longValue();
            break;
        case HISTORICAL:
        case FEATURE_CODE:
            // these fields are not stored
            LOG.warn("Attempting to retrieve value for an unstored field: [{}]", this);
            break;
        default:
            LOG.error("Attempting to retrieve value for an unconfigured field: [{}]", this);
            break;
        }
    }
    return (T) value;
}

From source file:com.bluedragon.search.search.QueryRun.java

License:Open Source License

/**
 * Appends one search hit as a new row of the query result.
 * <p>
 * Columns 1-6 always hold id, name, score, search count, records searched and the 1-based
 * rank. Every other field of the document goes into a column named after the field, created
 * on first use. If a unique column is configured and its value was already seen, the new row
 * is deleted again and nothing else happens. The "contents" field is skipped unless the
 * content flag is set. When context passages are requested and the document has contents,
 * highlighted fragments are joined with "..." and stored in the "CONTEXT" column.
 *
 * @param searcher the searcher used to load the document
 * @param docid the id of the document to load
 * @param score the score of the hit
 * @param rank the 0-based rank of the hit; stored as {@code rank + 1}
 * @param searchCount value of the search count column
 * @param recordsSearched value of the records searched column
 * @throws CorruptIndexException declared by the signature; raised by the calls made here
 * @throws Exception declared by the signature; raised by the calls made here
 */
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount,
        int recordsSearched) throws CorruptIndexException, Exception {
    final DocumentWrap document = new DocumentWrap(searcher.doc(docid));

    // open a fresh row and make it the current one
    queryResultData.addRow(1);
    queryResultData.setCurrentRow(queryResultData.getSize());

    // the standard columns every search result has
    queryResultData.setCell(1, new cfStringData(document.getId()));
    queryResultData.setCell(2, new cfStringData(document.getName()));
    queryResultData.setCell(3, new cfNumberData(score));
    queryResultData.setCell(4, new cfNumberData(searchCount));
    queryResultData.setCell(5, new cfNumberData(recordsSearched));
    queryResultData.setCell(6, new cfNumberData(rank + 1));

    final String uniqueColumn = queryAttributes.getUniqueColumn();

    // the custom columns, one per document field
    for (IndexableField docField : document.getDocument().getFields()) {
        final String columnKey = docField.name().toLowerCase();

        // a repeated value in the unique column cancels the whole row
        if (uniqueSet != null && columnKey.equals(uniqueColumn)) {
            final String uniqueValue = docField.stringValue();
            if (uniqueSet.contains(uniqueValue)) {
                queryResultData.deleteRow(queryResultData.getSize());
                return;
            }
            uniqueSet.add(uniqueValue);
        }

        // contents are only copied when explicitly requested
        if (columnKey.equals("contents") && !queryAttributes.getContentFlag()) {
            continue;
        }

        // create the column the first time this field name shows up
        if (!activeColumns.containsKey(columnKey)) {
            int createdColumn = queryResultData.addColumnData(docField.name().toUpperCase(),
                    cfArrayData.createArray(1), null);
            activeColumns.put(columnKey, createdColumn);
        }

        // columns 1-6 are the standard ones filled above; never overwrite them
        final int column = activeColumns.get(columnKey);
        if (column > 6) {
            queryResultData.setCell(column, new cfStringData(docField.stringValue()));
        }
    }

    // the context passages are optional
    if (queryAttributes.getContextPassages() <= 0) {
        return;
    }

    final Scorer scorer = new QueryScorer(queryAttributes.getQuery());
    final SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(
            queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd());
    final Highlighter highlighter = new Highlighter(htmlFormatter, scorer);
    final Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes());
    highlighter.setTextFragmenter(fragmenter);

    final String contents = document.getAttribute(DocumentWrap.CONTENTS);
    if (contents == null) {
        return;
    }

    final TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS,
            new StringReader(contents));

    String nextContext = "";
    try {
        final String[] fragments = highlighter.getBestFragments(tokenStream, contents,
                queryAttributes.getContextPassages());
        if (fragments.length == 1) {
            nextContext = fragments[0] + "...";
        } else {
            final StringBuilder joined = new StringBuilder();
            for (String fragment : fragments) {
                joined.append("...");
                joined.append(fragment);
            }
            joined.append("...");
            nextContext = joined.toString();
        }
    } catch (Exception ignored) {
        // best effort: if highlighting fails the context column keeps the empty string
    }

    // store the context, creating its column on first use
    if (!activeColumns.containsKey("context")) {
        int createdColumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null);
        activeColumns.put("context", createdColumn);
    }

    queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext));
}