List of usage examples for org.apache.lucene.index.IndexableField.stringValue()
public String stringValue();
From source file:KNearestNeighborClassifier.java
License:Apache License
/** * build a list of classification results from search results * @param topDocs the search results as a {@link TopDocs} object * @return a {@link List} of {@link ClassificationResult}, one for each existing class * @throws IOException if it's not possible to get the stored value of class field *///from www .ja v a 2s .c o m protected List<ClassificationResult<BytesRef>> buildListFromTopDocs(TopDocs topDocs) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs float maxScore = topDocs.getMaxScore(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField storableField = indexSearcher.doc(scoreDoc.doc).getField(classFieldName); if (storableField != null) { BytesRef cl = new BytesRef(storableField.stringValue()); //update count Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else { classCounts.put(cl, 1); } //update boost, the boost is based on the best score Double totalBoost = classBoosts.get(cl); double singleBoost = scoreDoc.score / maxScore; if (totalBoost != null) { classBoosts.put(cl, totalBoost + singleBoost); } else { classBoosts.put(cl, singleBoost); } } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1 temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : temporaryList) { returnList.add( new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc)); } } else { returnList = temporaryList; } return 
returnList; }
From source file:alix.lucene.MoreLikeThis.java
License:Apache License
/** * Find words for a more-like-this query former. * * @param docNum the id of the lucene document from which to find terms *//*from w w w.ja v a 2 s.com*/ private PriorityQueue<ScoreTerm> retrieveTerms(int docNum) throws IOException { Map<String, Int> termFreqMap = new HashMap<>(); for (String fieldName : fieldNames) { final Terms vector = ir.getTermVector(docNum, fieldName); // field does not store term vector info if (vector == null) { Document d = ir.document(docNum); IndexableField[] fields = d.getFields(fieldName); for (IndexableField field : fields) { final String stringValue = field.stringValue(); if (stringValue != null) { addTermFrequencies(new StringReader(stringValue), termFreqMap, fieldName); } } } else { addTermFrequencies(termFreqMap, vector); } } return createQueue(termFreqMap); }
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.utils.NewsItemLuceneDocConverter.java
License:Apache License
/**
 * Converts a Lucene Document to a NewsItem.
 *
 * <p>Field handling:
 * <ul>
 *   <li>{@code description}, {@code source}, {@code text}, {@code id}, {@code title}: the
 *       field's string value, or a fixed default when the field is absent.</li>
 *   <li>{@code imageUrl}, {@code url}: parsed as a {@link URL} when the field is present; a
 *       malformed value results in {@code null}. When the field is absent the item is left
 *       untouched.</li>
 *   <li>{@code locale}: parsed as a language tag, or the default locale when absent.</li>
 *   <li>{@code timestamp}: the numeric value as epoch milliseconds, or the current time when
 *       the field is absent or carries no numeric value.</li>
 *   <li>{@code terms}: a JSON map that is added to the item when present.</li>
 * </ul>
 *
 * @param d the Lucene document to read the fields from
 * @return a newly created item populated from {@code d}
 */
public static RecommendedNewsItem documentToNewsItem(Document d) {
    RecommendedNewsItem item = new RecommendedNewsItem();

    item.setDescription(stringValueOrDefault(d, "description", "No description available"));
    item.setSource(stringValueOrDefault(d, "source", "No source available"));
    item.setFulltext(stringValueOrDefault(d, "text", "No text available"));
    item.setId(stringValueOrDefault(d, "id", ""));

    IndexableField field = d.getField("imageUrl");
    if (field != null) {
        item.setImageUrl(parseUrlOrNull(field.stringValue()));
    }

    field = d.getField("locale");
    if (field != null) {
        item.setLocale(Locale.forLanguageTag(field.stringValue()));
    } else {
        item.setLocale(Locale.getDefault());
    }

    // A field that exists but holds no numeric value would make numericValue() return null;
    // fall back to "now" in that case instead of failing with a NullPointerException.
    field = d.getField("timestamp");
    Number millis = (field != null) ? field.numericValue() : null;
    item.setTimestamp(millis != null ? new Date(millis.longValue()) : new Date());

    item.setTitle(stringValueOrDefault(d, "title", ""));

    // The previous code reset the title to "" when the url field was missing, discarding the
    // title that had just been set. A missing url now leaves the item untouched, exactly like
    // a missing imageUrl.
    field = d.getField("url");
    if (field != null) {
        item.setUrl(parseUrlOrNull(field.stringValue()));
    }

    field = d.getField("terms");
    if (field != null) {
        Map<String, Double> terms = gson.fromJson(field.stringValue(), HashMap.class);
        item.addTerms(terms);
    }
    return item;
}

/** Returns the string value of the named field, or {@code fallback} when the field is absent. */
private static String stringValueOrDefault(Document d, String name, String fallback) {
    IndexableField field = d.getField(name);
    return (field != null) ? field.stringValue() : fallback;
}

/** Parses {@code spec} as a URL, returning {@code null} when it is malformed. */
private static URL parseUrlOrNull(String spec) {
    try {
        return new URL(spec);
    } catch (MalformedURLException ex) {
        return null;
    }
}
From source file:br.bireme.ngrams.CompareResults.java
/**
 * Writes a report of the differences between two documents to {@code bwriter}.
 *
 * <p>Only fields of {@code doc1} whose name ends with {@code "~notnormalized"} and does not
 * start with {@code "id~"} are compared against the value {@code doc2} holds under the same
 * name. The report starts with {@code <identical>}, {@code <very similar>} (when
 * {@code similarity} equals {@code "1.0"}) or {@code <similar>}, followed by both ids and one
 * line per differing field, and ends with an empty line.
 *
 * @param similarity the similarity of the two documents, as text
 * @param doc1 the document whose fields drive the comparison
 * @param doc2 the document compared against
 * @param bwriter the writer receiving the report
 * @throws IOException if writing to {@code bwriter} fails
 */
private static void writeDocDifferences(final String similarity, final Document doc1, final Document doc2,
        final BufferedWriter bwriter) throws IOException {
    assert similarity != null;
    assert doc1 != null;
    assert doc2 != null;
    assert bwriter != null;

    final StringBuilder builder = new StringBuilder();
    final Set<String> diff = new HashSet<>();
    final String id1 = doc1.get("id");
    final String id2 = doc2.get("id");

    for (IndexableField fld : doc1.getFields()) {
        final String name = fld.name();
        if (name.endsWith("~notnormalized") && !name.startsWith("id~")) {
            final String value1 = fld.stringValue();
            final String value2 = doc2.get(name);
            // Null-safe comparison: two null values are equal. The previous expression called
            // value1.equals(...) on a null value1 when both values were null.
            final boolean differs = (value1 == null) ? (value2 != null) : !value1.equals(value2);
            if (differs) {
                final String name2 = name.substring(0, name.lastIndexOf('~'));
                diff.add("[" + name2 + "]|" + value1 + "|" + value2);
            }
        }
    }

    if (diff.isEmpty()) {
        builder.append("<identical>|");
        builder.append(id1).append("|").append(id2).append("\n");
    } else {
        if (similarity.equals("1.0")) {
            builder.append("<very similar>|");
        } else {
            builder.append("<similar>|");
        }
        builder.append(id1).append("|").append(id2).append("\n");
        for (String di : diff) {
            builder.append(di);
            builder.append("\n");
        }
    }
    builder.append("\n");
    bwriter.append(builder.toString());
}
From source file:com.b2international.index.lucene.BooleanIndexField.java
License:Apache License
/**
 * Reads the string value of {@code field} and converts it with {@code convertFromString}.
 *
 * @param field the field to read
 * @return the converted value
 */
@Override
protected Boolean getValue(IndexableField field) {
    final String raw = field.stringValue();
    return convertFromString(raw);
}
From source file:com.b2international.index.lucene.StringIndexFieldBase.java
License:Apache License
/**
 * Returns the string value of {@code field} unchanged.
 *
 * @param field the field to read
 * @return whatever {@code field.stringValue()} yields
 */
@Override
public String getValue(IndexableField field) {
    final String value = field.stringValue();
    return value;
}
From source file:com.baidu.rigel.biplatform.tesseract.resultset.isservice.ResultRecord.java
License:Open Source License
/**
 * Creates a record from a document: the string value of every field becomes one entry of
 * {@code fieldArray}, and the field names, in the same order, form the {@code Meta}.
 *
 * @param doc
 *            the document whose fields are copied
 */
public ResultRecord(Document doc) {
    super();
    final List<IndexableField> docFields = doc.getFields();
    final int size = docFields.size();
    final String[] names = new String[size];
    final String[] values = new String[size];

    int position = 0;
    for (IndexableField docField : docFields) {
        names[position] = docField.name();
        values[position] = docField.stringValue();
        position++;
    }

    this.fieldArray = values;
    this.meta = new Meta(names);
}
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
License:Apache License
private void printDocument(Document doc, int id, float score, PrintStream out) { Multimap<String, String> data = ArrayListMultimap.create(); List<String> orderedFieldNames = Lists.newArrayList(); if (showId) { orderedFieldNames.add("<id>"); data.put("<id>", Integer.toString(id)); }//from w w w . ja va 2 s.c om if (showScore) { orderedFieldNames.add("<score>"); data.put("<score>", Double.toString(score)); } orderedFieldNames.addAll(fieldNames); Set<String> setFieldNames = Sets.newHashSet(); if (fieldNames.isEmpty()) { for (IndexableField f : doc.getFields()) { if (!setFieldNames.contains(f.name())) { orderedFieldNames.add(f.name()); } setFieldNames.add(f.name()); } } else { setFieldNames.addAll(fieldNames); } if (sortFields) { Collections.sort(orderedFieldNames); } for (IndexableField f : doc.getFields()) { if (setFieldNames.contains(f.name())) { if (f.stringValue() != null) { data.put(f.name(), f.stringValue()); } else if (f.binaryValue() != null) { data.put(f.name(), formatBinary(f.binaryValue().bytes)); } else { data.put(f.name(), "null"); } } } if (docsPrinted == 0 && formatter.getFormat() == Formatter.Format.TABULAR && !formatter.suppressNames()) { out.println(Joiner.on('\t').join(orderedFieldNames)); } String formatted = formatter.format(orderedFieldNames, data); if (!formatted.isEmpty()) { if (docsPrinted > 0 && formatter.getFormat() == Formatter.Format.MULTILINE) { out.println(); } out.println(formatted); ++docsPrinted; } }
From source file:com.bericotech.clavin.index.IndexField.java
License:Apache License
/** * Get the value of this field as set in the given document or <code>null</code> * if the field is not set or cannot be retrieved. If a field has multiple values, * the value that is returned may be arbitrarily selected from one of the values. In * this instance, use the methods in Document directly to retrieve multiple values. * @param <T> the expected return type * @param doc the input document/* w w w. j av a2s . co m*/ * @return the value of this field in the input document, if it has been set, or <code>null</code> */ @SuppressWarnings("unchecked") public <T> T getValue(final Document doc) { IndexableField field = doc.getField(key); Object value = null; if (field != null) { switch (this) { case INDEX_NAME: case GEONAME: case PREFERRED_NAME: value = field.stringValue(); break; case GEONAME_ID: case PARENT_ID: case ANCESTOR_IDS: value = field.numericValue().intValue(); break; case POPULATION: value = field.numericValue().longValue(); break; case SORT_POP: value = field.numericValue().longValue(); break; case HISTORICAL: case FEATURE_CODE: // these fields are not stored LOG.warn("Attempting to retrieve value for an unstored field: [{}]", this); break; default: LOG.error("Attempting to retrieve value for an unconfigured field: [{}]", this); break; } } return (T) value; }
From source file:com.bluedragon.search.search.QueryRun.java
License:Open Source License
private void addRow(IndexSearcher searcher, int docid, float score, int rank, int searchCount, int recordsSearched) throws CorruptIndexException, Exception { DocumentWrap document = new DocumentWrap(searcher.doc(docid)); queryResultData.addRow(1);//from ww w . j a v a2s . co m queryResultData.setCurrentRow(queryResultData.getSize()); // Add in the standard columns that we know we have for every search queryResultData.setCell(1, new cfStringData(document.getId())); queryResultData.setCell(2, new cfStringData(document.getName())); queryResultData.setCell(3, new cfNumberData(score)); queryResultData.setCell(4, new cfNumberData(searchCount)); queryResultData.setCell(5, new cfNumberData(recordsSearched)); queryResultData.setCell(6, new cfNumberData(rank + 1)); String uC = queryAttributes.getUniqueColumn(); // Now we do the custom ones List<IndexableField> fields = document.getDocument().getFields(); Iterator<IndexableField> it = fields.iterator(); while (it.hasNext()) { IndexableField fieldable = it.next(); String fieldName = fieldable.name().toLowerCase(); // Check for the unique if (uniqueSet != null && fieldName.equals(uC)) { if (uniqueSet.contains(fieldable.stringValue())) { queryResultData.deleteRow(queryResultData.getSize()); return; } else uniqueSet.add(fieldable.stringValue()); } // Check to see if we have this column if (fieldName.equals("contents") && !queryAttributes.getContentFlag()) continue; if (!activeColumns.containsKey(fieldName)) { int newcolumn = queryResultData.addColumnData(fieldable.name().toUpperCase(), cfArrayData.createArray(1), null); activeColumns.put(fieldName, newcolumn); } int column = activeColumns.get(fieldName); if (column <= 6) continue; queryResultData.setCell(column, new cfStringData(fieldable.stringValue())); } // Do the context stuff if enable if (queryAttributes.getContextPassages() > 0) { Scorer scorer = new QueryScorer(queryAttributes.getQuery()); SimpleHTMLFormatter formatter = new 
SimpleHTMLFormatter(queryAttributes.getContextHighlightStart(), queryAttributes.getContextHighlightEnd()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(queryAttributes.getContextBytes()); highlighter.setTextFragmenter(fragmenter); String nextContext = ""; String contents = document.getAttribute(DocumentWrap.CONTENTS); if (contents != null) { TokenStream tokenStream = AnalyzerFactory.get("simple").tokenStream(DocumentWrap.CONTENTS, new StringReader(contents)); String[] fragments = null; try { fragments = highlighter.getBestFragments(tokenStream, contents, queryAttributes.getContextPassages()); if (fragments.length == 1) { nextContext = fragments[0] + "..."; } else { StringBuilder context = new StringBuilder(); for (int f = 0; f < fragments.length; f++) { context.append("..."); context.append(fragments[f]); } context.append("..."); nextContext = context.toString(); } } catch (Exception e) { } // Add in the context if (!activeColumns.containsKey("context")) { int newcolumn = queryResultData.addColumnData("CONTEXT", cfArrayData.createArray(1), null); activeColumns.put("context", newcolumn); } queryResultData.setCell(activeColumns.get("context"), new cfStringData(nextContext)); } } }