List of usage examples for org.apache.lucene.index IndexableField numericValue
public Number numericValue();
From source file:be.ugent.tiwi.sleroux.newsrec.newsreclib.utils.NewsItemLuceneDocConverter.java
License:Apache License
/**
 * Converts a Lucene Document to a NewsItem.
 *
 * <p>Fields missing from the document are replaced by defaults: a placeholder
 * text for description, source and full text, an empty string for id and title,
 * the default locale, and the current time for the timestamp. The image URL and
 * the URL are only set when the corresponding field is present; a malformed
 * value results in {@code null} being set.
 *
 * @param d the Lucene document to read the stored fields from
 * @return a newly created item populated from the stored fields of {@code d}
 */
public static RecommendedNewsItem documentToNewsItem(Document d) {
    RecommendedNewsItem item = new RecommendedNewsItem();

    item.setDescription(storedStringOrDefault(d, "description", "No description available"));
    item.setSource(storedStringOrDefault(d, "source", "No source available"));
    item.setFulltext(storedStringOrDefault(d, "text", "No text available"));
    item.setId(storedStringOrDefault(d, "id", ""));

    IndexableField field = d.getField("imageUrl");
    if (field != null) {
        item.setImageUrl(toUrlOrNull(field.stringValue()));
    }

    field = d.getField("locale");
    if (field != null) {
        item.setLocale(Locale.forLanguageTag(field.stringValue()));
    } else {
        item.setLocale(Locale.getDefault());
    }

    // numericValue() is null when the field does not hold a number; fall back to
    // "now" in that case as well instead of failing with a NullPointerException.
    field = d.getField("timestamp");
    Number storedTimestamp = (field != null) ? field.numericValue() : null;
    if (storedTimestamp != null) {
        item.setTimestamp(new Date(storedTimestamp.longValue()));
    } else {
        item.setTimestamp(new Date());
    }

    item.setTitle(storedStringOrDefault(d, "title", ""));

    // A missing url field must leave the title untouched: the previous version
    // reset the title to "" here, discarding the value set just above.
    field = d.getField("url");
    if (field != null) {
        item.setUrl(toUrlOrNull(field.stringValue()));
    }

    field = d.getField("terms");
    if (field != null) {
        // The terms are stored as a JSON object; Gson is asked for a raw HashMap.
        Map<String, Double> terms = gson.fromJson(field.stringValue(), HashMap.class);
        item.addTerms(terms);
    }
    return item;
}

/** Returns the string value of the named field, or {@code fallback} when the field is absent. */
private static String storedStringOrDefault(Document d, String fieldName, String fallback) {
    IndexableField field = d.getField(fieldName);
    return (field != null) ? field.stringValue() : fallback;
}

/** Parses {@code spec} into a URL, returning {@code null} when it is malformed. */
private static URL toUrlOrNull(String spec) {
    try {
        return new URL(spec);
    } catch (MalformedURLException ex) {
        return null;
    }
}
From source file:ca.dracode.ais.service.SearchService.java
License:Open Source License
/**
 * Tells the indexer to load a file's metadata into memory for use in searches.
 * The function can be called multiple times to load several files. Files remain
 * loaded until the unload function is called. Please make sure to call unload
 * when you are finished with the document.
 *
 * @param filePath the location of the file to prepare; is also the identifier
 *                 for the file's data in the index
 * @return 0 if the file exists in the index and was not already loaded;
 *         1 if the file was already loaded;
 *         2 if the file was not loaded and does not exist in the index;
 *         -1 if there was an error
 */
public int load(final String filePath) {
    if (this.data.containsKey(filePath)) {
        return 1;
    }

    final SearchData loaded = new SearchData();
    if (SearchService.this.sm.searcher == null) {
        Log.e(TAG, "Searcher is null");
        return -1;
    }

    // The meta file is looked up by absolute path; the same path is logged.
    final String absolutePath = new File(filePath).getAbsolutePath();
    Log.i(TAG, "Loading: " + filePath + " " + absolutePath);

    final Document metaFile = this.sm.searcher.getMetaFile(absolutePath);
    if (metaFile == null) {
        return 2;
    }

    try {
        final IndexableField pagesField = metaFile.getField("pages");
        if (pagesField == null) {
            Log.e(TAG, "Cannot find pages in metafile: " + metaFile.toString());
            return -1;
        }
        loaded.pages = pagesField.numericValue().intValue();
    } catch (Exception e) {
        // Any failure while reading the page count is reported as an error code.
        Log.e(TAG, "Error", e);
        return -1;
    }

    this.data.put(filePath, loaded);
    return 0;
}
From source file:ca.uhn.fhir.jpa.util.BigDecimalNumericFieldBridge.java
License:Apache License
@Override public Object get(final String name, final Document document) { final IndexableField field = document.getField(name); if (field != null) { Double doubleVal = (Double) field.numericValue(); return new BigDecimal(doubleVal); } else {// w w w . j av a 2 s.co m return null; } }
From source file:com.b2international.index.lucene.FloatIndexField.java
License:Apache License
/**
 * Extracts the numeric value of the given field as a {@code Float}.
 *
 * @param field the index field to read
 * @return the field's numeric value converted with {@link Number#floatValue()}
 */
@Override
protected Float getValue(IndexableField field) {
    final Number stored = field.numericValue();
    return stored.floatValue();
}
From source file:com.b2international.index.lucene.IntIndexField.java
License:Apache License
/**
 * Returns the numeric value of the given field, failing fast when there is none.
 *
 * @param field the index field to read
 * @return the non-null numeric value of {@code field}
 * @throws NullPointerException if the field has no numeric value; the message
 *         names the offending field
 */
private Number getNumber(IndexableField field) {
    // The template contains a '%s' placeholder, so the field name has to be
    // supplied; without it the literal "%s" ends up in the error message.
    return checkNotNull(field.numericValue(), "Cannot get numeric value from field '%s'", field.name());
}
From source file:com.b2international.index.lucene.LongIndexField.java
License:Apache License
/**
 * Extracts the numeric value of the given field as a {@code Long}.
 *
 * @param field the index field to read
 * @return the field's numeric value converted with {@link Number#longValue()}
 * @throws NullPointerException if the field has no numeric value; the message
 *         names the offending field
 */
@Override
public Long getValue(IndexableField field) {
    // The template contains a '%s' placeholder, so the field name has to be
    // supplied; without it the literal "%s" ends up in the error message.
    final Number num = checkNotNull(field.numericValue(),
            "Cannot get numeric value from field '%s'", field.name());
    return num.longValue();
}
From source file:com.bericotech.clavin.gazetteer.query.LuceneGazetteer.java
License:Apache License
/** * Executes a query against the Lucene index, processing the results and returning * at most maxResults ResolvedLocations with ancestry resolved. * @param location the location occurrence * @param sanitizedName the sanitized name of the search location * @param filter the filter used to restrict the search results * @param maxResults the maximum number of results * @param fuzzy is this a fuzzy query/*from w w w . ja v a2 s . c o m*/ * @param dedupe should duplicate locations be filtered from the results * @param ancestryMode the hierarchy resolution mode * @param previousResults the results of a previous query that should be used for duplicate filtering and appended to until * no additional matches are found or maxResults has been reached; the input list will not be modified * and may be <code>null</code> * @return the ResolvedLocations with ancestry resolved matching the query * @throws ParseException if an error occurs generating the query * @throws IOException if an error occurs executing the query */ private List<ResolvedLocation> executeQuery(final LocationOccurrence location, final String sanitizedName, final Filter filter, final int maxResults, final boolean fuzzy, final boolean dedupe, final AncestryMode ancestryMode, final List<ResolvedLocation> previousResults) throws ParseException, IOException { Query query = new AnalyzingQueryParser(Version.LUCENE_4_9, INDEX_NAME.key(), INDEX_ANALYZER) .parse(String.format(fuzzy ? 
FUZZY_FMT : EXACT_MATCH_FMT, sanitizedName)); List<ResolvedLocation> matches = new ArrayList<ResolvedLocation>(maxResults); Map<Integer, Set<GeoName>> parentMap = new HashMap<Integer, Set<GeoName>>(); // reuse GeoName instances so all ancestry is correctly resolved if multiple names for // the same GeoName match the query Map<Integer, GeoName> geonameMap = new HashMap<Integer, GeoName>(); // if we are filling previous results, add them to the match list and the geoname map // so they can be used for deduplication or re-used if additional matches are found if (previousResults != null) { matches.addAll(previousResults); for (ResolvedLocation loc : previousResults) { geonameMap.put(loc.getGeoname().getGeonameID(), loc.getGeoname()); } } // short circuit if we were provided enough previous results to satisfy maxResults; // we do this here because the query loop condition is evaluated after the query // is executed and results are processed to support de-duplication if (matches.size() >= maxResults) { return matches; } // track the last discovered hit so we can re-execute the query if we are // deduping and need to fill results ScoreDoc lastDoc = null; do { // collect all the hits up to maxResults, and sort them based // on Lucene match score and population for the associated // GeoNames record TopDocs results = indexSearcher.searchAfter(lastDoc, query, filter, maxResults, POPULATION_SORT); // set lastDoc to null so we don't infinite loop if results is empty lastDoc = null; // populate results if matches were discovered for (ScoreDoc scoreDoc : results.scoreDocs) { lastDoc = scoreDoc; Document doc = indexSearcher.doc(scoreDoc.doc); // reuse GeoName instances so all ancestry is correctly resolved if multiple names for // the same GeoName match the query int geonameID = GEONAME_ID.getValue(doc); GeoName geoname = geonameMap.get(geonameID); if (geoname == null) { geoname = BasicGeoName.parseFromGeoNamesRecord((String) GEONAME.getValue(doc), (String) 
PREFERRED_NAME.getValue(doc)); geonameMap.put(geonameID, geoname); } else if (dedupe) { // if we have already seen this GeoName and we are removing duplicates, skip to the next doc continue; } String matchedName = INDEX_NAME.getValue(doc); if (!geoname.isAncestryResolved()) { IndexableField parentIdField = doc.getField(IndexField.PARENT_ID.key()); Integer parentId = parentIdField != null && parentIdField.numericValue() != null ? parentIdField.numericValue().intValue() : null; if (parentId != null) { // if we are lazily or manually loading ancestry, replace GeoName with a LazyAncestryGeoName // otherwide, build the parent resolution map switch (ancestryMode) { case LAZY: geoname = new LazyAncestryGeoName(geoname, parentId, this); break; case MANUAL: geoname = new LazyAncestryGeoName(geoname, parentId); break; case ON_CREATE: Set<GeoName> geos = parentMap.get(parentId); if (geos == null) { geos = new HashSet<GeoName>(); parentMap.put(parentId, geos); } geos.add(geoname); break; } } } matches.add(new ResolvedLocation(location, geoname, matchedName, fuzzy)); // stop processing results if we have reached maxResults matches if (matches.size() >= maxResults) { break; } } } while (dedupe && lastDoc != null && matches.size() < maxResults); // if any results need ancestry resolution, resolve parents // this map should only contain GeoNames if ancestryMode == ON_CREATE if (!parentMap.isEmpty()) { resolveParents(parentMap); } return matches; }
From source file:com.bericotech.clavin.index.IndexField.java
License:Apache License
/** * Get the value of this field as set in the given document or <code>null</code> * if the field is not set or cannot be retrieved. If a field has multiple values, * the value that is returned may be arbitrarily selected from one of the values. In * this instance, use the methods in Document directly to retrieve multiple values. * @param <T> the expected return type * @param doc the input document// w w w .j ava 2s .c o m * @return the value of this field in the input document, if it has been set, or <code>null</code> */ @SuppressWarnings("unchecked") public <T> T getValue(final Document doc) { IndexableField field = doc.getField(key); Object value = null; if (field != null) { switch (this) { case INDEX_NAME: case GEONAME: case PREFERRED_NAME: value = field.stringValue(); break; case GEONAME_ID: case PARENT_ID: case ANCESTOR_IDS: value = field.numericValue().intValue(); break; case POPULATION: value = field.numericValue().longValue(); break; case SORT_POP: value = field.numericValue().longValue(); break; case HISTORICAL: case FEATURE_CODE: // these fields are not stored LOG.warn("Attempting to retrieve value for an unstored field: [{}]", this); break; default: LOG.error("Attempting to retrieve value for an unconfigured field: [{}]", this); break; } } return (T) value; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private VcfIndexEntry createVcfIndexEntry(Document d, List<String> vcfInfoFields) { VcfIndexEntry vcfIndexEntry = new VcfIndexEntry(); vcfIndexEntry.setGene(d.get(FeatureIndexFields.GENE_ID.getFieldName())); BytesRef bytes = d.getBinaryValue(FeatureIndexFields.GENE_IDS.getFieldName()); if (bytes != null) { vcfIndexEntry.setGeneIds(bytes.utf8ToString()); }//from w ww . j a va2 s.co m vcfIndexEntry.setGeneName(d.get(FeatureIndexFields.GENE_NAME.getFieldName())); bytes = d.getBinaryValue(FeatureIndexFields.GENE_NAMES.getFieldName()); if (bytes != null) { vcfIndexEntry.setGeneNames(bytes.utf8ToString()); } vcfIndexEntry.setInfo(new HashMap<>()); String isExonStr = d.get(FeatureIndexFields.IS_EXON.getFieldName()); //TODO: remove, in future only binary // value will remain if (isExonStr == null) { bytes = d.getBinaryValue(FeatureIndexFields.IS_EXON.getFieldName()); if (bytes != null) { isExonStr = bytes.utf8ToString(); } } boolean isExon = isExonStr != null && Boolean.parseBoolean(isExonStr); vcfIndexEntry.setExon(isExon); vcfIndexEntry.getInfo().put(FeatureIndexFields.IS_EXON.getFieldName(), isExon); BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.VARIATION_TYPE.getFieldName()); if (featureIdBytes != null) { vcfIndexEntry.setVariationType(VariationType.valueOf(featureIdBytes.utf8ToString().toUpperCase())); } vcfIndexEntry.setFailedFilter(d.get(FeatureIndexFields.FAILED_FILTER.getFieldName())); IndexableField qualityField = d.getField(FeatureIndexFields.QUALITY.getFieldName()); if (qualityField != null) { vcfIndexEntry.setQuality(qualityField.numericValue().doubleValue()); } if (vcfInfoFields != null) { for (String infoField : vcfInfoFields) { if (d.getBinaryValue(infoField.toLowerCase()) != null) { vcfIndexEntry.getInfo().put(infoField, d.getBinaryValue(infoField.toLowerCase()).utf8ToString()); } else { vcfIndexEntry.getInfo().put(infoField, d.get(infoField.toLowerCase())); } } } return vcfIndexEntry; }
From source file:com.epimorphics.server.indexers.LuceneResult.java
License:Apache License
/**
 * Returns all the values stored under the given field name, one array entry per
 * field instance. Each entry is the field's numeric value if it has one, otherwise
 * its string value, otherwise a Resource created from its binary value decoded
 * as UTF-8.
 *
 * @param fieldName the name of the field whose values are wanted
 * @return the values in the order the fields are returned by the document
 */
public Object[] fieldValues(String fieldName) {
    IndexableField[] fields = doc.getFields(fieldName);
    Object[] results = new Object[fields.length];
    int slot = 0;
    for (IndexableField field : fields) {
        Object value = field.numericValue();
        if (value == null) {
            value = field.stringValue();
            if (value == null) {
                // neither numeric nor string: treat the binary payload as a resource URI
                value = ResourceFactory.createResource(field.binaryValue().utf8ToString());
            }
        }
        results[slot++] = value;
    }
    return results;
}