List of usage examples for org.apache.lucene.index MultiDocValues getBinaryValues
public static BinaryDocValues getBinaryValues(final IndexReader r, final String field) throws IOException
From source file:com.github.flaxsearch.util.ReaderManager.java
License:Apache License
/**
 * Looks up the binary doc values of a field, either for one segment or for the whole index.
 *
 * @param segment the segment to read from, or {@code null} to read through the index reader
 * @param field   the name of the doc values field
 * @return what the leaf reader of {@code segment} returns for {@code field}, or, when
 *         {@code segment} is {@code null}, the result of
 *         {@link MultiDocValues#getBinaryValues} on the index reader
 * @throws IOException if the underlying reader fails
 */
default BinaryDocValues getBinaryDocValues(Integer segment, String field) throws IOException {
    if (segment != null) {
        return getLeafReader(segment).getBinaryDocValues(field);
    }
    return MultiDocValues.getBinaryValues(getIndexReader(), field);
}
From source file:net.semanticmetadata.lire.searchers.GenericDocValuesImageSearcher.java
License:Open Source License
protected void init() { // put all respective features into an in-memory cache ... if (reader != null) { try {//from ww w . j a va2 s. co m docValues = MultiDocValues.getBinaryValues(reader, cachedInstance.getFieldName()); } catch (IOException e) { e.printStackTrace(); } } }
From source file:net.semanticmetadata.lire.solr.FastLireRequestHandler.java
License:Open Source License
/** * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking. * * @param rsp/*from ww w.j a v a 2 s . c om*/ * @param searcher * @param hashFieldName the hash field name * @param maximumHits * @param terms * @param queryFeature * @throws java.io.IOException * @throws IllegalAccessException * @throws InstantiationException */ private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher, String hashFieldName, int maximumHits, List<Term> terms, Query query, LireFeature queryFeature) throws IOException, IllegalAccessException, InstantiationException { // temp feature instance LireFeature tmpFeature = queryFeature.getClass().newInstance(); // Taking the time of search for statistical purposes. time = System.currentTimeMillis(); Filter filter = null; // if the request contains a filter: if (req.getParams().get("fq") != null) { // only filters with [<field>:<value> ]+ are supported StringTokenizer st = new StringTokenizer(req.getParams().get("fq"), " "); LinkedList<Term> filterTerms = new LinkedList<Term>(); while (st.hasMoreElements()) { String[] tmpToken = st.nextToken().split(":"); if (tmpToken.length > 1) { filterTerms.add(new Term(tmpToken[0], tmpToken[1])); } } if (filterTerms.size() > 0) filter = new TermsFilter(filterTerms); } TopDocs docs; // with query only. 
if (filter == null) { docs = searcher.search(query, numberOfCandidateResults); } else { docs = searcher.search(query, filter, numberOfCandidateResults); } // TopDocs docs = searcher.search(query, new TermsFilter(terms), numberOfCandidateResults); // with TermsFilter and boosting by simple query // TopDocs docs = searcher.search(new ConstantScoreQuery(new TermsFilter(terms)), numberOfCandidateResults); // just with TermsFilter time = System.currentTimeMillis() - time; rsp.add("RawDocsCount", docs.scoreDocs.length + ""); rsp.add("RawDocsSearchTime", time + ""); // re-rank time = System.currentTimeMillis(); TreeSet<SimpleResult> resultScoreDocs = new TreeSet<SimpleResult>(); float maxDistance = -1f; float tmpScore; String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName); // iterating and re-ranking the documents. BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName); // *** # BytesRef bytesRef = new BytesRef(); for (int i = 0; i < docs.scoreDocs.length; i++) { // using DocValues to retrieve the field values ... binaryValues.get(docs.scoreDocs[i].doc, bytesRef); tmpFeature.setByteArrayRepresentation(bytesRef.bytes, bytesRef.offset, bytesRef.length); // Getting the document from the index. // This is the slow step based on the field compression of stored fields. // tmpFeature.setByteArrayRepresentation(d.getBinaryValue(name).bytes, d.getBinaryValue(name).offset, d.getBinaryValue(name).length); tmpScore = queryFeature.getDistance(tmpFeature); if (resultScoreDocs.size() < maximumHits) { // todo: There's potential here for a memory saver, think of a clever data structure that can do the trick without creating a new SimpleResult for each result. 
resultScoreDocs.add( new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); maxDistance = resultScoreDocs.last().getDistance(); } else if (tmpScore < maxDistance) { // if it is nearer to the sample than at least one of the current set: // remove the last one ... resultScoreDocs.remove(resultScoreDocs.last()); // add the new one ... resultScoreDocs.add( new SimpleResult(tmpScore, searcher.doc(docs.scoreDocs[i].doc), docs.scoreDocs[i].doc)); // and set our new distance border ... maxDistance = resultScoreDocs.last().getDistance(); } } // System.out.println("** Creating response."); time = System.currentTimeMillis() - time; rsp.add("ReRankSearchTime", time + ""); LinkedList list = new LinkedList(); for (Iterator<SimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) { SimpleResult result = it.next(); HashMap m = new HashMap(2); m.put("d", result.getDistance()); // add fields as requested: if (req.getParams().get("fl") == null) { m.put("id", result.getDocument().get("id")); if (result.getDocument().get("title") != null) m.put("title", result.getDocument().get("title")); } else { String fieldsRequested = req.getParams().get("fl"); if (fieldsRequested.contains("score")) { m.put("score", result.getDistance()); } if (fieldsRequested.contains("*")) { // all fields for (IndexableField field : result.getDocument().getFields()) { String tmpField = field.name(); if (result.getDocument().getFields(tmpField).length > 1) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField)); } else if (result.getDocument().getFields(tmpField).length > 0) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue()); } } } else { StringTokenizer st; if (fieldsRequested.contains(",")) st = new StringTokenizer(fieldsRequested, ","); else st = new StringTokenizer(fieldsRequested, " "); while (st.hasMoreElements()) { String tmpField = st.nextToken(); if 
(result.getDocument().getFields(tmpField).length > 1) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField)); } else if (result.getDocument().getFields(tmpField).length > 0) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue()); } } } } // m.put(field, result.getDocument().get(field)); // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field)); list.add(m); } rsp.add("docs", list); // rsp.add("Test-name", "Test-val"); }
From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java
License:Open Source License
/** * Handles the get parameters id, field and rows. * * @param req//from w w w . ja va 2s . c o m * @param rsp * @throws IOException * @throws InstantiationException * @throws IllegalAccessException */ private void handleIdSearch(SolrQueryRequest req, SolrQueryResponse rsp) throws IOException, InstantiationException, IllegalAccessException { SolrIndexSearcher searcher = req.getSearcher(); try { // TopDocs hits = searcher.search(new TermQuery(new Term("id", req.getParams().get("id"))), 1); int queryDocId = searcher.getFirstMatch(new Term("id", req.getParams().get("id"))); // get the parameters String paramField = req.getParams().get("field", "cl_ha"); if (!paramField.endsWith("_ha")) paramField += "_ha"; numberOfQueryTerms = req.getParams().getDouble("accuracy", DEFAULT_NUMBER_OF_QUERY_TERMS); numberOfCandidateResults = req.getParams().getInt("candidates", DEFAULT_NUMBER_OF_CANDIDATES); useMetricSpaces = req.getParams().getBool("ms", DEFAULT_USE_METRIC_SPACES); int paramRows = req.getParams().getInt("rows", defaultNumberOfResults); GlobalFeature queryFeature = (GlobalFeature) FeatureRegistry.getClassForHashField(paramField) .newInstance(); rsp.add("QueryField", paramField); rsp.add("QueryFeature", queryFeature.getClass().getName()); if (queryDocId > -1) { // Using DocValues to get the actual data from the index. BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), FeatureRegistry.getFeatureFieldName(paramField)); if (binaryValues == null) { rsp.add("Error", "Could not find the DocValues of the query document. Are they in the index? Id: " + req.getParams().get("id")); // System.err.println("Could not find the DocValues of the query document. Are they in the index?"); } queryFeature.setByteArrayRepresentation(binaryValues.get(queryDocId).bytes, binaryValues.get(queryDocId).offset, binaryValues.get(queryDocId).length); Query query = null; if (!useMetricSpaces) { // check singleton cache if the term stats can be cached. 
HashTermStatistics.addToStatistics(searcher, paramField); // Re-generating the hashes to save space (instead of storing them in the index) int[] hashes = BitSampling.generateHashes(queryFeature.getFeatureVector()); query = createQuery(hashes, paramField, numberOfQueryTerms); } else if (MetricSpaces.supportsFeature(queryFeature)) { // ----< Metric Spaces >----- int queryLength = (int) StatsUtils.clamp( numberOfQueryTerms * MetricSpaces.getPostingListLength(queryFeature), 3, MetricSpaces.getPostingListLength(queryFeature)); String msQuery = MetricSpaces.generateBoostedQuery(queryFeature, queryLength); QueryParser qp = new QueryParser(paramField.replace("_ha", "_ms"), new WhitespaceAnalyzer()); query = qp.parse(msQuery); } else { query = new MatchAllDocsQuery(); rsp.add("Error", "Feature not supported by MetricSpaces: " + queryFeature.getClass().getSimpleName()); } doSearch(req, rsp, searcher, paramField, paramRows, getFilterQuery(req.getParams().get("fq")), query, queryFeature); } else { rsp.add("Error", "Did not find an image with the given id " + req.getParams().get("id")); } } catch (Exception e) { rsp.add("Error", "There was an error with your search for the image with the id " + req.getParams().get("id") + ": " + e.getMessage()); } }
From source file:net.semanticmetadata.lire.solr.LireRequestHandler.java
License:Open Source License
/** * Actual search implementation based on (i) hash based retrieval and (ii) feature based re-ranking. * * @param req the SolrQueryRequest * @param rsp the response to write the data to * @param searcher the actual index searcher object to search the index * @param hashFieldName the name of the field the hashes can be found * @param maximumHits the maximum number of hits, the smaller the faster * @param filterQuery can be null/* w ww .j a va 2s . c o m*/ * @param query the (Boolean) query for querying the candidates from the IndexSearcher * @param queryFeature the image feature used for re-ranking the results * @throws IOException * @throws IllegalAccessException * @throws InstantiationException */ private void doSearch(SolrQueryRequest req, SolrQueryResponse rsp, SolrIndexSearcher searcher, String hashFieldName, int maximumHits, Query filterQuery, Query query, GlobalFeature queryFeature) throws IOException, IllegalAccessException, InstantiationException { // temp feature instance GlobalFeature tmpFeature = queryFeature.getClass().newInstance(); // Taking the time of search for statistical purposes. 
time = System.currentTimeMillis(); String featureFieldName = FeatureRegistry.getFeatureFieldName(hashFieldName); BinaryDocValues binaryValues = MultiDocValues.getBinaryValues(searcher.getIndexReader(), featureFieldName); time = System.currentTimeMillis() - time; rsp.add("DocValuesOpenTime", time + ""); Iterator<Integer> docIterator; int numberOfResults = 0; time = System.currentTimeMillis(); if (filterQuery != null) { DocList docList = searcher.getDocList(query, filterQuery, Sort.RELEVANCE, 0, numberOfCandidateResults); numberOfResults = docList.size(); docIterator = docList.iterator(); } else { TopDocs docs = searcher.search(query, numberOfCandidateResults); numberOfResults = docs.totalHits; docIterator = new TopDocsIterator(docs); } time = System.currentTimeMillis() - time; rsp.add("RawDocsCount", numberOfResults + ""); rsp.add("RawDocsSearchTime", time + ""); time = System.currentTimeMillis(); TreeSet<CachingSimpleResult> resultScoreDocs = getReRankedResults(docIterator, binaryValues, queryFeature, tmpFeature, maximumHits, searcher); // Creating response ... 
time = System.currentTimeMillis() - time; rsp.add("ReRankSearchTime", time + ""); LinkedList list = new LinkedList(); for (Iterator<CachingSimpleResult> it = resultScoreDocs.iterator(); it.hasNext();) { CachingSimpleResult result = it.next(); HashMap m = new HashMap(2); m.put("d", result.getDistance()); // add fields as requested: if (req.getParams().get("fl") == null) { m.put("id", result.getDocument().get("id")); if (result.getDocument().get("title") != null) m.put("title", result.getDocument().get("title")); } else { String fieldsRequested = req.getParams().get("fl"); if (fieldsRequested.contains("score")) { m.put("score", result.getDistance()); } if (fieldsRequested.contains("*")) { // all fields for (IndexableField field : result.getDocument().getFields()) { String tmpField = field.name(); if (result.getDocument().getFields(tmpField).length > 1) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField)); } else if (result.getDocument().getFields(tmpField).length > 0) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue()); } } } else { StringTokenizer st; if (fieldsRequested.contains(",")) st = new StringTokenizer(fieldsRequested, ","); else st = new StringTokenizer(fieldsRequested, " "); while (st.hasMoreElements()) { String tmpField = st.nextToken(); if (result.getDocument().getFields(tmpField).length > 1) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getValues(tmpField)); } else if (result.getDocument().getFields(tmpField).length > 0) { m.put(result.getDocument().getFields(tmpField)[0].name(), result.getDocument().getFields(tmpField)[0].stringValue()); } } } } // m.put(field, result.getDocument().get(field)); // m.put(field.replace("_ha", "_hi"), result.getDocument().getBinaryValue(field)); list.add(m); } rsp.add("docs", list); // rsp.add("Test-name", "Test-val"); }
From source file:org.apache.solr.index.SlowCompositeReaderWrapper.java
License:Apache License
/**
 * Returns the binary doc values of {@code field} for the wrapped reader, obtained through
 * {@link MultiDocValues#getBinaryValues}.
 *
 * @param field the name of the doc values field
 * @return whatever {@code MultiDocValues.getBinaryValues} yields for the wrapped reader
 * @throws IOException if reading the doc values fails
 */
@Override
public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    ensureOpen();
    final BinaryDocValues merged = MultiDocValues.getBinaryValues(in, field);
    return merged;
}
From source file:org.apache.tika.eval.tools.SlowCompositeReaderWrapper.java
License:Apache License
@Override public BinaryDocValues getBinaryDocValues(String field) throws IOException { ensureOpen();/*ww w . jav a 2s .com*/ return MultiDocValues.getBinaryValues(in, field); // TODO cache? }