List of usage examples for org.apache.lucene.util BytesRef utf8ToString
public String utf8ToString()
From source file:ci6226.facetsearch.java
public static void main(String[] args) throws Exception { String index = "./myindex"; String field = "text"; String queries = null;//from w w w. ja va 2s . co m int hitsPerPage = 10; boolean raw = false; //http://lucene.apache.org/core/4_0_0/facet/org/apache/lucene/facet/doc-files/userguide.html#facet_accumulation IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: //TODO: SAME AS HOW U BUILD INDEX Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer); while (true) { System.out.println("Enter query: "); String line = in.readLine(); line = line.trim(); if (line.length() == 0) { break; } Query query = parser.parse(line); System.out.println("Searching for: " + query.toString(field)); Date start = new Date(); searcher.search(query, null, 100); Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); TopDocs results = searcher.search(query, 5 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; //N= max docs //df = totoal matched doc //idf=log(N/df) for (int i = 0; i < hits.length; i++) { Document doc = searcher.doc(hits[i].doc); System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score); String rtext = doc.get(field); System.out.println("Text=\t" + rtext); Terms vector = reader.getTermVector(i, "text"); if (vector == null) continue; // System.out.println(vector.getSumDocFreq()); // Terms vector = reader.getTermVector(hits[i].doc, field); //hits[i].doc=docID TermsEnum termsEnum = vector.iterator(null); termsEnum = 
vector.iterator(termsEnum); Map<String, Integer> frequencies = new HashMap<>(); BytesRef text = null; while ((text = termsEnum.next()) != null) { String term = text.utf8ToString(); int freq = (int) termsEnum.totalTermFreq(); frequencies.put(term, freq); // System.out.println("Time: "+term + " idef "+freq); } } // String[] facetCatlog={""}; System.out.println(numTotalHits + " total matching documents"); } reader.close(); }
From source file:com.baidu.rigel.biplatform.tesseract.isservice.search.collector.TesseractResultRecordCollector.java
License:Open Source License
@Override public void collect(int doc) throws IOException { List<Serializable> fieldValueList = new ArrayList<Serializable>(); // List<String> fieldNameList=new ArrayList<String>(); String groupBy = ""; for (String dim : dimFields) { BinaryDocValues fieldValues = currBinaryDocValuesMap.get(dim); BytesRef byteRef = fieldValues.get(doc); String dimVal = byteRef.utf8ToString(); fieldValueList.add(dimVal);//from w ww.j a va 2s. c o m if (groupByFields.contains(dim)) { groupBy += dimVal + ","; } } for (String measure : this.measureFields) { FieldCache.Doubles fieldValues = currDoubleValuesMap.get(measure); fieldValueList.add(fieldValues.get(doc)); } ResultRecord record = new ResultRecord(fieldValueList.toArray(new Serializable[0]), this.meta); record.setGroupBy(groupBy); this.result.add(record); }
From source file:com.basho.yokozuna.handler.EntropyData.java
License:Open Source License
/**
 * Streams entropy-data terms (each of the form "&lt;timestamp&gt; &lt;docId&gt; &lt;vclock&gt;")
 * back to the caller in pages of at most {@code n} documents, resuming from an
 * optional base64 continuation term and stopping once a term's timestamp is no
 * longer strictly before the required 'before' parameter.
 */
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
        throws Exception, InstantiationException, IllegalAccessException {
    String contParam = req.getParams().get("continue");
    BytesRef cont = contParam != null ? decodeCont(contParam) : DEFAULT_CONT;

    // TODO: Make before required in handler config
    String before = req.getParams().get("before");
    if (before == null) {
        throw new Exception("Parameter 'before' is required");
    }
    int n = req.getParams().getInt("n", DEFAULT_N);

    // Add docs here and modify object inline in code
    SolrDocumentList docs = new SolrDocumentList();
    rsp.add("response", docs);

    try {
        SolrIndexSearcher searcher = req.getSearcher();
        AtomicReader rdr = searcher.getAtomicReader();
        BytesRef tmp = null;
        Terms terms = rdr.terms(ENTROPY_DATA_FIELD);
        TermsEnum te = terms.iterator(null);

        if (isContinue(cont)) {
            log.debug("continue from " + cont);
            TermsEnum.SeekStatus status = te.seekCeil(cont, true);
            if (status == TermsEnum.SeekStatus.END) {
                // No terms at or after the continuation point.
                rsp.add("more", false);
                return;
            } else if (status == TermsEnum.SeekStatus.FOUND) {
                // If this term has already been seen then skip it.
                tmp = te.next();
                if (endOfItr(tmp)) {
                    rsp.add("more", false);
                    return;
                }
            } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
                tmp = te.next();
            }
        } else {
            // No continuation: start from the first term.
            tmp = te.next();
        }

        String text = null;
        String[] vals = null;
        String ts = null;
        String docId = null;
        String vectorClock = null;
        int count = 0;
        BytesRef current = null;

        while (!endOfItr(tmp) && count < n) {
            // Keep a stable copy of the last term served; it becomes the
            // continuation token if this page fills up.
            current = BytesRef.deepCopyOf(tmp);
            text = tmp.utf8ToString();
            log.debug("text: " + text);
            vals = text.split(" ");
            ts = vals[0];

            // TODO: what if null?
            if (!(ts.compareTo(before) < 0)) {
                // Reached terms at/after 'before' -- stop paging.
                rsp.add("more", false);
                docs.setNumFound(count);
                return;
            }

            docId = vals[1];
            vectorClock = vals[2];
            SolrDocument tmpDoc = new SolrDocument();
            tmpDoc.addField("doc_id", docId);
            tmpDoc.addField("base64_vclock", Base64.encodeBase64String(sha(vectorClock)));
            docs.add(tmpDoc);
            count++;
            tmp = te.next();
        }

        if (count < n) {
            rsp.add("more", false);
        } else {
            rsp.add("more", true);
            String newCont = Base64.encodeBase64URLSafeString(current.bytes);
            // The continue context for next req to start where
            // this one finished.
            rsp.add("continuation", newCont);
        }

        docs.setNumFound(count);
    } catch (Exception e) {
        // NOTE(review): exceptions are swallowed after printing, so the caller
        // only sees a partial/empty response -- consider rethrowing.
        e.printStackTrace();
    }
}
From source file:com.basistech.lucene.tools.LuceneQueryTool.java
License:Apache License
private void enumerateTerms(String field) throws IOException { if (!allFieldNames.contains(field)) { throw new RuntimeException("Invalid field name: " + field); }//w ww . j a v a2 s . c o m List<LeafReaderContext> leaves = indexReader.leaves(); TermsEnum termsEnum; boolean unindexedField = true; Map<String, Integer> termCountMap = new TreeMap<>(); for (LeafReaderContext leaf : leaves) { Terms terms = leaf.reader().terms(field); if (terms == null) { continue; } unindexedField = false; termsEnum = terms.iterator(); BytesRef bytesRef; while ((bytesRef = termsEnum.next()) != null) { String term = bytesRef.utf8ToString(); if (termCountMap.containsKey(term)) { termCountMap.put(term, termsEnum.docFreq() + termCountMap.get(term)); } else { termCountMap.put(term, termsEnum.docFreq()); } } } if (unindexedField) { throw new RuntimeException("Unindexed field: " + field); } for (Map.Entry<String, Integer> entry : termCountMap.entrySet()) { defaultOut.println(entry.getKey() + " (" + entry.getValue() + ")"); } }
From source file:com.bdaum.zoom.lal.internal.lucene.Lucene.java
License:Open Source License
public List<ScoredString> listTags(File indexPath, int maxItems) throws IOException { Object readerToken = null;// w ww . j a v a2s . co m try { readerToken = indexPath == null ? null : getIndexReaderToken(indexPath); if (readerToken != null) { IndexReader indexReader = readerMap.get(readerToken); if (indexReader != null) { List<ScoredString> result = new ArrayList<ScoredString>(1000); Terms terms = MultiFields.getTerms(indexReader, LireActivator.FIELD_NAME_FULL_TEXT); if (terms == null) return null; TermsEnum termEnum = terms.iterator(); BytesRef bytesRef; while ((bytesRef = termEnum.next()) != null) result.add(new ScoredString(bytesRef.utf8ToString(), indexReader.docFreq(new Term(LireActivator.FIELD_NAME_FULL_TEXT, bytesRef)))); Collections.sort(result); return (result.size() > maxItems) ? result.subList(0, maxItems) : result; } } return null; } finally { if (readerToken != null) releaseIndexReader(indexPath, readerToken); } }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private VcfIndexEntry createVcfIndexEntry(Document d, List<String> vcfInfoFields) { VcfIndexEntry vcfIndexEntry = new VcfIndexEntry(); vcfIndexEntry.setGene(d.get(FeatureIndexFields.GENE_ID.getFieldName())); BytesRef bytes = d.getBinaryValue(FeatureIndexFields.GENE_IDS.getFieldName()); if (bytes != null) { vcfIndexEntry.setGeneIds(bytes.utf8ToString()); }/*from w ww . j a va 2 s. c o m*/ vcfIndexEntry.setGeneName(d.get(FeatureIndexFields.GENE_NAME.getFieldName())); bytes = d.getBinaryValue(FeatureIndexFields.GENE_NAMES.getFieldName()); if (bytes != null) { vcfIndexEntry.setGeneNames(bytes.utf8ToString()); } vcfIndexEntry.setInfo(new HashMap<>()); String isExonStr = d.get(FeatureIndexFields.IS_EXON.getFieldName()); //TODO: remove, in future only binary // value will remain if (isExonStr == null) { bytes = d.getBinaryValue(FeatureIndexFields.IS_EXON.getFieldName()); if (bytes != null) { isExonStr = bytes.utf8ToString(); } } boolean isExon = isExonStr != null && Boolean.parseBoolean(isExonStr); vcfIndexEntry.setExon(isExon); vcfIndexEntry.getInfo().put(FeatureIndexFields.IS_EXON.getFieldName(), isExon); BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.VARIATION_TYPE.getFieldName()); if (featureIdBytes != null) { vcfIndexEntry.setVariationType(VariationType.valueOf(featureIdBytes.utf8ToString().toUpperCase())); } vcfIndexEntry.setFailedFilter(d.get(FeatureIndexFields.FAILED_FILTER.getFieldName())); IndexableField qualityField = d.getField(FeatureIndexFields.QUALITY.getFieldName()); if (qualityField != null) { vcfIndexEntry.setQuality(qualityField.numericValue().doubleValue()); } if (vcfInfoFields != null) { for (String infoField : vcfInfoFields) { if (d.getBinaryValue(infoField.toLowerCase()) != null) { vcfIndexEntry.getInfo().put(infoField, d.getBinaryValue(infoField.toLowerCase()).utf8ToString()); } else { vcfIndexEntry.getInfo().put(infoField, d.get(infoField.toLowerCase())); } } } return vcfIndexEntry; }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private FeatureIndexEntry createIndexEntry(ScoreDoc hit, Map<Long, BookmarkIndexEntry> foundBookmarkEntries, IndexSearcher searcher, List<String> vcfInfoFields) throws IOException { int docId = hit.doc; Document d = searcher.doc(docId); FeatureType featureType = FeatureType.forValue(d.get(FeatureIndexFields.FEATURE_TYPE.getFieldName())); FeatureIndexEntry entry;/*w ww . j a va 2 s . c om*/ switch (featureType) { case VARIATION: entry = createVcfIndexEntry(d, vcfInfoFields); break; case BOOKMARK: BookmarkIndexEntry bookmarkEntry = new BookmarkIndexEntry(); foundBookmarkEntries.put(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())), bookmarkEntry); entry = bookmarkEntry; break; default: entry = new FeatureIndexEntry(); } entry.setFeatureType(featureType); BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.FEATURE_ID.getFieldName()); if (featureIdBytes != null) { entry.setFeatureId(featureIdBytes.utf8ToString()); } entry.setStartIndex(d.getField(FeatureIndexFields.START_INDEX.getFieldName()).numericValue().intValue()); entry.setEndIndex(d.getField(FeatureIndexFields.END_INDEX.getFieldName()).numericValue().intValue()); entry.setFeatureFileId(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName()))); entry.setFeatureName(d.get(FeatureIndexFields.FEATURE_NAME.getFieldName())); String chromosomeId = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_ID.getFieldName()).utf8ToString(); if (!chromosomeId.isEmpty()) { entry.setChromosome(new Chromosome(Long.parseLong(chromosomeId))); entry.getChromosome() .setName(d.getBinaryValue(FeatureIndexFields.CHROMOSOME_NAME.getFieldName()).utf8ToString()); } return entry; }
From source file:com.epimorphics.server.indexers.LuceneResult.java
License:Apache License
/** * Returns all the values of a field. These will be either Strings (for literals and labels), * Resources (for URI fields) or Longs (for numeric fields) *///from w w w . j a v a2 s .co m public Object[] fieldValues(String fieldName) { IndexableField[] fields = doc.getFields(fieldName); Object[] results = new Object[fields.length]; for (int i = 0; i < fields.length; i++) { IndexableField field = fields[i]; Object value = field.numericValue(); if (value == null) { value = field.stringValue(); } if (value == null) { BytesRef ref = field.binaryValue(); value = ResourceFactory.createResource(ref.utf8ToString()); } results[i] = value; } return results; }
From source file:com.factweavers.elasticsearch.payloadscorefunction.PayloadScoringFunction.java
License:Apache License
@Override public double score(int docId, float subQueryScore) { indexLookup.setNextDocId(docId);/*from w w w .ja v a2 s .c o m*/ float score = 0; int obtainedTerms = 0; try { Fields termVectors = indexLookup.termVectors(); Boolean isPayloadOrIndex = false; TermsEnum iterator = null; if (termVectors != null && termVectors.terms(field) != null && termVectors.terms(field).hasPayloads()) { isPayloadOrIndex = true; Terms fields = termVectors.terms(field); iterator = fields.iterator(null); } if (isPayloadOrIndex) { BytesRef firstElement = iterator.next(); while (firstElement != null && (obtainedTerms < values.size())) { String currentValue = firstElement.utf8ToString(); if (!values.contains(currentValue)) { //logger.info("Payload Skipping " + currentValue); firstElement = iterator.next(); continue; } else { obtainedTerms++; } //logger.info("Payload processing value is " + currentValue); DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null); docsAndPositions.nextDoc(); docsAndPositions.nextPosition(); BytesRef payload = docsAndPositions.getPayload(); if (payload != null) { score += PayloadHelper.decodeFloat(payload.bytes, payload.offset); //logger.info("Score " + score); } else { score += defaultValue; } firstElement = iterator.next(); } } else { IndexField fieldObject = indexLookup.get(field); for (String value : values) { IndexFieldTerm tokens = fieldObject.get(value, IndexLookup.FLAG_CACHE | IndexLookup.FLAG_PAYLOADS); if (fieldObject != null && tokens != null) { //logger.info("Processing docID=" + docId + " " + field // + " for " + value + " , " + tokens); if (tokens.iterator().hasNext()) { score += tokens.iterator().next().payloadAsFloat(defaultValue); } } } } } catch (IOException e) { //logger.info("Exception in Term Vectors"); e.printStackTrace(); } return new Double(score); }
From source file:com.floragunn.searchguard.support.DebugStreamOutput.java
License:Apache License
@Override public void writeBytesRef(final BytesRef bytes) throws IOException { System.out.print(bytes.utf8ToString()); super.writeBytesRef(bytes); }