Example usage for org.apache.lucene.util BytesRef utf8ToString

List of usage examples for org.apache.lucene.util BytesRef utf8ToString

Introduction

On this page you can find example usage for org.apache.lucene.util BytesRef utf8ToString.

Prototype

public String utf8ToString() 

Source Link

Document

Interprets stored bytes as UTF8 bytes, returning the resulting string

Usage

From source file:ci6226.facetsearch.java

/**
 * Interactive console search demo: reads queries from stdin (or a file),
 * runs them against the index at {@code ./myindex}, prints the top hits,
 * and collects per-document term frequencies from stored term vectors.
 */
public static void main(String[] args) throws Exception {
    String index = "./myindex";
    String field = "text";
    String queries = null; // path to a query file; null means read from stdin
    int hitsPerPage = 10;

    // http://lucene.apache.org/core/4_0_0/facet/org/apache/lucene/facet/doc-files/userguide.html#facet_accumulation

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);

    // NOTE: analyzer must match the one used when the index was built.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    BufferedReader in;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
    while (true) {

        System.out.println("Enter query: ");
        String line = in.readLine();
        // BUGFIX: readLine() returns null on EOF; the original NPE'd on trim().
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }
        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        // Timed warm-up search; the paged search below is not timed.
        Date start = new Date();
        searcher.search(query, null, 100);
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");

        TopDocs results = searcher.search(query, 5 * hitsPerPage);
        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;

        // N = max docs, df = total matched docs, idf = log(N/df)

        for (int i = 0; i < hits.length; i++) {
            Document doc = searcher.doc(hits[i].doc);
            System.out.println(ANSI_BLUE + (i + 1) + ANSI_RESET + "\nScore=\t" + hits[i].score);
            String rtext = doc.get(field);
            System.out.println("Text=\t" + rtext);

            // BUGFIX: term vectors are addressed by Lucene doc id, not by the
            // hit's position in the result list (original passed the loop
            // index i).
            Terms vector = reader.getTermVector(hits[i].doc, field);
            if (vector == null) {
                continue;
            }

            // A single iterator() call suffices; the original created one and
            // immediately replaced it.
            TermsEnum termsEnum = vector.iterator(null);
            Map<String, Integer> frequencies = new HashMap<>();
            BytesRef text;
            while ((text = termsEnum.next()) != null) {
                String term = text.utf8ToString();
                int freq = (int) termsEnum.totalTermFreq();
                frequencies.put(term, freq);
            }
        }

        System.out.println(numTotalHits + " total matching documents");
    }

    reader.close();
}

From source file:com.baidu.rigel.biplatform.tesseract.isservice.search.collector.TesseractResultRecordCollector.java

License:Open Source License

@Override
public void collect(int doc) throws IOException {
    // One record per matching doc: dimension values first, then measures.
    List<Serializable> fieldValueList = new ArrayList<Serializable>();
    // Group-by key built from the dimensions listed in groupByFields.
    // Use StringBuilder instead of repeated String concatenation in the loop.
    StringBuilder groupBy = new StringBuilder();

    for (String dim : dimFields) {
        BinaryDocValues fieldValues = currBinaryDocValuesMap.get(dim);
        BytesRef byteRef = fieldValues.get(doc);
        // Dimension doc values are stored as UTF-8 bytes; decode to String.
        String dimVal = byteRef.utf8ToString();
        fieldValueList.add(dimVal);
        if (groupByFields.contains(dim)) {
            // Same trailing-comma format as the original concatenation.
            groupBy.append(dimVal).append(',');
        }
    }

    for (String measure : this.measureFields) {
        FieldCache.Doubles fieldValues = currDoubleValuesMap.get(measure);
        fieldValueList.add(fieldValues.get(doc));
    }

    ResultRecord record = new ResultRecord(fieldValueList.toArray(new Serializable[0]), this.meta);
    record.setGroupBy(groupBy.toString());
    this.result.add(record);

}

From source file:com.basho.yokozuna.handler.EntropyData.java

License:Open Source License

@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp)
        throws Exception, InstantiationException, IllegalAccessException {

    // Optional continuation token: where the previous page stopped.
    String contParam = req.getParams().get("continue");
    BytesRef cont = contParam != null ? decodeCont(contParam) : DEFAULT_CONT;

    // TODO: Make before required in handler config
    String before = req.getParams().get("before");
    if (before == null) {
        throw new Exception("Parameter 'before' is required");
    }
    // Page size: maximum number of entries to return in this response.
    int n = req.getParams().getInt("n", DEFAULT_N);
    SolrDocumentList docs = new SolrDocumentList();

    // Add docs here and modify object inline in code
    rsp.add("response", docs);

    try {
        SolrIndexSearcher searcher = req.getSearcher();
        AtomicReader rdr = searcher.getAtomicReader();
        BytesRef tmp = null;
        Terms terms = rdr.terms(ENTROPY_DATA_FIELD);
        TermsEnum te = terms.iterator(null);

        if (isContinue(cont)) {
            log.debug("continue from " + cont);

            // Position the enum at the first term >= the continuation token.
            TermsEnum.SeekStatus status = te.seekCeil(cont, true);

            if (status == TermsEnum.SeekStatus.END) {
                rsp.add("more", false);
                return;
            } else if (status == TermsEnum.SeekStatus.FOUND) {
                // If this term has already been seen then skip it.
                tmp = te.next();

                if (endOfItr(tmp)) {
                    rsp.add("more", false);
                    return;
                }
            } else if (status == TermsEnum.SeekStatus.NOT_FOUND) {
                tmp = te.next();
            }
        } else {
            tmp = te.next();
        }

        String text = null;
        String[] vals = null;
        String ts = null;
        String docId = null;
        String vectorClock = null;
        int count = 0;
        BytesRef current = null;

        // Each term decodes to "<timestamp> <docId> <vectorClock>"; emit up
        // to n entries whose timestamp sorts strictly before 'before'.
        while (!endOfItr(tmp) && count < n) {
            // Keep a stable copy: the enum may reuse the BytesRef on next().
            current = BytesRef.deepCopyOf(tmp);
            text = tmp.utf8ToString();
            log.debug("text: " + text);
            vals = text.split(" ");
            ts = vals[0];

            // TODO: what if null?
            if (!(ts.compareTo(before) < 0)) {
                rsp.add("more", false);
                docs.setNumFound(count);
                return;
            }

            docId = vals[1];
            vectorClock = vals[2];
            SolrDocument tmpDoc = new SolrDocument();
            tmpDoc.addField("doc_id", docId);
            tmpDoc.addField("base64_vclock", Base64.encodeBase64String(sha(vectorClock)));
            docs.add(tmpDoc);
            count++;
            tmp = te.next();
        }

        if (count < n) {
            rsp.add("more", false);
        } else {
            rsp.add("more", true);
            // deepCopyOf yields an exact-size array at offset 0, so encoding
            // current.bytes directly is safe here.
            String newCont = Base64.encodeBase64URLSafeString(current.bytes);
            // The continue context for next req to start where
            // this one finished.
            rsp.add("continuation", newCont);
        }

        docs.setNumFound(count);

    } catch (Exception e) {
        // NOTE(review): swallowing here hides failures from the client;
        // consider rethrowing or at least logging via the handler's logger.
        e.printStackTrace();
    }
}

From source file:com.basistech.lucene.tools.LuceneQueryTool.java

License:Apache License

/**
 * Prints every term of the given field with its total document frequency
 * summed across all index segments, in lexicographic term order.
 *
 * @param field the indexed field to enumerate
 * @throws IOException on index read errors
 * @throws RuntimeException if the field name is unknown or unindexed
 */
private void enumerateTerms(String field) throws IOException {
    if (!allFieldNames.contains(field)) {
        throw new RuntimeException("Invalid field name: " + field);
    }
    List<LeafReaderContext> leaves = indexReader.leaves();
    TermsEnum termsEnum;
    boolean unindexedField = true;
    // TreeMap so the final printout is sorted by term.
    Map<String, Integer> termCountMap = new TreeMap<>();
    for (LeafReaderContext leaf : leaves) {
        Terms terms = leaf.reader().terms(field);
        if (terms == null) {
            continue;
        }
        unindexedField = false;
        termsEnum = terms.iterator();
        BytesRef bytesRef;
        while ((bytesRef = termsEnum.next()) != null) {
            String term = bytesRef.utf8ToString();
            // Single map lookup instead of containsKey + get + put.
            Integer prev = termCountMap.get(term);
            termCountMap.put(term, prev == null ? termsEnum.docFreq() : prev + termsEnum.docFreq());
        }
    }
    if (unindexedField) {
        throw new RuntimeException("Unindexed field: " + field);
    }
    for (Map.Entry<String, Integer> entry : termCountMap.entrySet()) {
        defaultOut.println(entry.getKey() + " (" + entry.getValue() + ")");
    }
}

From source file:com.bdaum.zoom.lal.internal.lucene.Lucene.java

License:Open Source License

/**
 * Lists the terms of the full-text field of the index at {@code indexPath}
 * together with their document frequencies, sorted, truncated to at most
 * {@code maxItems} entries. Returns {@code null} when no reader or terms
 * are available.
 */
public List<ScoredString> listTags(File indexPath, int maxItems) throws IOException {
    Object readerToken = null;
    try {
        if (indexPath != null) {
            readerToken = getIndexReaderToken(indexPath);
        }
        if (readerToken == null) {
            return null;
        }
        IndexReader indexReader = readerMap.get(readerToken);
        if (indexReader == null) {
            return null;
        }
        List<ScoredString> tagList = new ArrayList<ScoredString>(1000);
        Terms terms = MultiFields.getTerms(indexReader, LireActivator.FIELD_NAME_FULL_TEXT);
        if (terms == null) {
            return null;
        }
        TermsEnum termEnum = terms.iterator();
        for (BytesRef ref = termEnum.next(); ref != null; ref = termEnum.next()) {
            int docFreq = indexReader.docFreq(new Term(LireActivator.FIELD_NAME_FULL_TEXT, ref));
            tagList.add(new ScoredString(ref.utf8ToString(), docFreq));
        }
        Collections.sort(tagList);
        if (tagList.size() > maxItems) {
            return tagList.subList(0, maxItems);
        }
        return tagList;
    } finally {
        // Release only if a token was actually acquired.
        if (readerToken != null) {
            releaseIndexReader(indexPath, readerToken);
        }
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Builds a {@link VcfIndexEntry} from a Lucene document, decoding binary
 * (UTF-8) stored fields where present and falling back to plain stored
 * strings otherwise.
 *
 * @param d             the Lucene document to read
 * @param vcfInfoFields optional INFO field names to copy into the entry
 * @return the populated index entry
 */
private VcfIndexEntry createVcfIndexEntry(Document d, List<String> vcfInfoFields) {
    VcfIndexEntry vcfIndexEntry = new VcfIndexEntry();
    vcfIndexEntry.setGene(d.get(FeatureIndexFields.GENE_ID.getFieldName()));

    BytesRef bytes = d.getBinaryValue(FeatureIndexFields.GENE_IDS.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneIds(bytes.utf8ToString());
    }

    vcfIndexEntry.setGeneName(d.get(FeatureIndexFields.GENE_NAME.getFieldName()));

    bytes = d.getBinaryValue(FeatureIndexFields.GENE_NAMES.getFieldName());
    if (bytes != null) {
        vcfIndexEntry.setGeneNames(bytes.utf8ToString());
    }

    vcfIndexEntry.setInfo(new HashMap<>());

    String isExonStr = d.get(FeatureIndexFields.IS_EXON.getFieldName()); //TODO: remove, in future only binary
                                                                         // value will remain
    if (isExonStr == null) {
        bytes = d.getBinaryValue(FeatureIndexFields.IS_EXON.getFieldName());
        if (bytes != null) {
            isExonStr = bytes.utf8ToString();
        }
    }
    boolean isExon = isExonStr != null && Boolean.parseBoolean(isExonStr);
    vcfIndexEntry.setExon(isExon);
    vcfIndexEntry.getInfo().put(FeatureIndexFields.IS_EXON.getFieldName(), isExon);

    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.VARIATION_TYPE.getFieldName());
    if (featureIdBytes != null) {
        vcfIndexEntry.setVariationType(VariationType.valueOf(featureIdBytes.utf8ToString().toUpperCase()));
    }
    vcfIndexEntry.setFailedFilter(d.get(FeatureIndexFields.FAILED_FILTER.getFieldName()));

    IndexableField qualityField = d.getField(FeatureIndexFields.QUALITY.getFieldName());
    if (qualityField != null) {
        vcfIndexEntry.setQuality(qualityField.numericValue().doubleValue());
    }

    if (vcfInfoFields != null) {
        for (String infoField : vcfInfoFields) {
            // Hoisted: the original lower-cased and looked up the binary
            // value up to three times per field.
            String fieldName = infoField.toLowerCase();
            BytesRef infoBytes = d.getBinaryValue(fieldName);
            if (infoBytes != null) {
                vcfIndexEntry.getInfo().put(infoField, infoBytes.utf8ToString());
            } else {
                vcfIndexEntry.getInfo().put(infoField, d.get(fieldName));
            }
        }
    }

    return vcfIndexEntry;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Converts a search hit into the appropriate {@link FeatureIndexEntry}
 * subtype (variation, bookmark, or generic feature) and fills the fields
 * common to all entries.
 *
 * @param hit                  the scored hit to materialize
 * @param foundBookmarkEntries accumulator keyed by file id for bookmark hits
 * @param searcher             searcher used to load the document
 * @param vcfInfoFields        INFO fields to copy for variation entries
 * @return the populated entry
 * @throws IOException on index read errors
 */
private FeatureIndexEntry createIndexEntry(ScoreDoc hit, Map<Long, BookmarkIndexEntry> foundBookmarkEntries,
        IndexSearcher searcher, List<String> vcfInfoFields) throws IOException {
    int docId = hit.doc;
    Document d = searcher.doc(docId);
    FeatureType featureType = FeatureType.forValue(d.get(FeatureIndexFields.FEATURE_TYPE.getFieldName()));
    FeatureIndexEntry entry;
    switch (featureType) {
    case VARIATION:
        entry = createVcfIndexEntry(d, vcfInfoFields);
        break;
    case BOOKMARK:
        BookmarkIndexEntry bookmarkEntry = new BookmarkIndexEntry();
        foundBookmarkEntries.put(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())),
                bookmarkEntry);
        entry = bookmarkEntry;
        break;
    default:
        entry = new FeatureIndexEntry();
    }

    entry.setFeatureType(featureType);
    BytesRef featureIdBytes = d.getBinaryValue(FeatureIndexFields.FEATURE_ID.getFieldName());
    if (featureIdBytes != null) {
        entry.setFeatureId(featureIdBytes.utf8ToString());
    }

    entry.setStartIndex(d.getField(FeatureIndexFields.START_INDEX.getFieldName()).numericValue().intValue());
    entry.setEndIndex(d.getField(FeatureIndexFields.END_INDEX.getFieldName()).numericValue().intValue());
    entry.setFeatureFileId(Long.parseLong(d.get(FeatureIndexFields.FILE_ID.getFieldName())));
    entry.setFeatureName(d.get(FeatureIndexFields.FEATURE_NAME.getFieldName()));

    // BUGFIX: every other binary lookup in this class is null-checked; the
    // original dereferenced CHROMOSOME_ID unconditionally and NPE'd when
    // the field was absent.
    BytesRef chromosomeIdBytes = d.getBinaryValue(FeatureIndexFields.CHROMOSOME_ID.getFieldName());
    if (chromosomeIdBytes != null) {
        String chromosomeId = chromosomeIdBytes.utf8ToString();
        if (!chromosomeId.isEmpty()) {
            entry.setChromosome(new Chromosome(Long.parseLong(chromosomeId)));
            BytesRef chromosomeNameBytes =
                    d.getBinaryValue(FeatureIndexFields.CHROMOSOME_NAME.getFieldName());
            if (chromosomeNameBytes != null) {
                entry.getChromosome().setName(chromosomeNameBytes.utf8ToString());
            }
        }
    }

    return entry;
}

From source file:com.epimorphics.server.indexers.LuceneResult.java

License:Apache License

/**
 * Returns every value stored under {@code fieldName}. Values are Longs for
 * numeric fields, Strings for literals and labels, or Resources for URI
 * fields (decoded from the field's binary UTF-8 value).
 */
public Object[] fieldValues(String fieldName) {
    IndexableField[] stored = doc.getFields(fieldName);
    Object[] values = new Object[stored.length];
    int i = 0;
    for (IndexableField f : stored) {
        // Try numeric first, then string, then fall back to binary → URI.
        Object v = f.numericValue();
        if (v == null) {
            v = f.stringValue();
        }
        if (v == null) {
            v = ResourceFactory.createResource(f.binaryValue().utf8ToString());
        }
        values[i++] = v;
    }
    return values;
}

From source file:com.factweavers.elasticsearch.payloadscorefunction.PayloadScoringFunction.java

License:Apache License

/**
 * Scores a document by summing the payloads attached to the configured
 * {@code values} of {@code field}: payload-bearing term vectors are used
 * when available, otherwise the index-lookup token stream. Terms without a
 * payload contribute {@code defaultValue}.
 *
 * @param docId         the document to score
 * @param subQueryScore the sub-query score (unused by this function)
 * @return the accumulated payload score
 */
@Override
public double score(int docId, float subQueryScore) {
    indexLookup.setNextDocId(docId);
    float score = 0;
    int obtainedTerms = 0;
    try {
        Fields termVectors = indexLookup.termVectors();
        // Primitive boolean: no reason to box here.
        boolean hasPayloadVectors = false;
        TermsEnum iterator = null;
        if (termVectors != null && termVectors.terms(field) != null && termVectors.terms(field).hasPayloads()) {
            hasPayloadVectors = true;
            Terms fields = termVectors.terms(field);
            iterator = fields.iterator(null);
        }

        if (hasPayloadVectors) {
            BytesRef term = iterator.next();
            // Stop early once every requested value has been seen.
            while (term != null && (obtainedTerms < values.size())) {
                String currentValue = term.utf8ToString();
                if (!values.contains(currentValue)) {
                    term = iterator.next();
                    continue;
                } else {
                    obtainedTerms++;
                }
                DocsAndPositionsEnum docsAndPositions = iterator.docsAndPositions(null, null);
                docsAndPositions.nextDoc();
                docsAndPositions.nextPosition();
                BytesRef payload = docsAndPositions.getPayload();
                if (payload != null) {
                    score += PayloadHelper.decodeFloat(payload.bytes, payload.offset);
                } else {
                    score += defaultValue;
                }
                term = iterator.next();
            }
        } else {
            IndexField fieldObject = indexLookup.get(field);
            // BUGFIX: the original dereferenced fieldObject before its null
            // check, so the check could never trigger.
            if (fieldObject != null) {
                for (String value : values) {
                    IndexFieldTerm tokens = fieldObject.get(value,
                            IndexLookup.FLAG_CACHE | IndexLookup.FLAG_PAYLOADS);
                    // NOTE(review): iterator() is invoked twice (hasNext on a
                    // fresh iterator, next on another fresh one); works only
                    // because next() then yields the first element — confirm
                    // against IndexFieldTerm semantics.
                    if (tokens != null && tokens.iterator().hasNext()) {
                        score += tokens.iterator().next().payloadAsFloat(defaultValue);
                    }
                }
            }
        }
    } catch (IOException e) {
        // NOTE(review): best-effort scoring — an I/O failure yields the
        // partial score accumulated so far.
        e.printStackTrace();
    }
    // Widening float→double; no boxed Double needed.
    return score;
}

From source file:com.floragunn.searchguard.support.DebugStreamOutput.java

License:Apache License

@Override
public void writeBytesRef(final BytesRef bytes) throws IOException {
    // Debug aid: echo the UTF-8 decoded form of the bytes to stdout before
    // delegating the actual serialization to the parent stream.
    System.out.print(bytes.utf8ToString());
    super.writeBytesRef(bytes);
}