Example usage for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence text).

Prototype

public BytesRef(CharSequence text) 

Document

Initialize the byte[] from the UTF8 bytes for the provided String.
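
For example, a BytesRef built from a String holds that String's UTF-8 bytes and can be turned back into a String with utf8ToString(). A minimal sketch, not taken from the sources below:

BytesRef ref = new BytesRef("lucene");
assert ref.utf8ToString().equals("lucene");   // round-trips through UTF-8
assert ref.equals(new BytesRef("lucene"));    // equality compares the underlying bytes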

Usage

From source file:com.gitblit.tickets.TicketIndexer.java

License:Apache License

private void toDocField(Document doc, Lucene lucene, String value) {
    if (StringUtils.isEmpty(value)) {
        return;
    }
    doc.add(new org.apache.lucene.document.Field(lucene.name(), value, TextField.TYPE_STORED));
    doc.add(new SortedDocValuesField(lucene.name(), new BytesRef(value)));
}
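
The SortedDocValuesField added above stores the value as a BytesRef so that the field can be sorted on at search time. A minimal sketch of such a sort, using a hypothetical field name and searcher rather than anything from the Gitblit code:

TopDocs sortByTicketField(IndexSearcher searcher, String fieldName) throws IOException {
    // sort on the doc-values field that toDocField() filled with new BytesRef(value)
    Sort sort = new Sort(new SortField(fieldName, SortField.Type.STRING));
    return searcher.search(new MatchAllDocsQuery(), 10, sort);
}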

From source file:com.github.flaxsearch.testutil.Fixtures.java

License:Apache License

private static void populateIndex(Directory directory) {
    try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) {

        {
            Document doc = new Document();
            doc.add(new TextField("field2", "here is some text", Field.Store.YES));
            doc.add(new StringField("field1", "value1", Field.Store.YES));
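            // four two-dimensional int points indexed under the same field name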
            doc.add(new IntPoint("point", 2, 4));
            doc.add(new IntPoint("point", 0, 1));
            doc.add(new IntPoint("point", 2, 1));
            doc.add(new IntPoint("point", 14, 4));
            writer.addDocument(doc);
            // commit here so the next document lands in a second segment
            writer.commit();
        }

        {
            Document doc = new Document();
            doc.add(new StringField("field1", "value2", Field.Store.YES));
            doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes")));
            doc.add(new TextField("field3",
                    "this is some more text in a different field value1 value11 value12 value21",
                    Field.Store.YES));
            writer.addDocument(doc);
        }

    } catch (IOException e) {
        throw new RuntimeException("We're a RAMDirectory, this should never happen!");
    }
}

From source file:com.github.flaxsearch.util.BytesRefUtils.java

License:Apache License

private static Function<String, BytesRef> getDecoder(String type) {
    switch (type.toLowerCase(Locale.ROOT)) {
    case "base64":
        return s -> new BytesRef(Base64.getUrlDecoder().decode(s.getBytes(Charset.defaultCharset())));
    case "utf8":
        return BytesRef::new;
    case "int":
        return s -> {
            BytesRefBuilder builder = new BytesRefBuilder();
            LegacyNumericUtils.intToPrefixCoded(Integer.parseInt(s), 0, builder);
            return builder.get();
        };
    case "long":
        return s -> {
            BytesRefBuilder builder = new BytesRefBuilder();
            LegacyNumericUtils.longToPrefixCoded(Long.parseLong(s), 0, builder);
            return builder.get();
        };
    case "float":
        return s -> {
            BytesRefBuilder builder = new BytesRefBuilder();
            LegacyNumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0,
                    builder);
            return builder.get();
        };
    case "double":
        return s -> {
            BytesRefBuilder builder = new BytesRefBuilder();
            LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0,
                    builder);
            return builder.get();
        };
    default:
        throw new IllegalArgumentException("Unknown decoder type: " + type);
    }
}
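
The "utf8" case above is simply a method reference to this constructor, so the decoder it returns behaves like s -> new BytesRef(s). A hypothetical call site (the method itself is private to BytesRefUtils):

Function<String, BytesRef> decoder = getDecoder("utf8");
BytesRef ref = decoder.apply("hello");   // same bytes as new BytesRef("hello")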

From source file:com.github.flaxsearch.util.ReaderManager.java

License:Apache License

default TermsEnum findTermPostings(Integer segment, String field, String term) throws IOException {

    Fields fields = getFields(segment);
    Terms terms = fields.terms(field);

    if (terms == null) {
        String msg = String.format("No field %s", field);
        throw new WebApplicationException(msg, Response.Status.NOT_FOUND);
    }

    TermsEnum te = terms.iterator();

    assert (term != null);
    if (!te.seekExact(new BytesRef(term))) {
        String msg = String.format("No term %s on field %s", term, field);
        throw new WebApplicationException(msg, Response.Status.NOT_FOUND);
    }

    return te;
}
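
Once seekExact(new BytesRef(term)) has positioned the enum on the requested term, the caller can read statistics or walk the postings. A minimal sketch of consuming the returned TermsEnum, not part of the ReaderManager interface itself:

int docFreq = te.docFreq();                  // number of documents containing the term
PostingsEnum postings = te.postings(null);   // iterator over those documents
while (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) {
    int docId = postings.docID();
    // process docId ...
}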

From source file:com.github.tteofili.looseen.MinHashClassifier.java

License:Apache License

List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName,
        TopDocs topDocs, int k) throws IOException {
    Map<BytesRef, Integer> classCounts = new HashMap<>();
    Map<BytesRef, Double> classBoosts = new HashMap<>(); // per-class boost accumulated from scores normalized by the max score in topDocs
    float maxScore = topDocs.getMaxScore();
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
        IndexableField storableField = searcher.doc(scoreDoc.doc).getField(categoryFieldName);
        if (storableField != null) {
            BytesRef cl = new BytesRef(storableField.stringValue());
            //update count
            Integer count = classCounts.get(cl);
            if (count != null) {
                classCounts.put(cl, count + 1);
            } else {
                classCounts.put(cl, 1);
            }
            //update boost, the boost is based on the best score
            Double totalBoost = classBoosts.get(cl);
            double singleBoost = scoreDoc.score / maxScore;
            if (totalBoost != null) {
                classBoosts.put(cl, totalBoost + singleBoost);
            } else {
                classBoosts.put(cl, singleBoost);
            }
        }
    }
    List<ClassificationResult<BytesRef>> returnList = new ArrayList<>();
    List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>();
    int sumdoc = 0;
    for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) {
        Integer count = entry.getValue();
        Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1
        temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k));
        sumdoc += count;
    }

    //correction
    if (sumdoc < k) {
        for (ClassificationResult<BytesRef> cr : temporaryList) {
            returnList.add(
                    new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc));
        }
    } else {
        returnList = temporaryList;
    }
    return returnList;
}

From source file:com.github.tteofili.looseen.QueryingClassifier.java

License:Apache License

@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    ClassificationResult<BytesRef> result = null;
    for (Map.Entry<String, Query> entry : queriesPerClass.entrySet()) {
        TopDocs search = indexSearcher.search(entry.getValue(), 1);
        float score;
        if (useCounts) {
            score = search.totalHits;
        } else {
            score = search.getMaxScore();
        }

        if (result == null || score > result.getScore()) {
            result = new ClassificationResult<>(new BytesRef(entry.getKey()), score);
        }
    }
    return result;
}
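
The winning class is returned wrapped in a BytesRef, so callers typically convert it back to a String. A hypothetical caller, where classifier stands for an instance of this class:

ClassificationResult<BytesRef> best = classifier.assignClass(someText);
String label = best.getAssignedClass().utf8ToString();   // class name as a String
double score = best.getScore();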

From source file:com.github.tteofili.looseen.Test20NewsgroupsClassification.java

License:Apache License

void buildIndex(File indexDir, IndexWriter indexWriter) throws IOException {
    File[] groupsDir = indexDir.listFiles();
    if (groupsDir != null) {
        for (File group : groupsDir) {
            String groupName = group.getName();
            File[] posts = group.listFiles();
            if (posts != null) {
                for (File postFile : posts) {
                    String number = postFile.getName();
                    NewsPost post = parse(postFile, groupName, number);
                    Document d = new Document();
                    d.add(new StringField(CATEGORY_FIELD, post.getGroup(), Field.Store.YES));
                    d.add(new SortedDocValuesField(CATEGORY_FIELD, new BytesRef(post.getGroup())));
                    d.add(new TextField(SUBJECT_FIELD, post.getSubject(), Field.Store.YES));
                    d.add(new TextField(BODY_FIELD, post.getBody(), Field.Store.YES));
                    indexWriter.addDocument(d);
                }
            }
        }
    }
    indexWriter.commit();
}

From source file:com.github.tteofili.looseen.TestWikipediaClassification.java

License:Apache License

private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);

    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();

    XMLInputFactory factory = XMLInputFactory.newInstance();
    StreamSource source;
    if (dump.getName().endsWith(".xml")) {
        source = new StreamSource(dump);
    } else {
        throw new RuntimeException("can index only wikipedia XML files");
    }
    XMLStreamReader reader = factory.createXMLStreamReader(source);
    while (reader.hasNext()) {
        if (count == Integer.MAX_VALUE) {
            break;
        }
        switch (reader.next()) {
        case XMLStreamConstants.START_ELEMENT:
            if ("title".equals(reader.getLocalName())) {
                title = reader.getElementText();
            } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                text = reader.getElementText();
                Matcher matcher = pattern.matcher(text);
                int pos = 0;
                while (matcher.find(pos)) {
                    String group = matcher.group(1);
                    String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                    Collections.addAll(cats, catName.split("\\|"));
                    pos = matcher.end();
                }
            }
            break;
        case XMLStreamConstants.END_ELEMENT:
            if ("page".equals(reader.getLocalName())) {
                Document page = new Document();
                if (title != null) {
                    page.add(new TextField(TITLE_FIELD, title, Field.Store.YES));
                }
                if (text != null) {
                    page.add(new TextField(TEXT_FIELD, text, Field.Store.YES));
                }
                for (String cat : cats) {
                    page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                    page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                }
                indexWriter.addDocument(page);
                cats.clear();
                count++;
                if (count % 100000 == 0) {
                    indexWriter.commit();
                    System.out.format("Committed %d pages%n", count);
                }
            }
            break;
        }
    }

    indexWriter.commit();

    long millis = System.currentTimeMillis() - start;
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            (double) millis / count);
}

From source file:com.hrhih.index.suggest.Input.java

License:Apache License

public Input(String term, long v, BytesRef payload) {
    this(new BytesRef(term), v, payload);
}

From source file:com.hrhih.index.suggest.Input.java

License:Apache License

public Input(String term, long v, Set<BytesRef> contexts) {
    this(new BytesRef(term), v, null, false, contexts, true);
}