List of usage examples for the constructor org.apache.lucene.util.BytesRef.BytesRef
public BytesRef(CharSequence text)
From source file: com.gitblit.tickets.TicketIndexer.java
License: Apache License
private void toDocField(Document doc, Lucene lucene, String value) { if (StringUtils.isEmpty(value)) { return;// w w w .ja v a2 s.co m } doc.add(new org.apache.lucene.document.Field(lucene.name(), value, TextField.TYPE_STORED)); doc.add(new SortedDocValuesField(lucene.name(), new BytesRef(value))); }
From source file: com.github.flaxsearch.testutil.Fixtures.java
License: Apache License
private static void populateIndex(Directory directory) { try (IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig())) { {/*from ww w.j a v a 2s.c o m*/ Document doc = new Document(); doc.add(new TextField("field2", "here is some text", Field.Store.YES)); doc.add(new StringField("field1", "value1", Field.Store.YES)); doc.add(new IntPoint("point", 2, 4)); doc.add(new IntPoint("point", 0, 1)); doc.add(new IntPoint("point", 2, 1)); doc.add(new IntPoint("point", 14, 4)); writer.addDocument(doc); // more than one segment writer.commit(); } { Document doc = new Document(); doc.add(new StringField("field1", "value2", Field.Store.YES)); doc.add(new BinaryDocValuesField("field1", new BytesRef("some bytes"))); doc.add(new TextField("field3", "this is some more text in a different field value1 value11 value12 value21", Field.Store.YES)); writer.addDocument(doc); } } catch (IOException e) { throw new RuntimeException("We're a RAMDirectory, this should never happen!"); } }
From source file: com.github.flaxsearch.util.BytesRefUtils.java
License: Apache License
private static Function<String, BytesRef> getDecoder(String type) { switch (type.toLowerCase(Locale.ROOT)) { case "base64": return s -> new BytesRef(Base64.getUrlDecoder().decode(s.getBytes(Charset.defaultCharset()))); case "utf8": return BytesRef::new; case "int": return s -> { BytesRefBuilder builder = new BytesRefBuilder(); LegacyNumericUtils.intToPrefixCoded(Integer.parseInt(s), 0, builder); return builder.get(); };//from ww w . j a v a2s.c o m case "long": return s -> { BytesRefBuilder builder = new BytesRefBuilder(); LegacyNumericUtils.longToPrefixCoded(Long.parseLong(s), 0, builder); return builder.get(); }; case "float": return s -> { BytesRefBuilder builder = new BytesRefBuilder(); LegacyNumericUtils.intToPrefixCoded(NumericUtils.floatToSortableInt(Float.parseFloat(s)), 0, builder); return builder.get(); }; case "double": return s -> { BytesRefBuilder builder = new BytesRefBuilder(); LegacyNumericUtils.longToPrefixCoded(NumericUtils.doubleToSortableLong(Double.parseDouble(s)), 0, builder); return builder.get(); }; default: throw new IllegalArgumentException("Unknown decoder type: " + type); } }
From source file: com.github.flaxsearch.util.ReaderManager.java
License: Apache License
/**
 * Positions a {@link TermsEnum} on the exact term within the given field and
 * segment, translating "not found" into HTTP 404 responses.
 *
 * @param segment segment ordinal (null semantics delegated to getFields)
 * @param field   field whose terms dictionary is consulted
 * @param term    exact term text to seek to (must not be null)
 * @return a TermsEnum positioned on the term, ready for postings access
 * @throws WebApplicationException 404 when the field or the term is absent
 * @throws IOException on index access failure
 */
default TermsEnum findTermPostings(Integer segment, String field, String term) throws IOException {
    Fields fields = getFields(segment);
    Terms terms = fields.terms(field);
    if (terms == null) {
        throw new WebApplicationException(String.format("No field %s", field), Response.Status.NOT_FOUND);
    }
    assert term != null;
    TermsEnum termsEnum = terms.iterator();
    if (!termsEnum.seekExact(new BytesRef(term))) {
        throw new WebApplicationException(String.format("No term %s on field %s", term, field),
                Response.Status.NOT_FOUND);
    }
    return termsEnum;
}
From source file: com.github.tteofili.looseen.MinHashClassifier.java
License: Apache License
List<ClassificationResult<BytesRef>> buildListFromTopDocs(IndexSearcher searcher, String categoryFieldName, TopDocs topDocs, int k) throws IOException { Map<BytesRef, Integer> classCounts = new HashMap<>(); Map<BytesRef, Double> classBoosts = new HashMap<>(); // this is a boost based on class ranking positions in topDocs float maxScore = topDocs.getMaxScore(); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField storableField = searcher.doc(scoreDoc.doc).getField(categoryFieldName); if (storableField != null) { BytesRef cl = new BytesRef(storableField.stringValue()); //update count Integer count = classCounts.get(cl); if (count != null) { classCounts.put(cl, count + 1); } else { classCounts.put(cl, 1);//w w w . j a v a2 s . c o m } //update boost, the boost is based on the best score Double totalBoost = classBoosts.get(cl); double singleBoost = scoreDoc.score / maxScore; if (totalBoost != null) { classBoosts.put(cl, totalBoost + singleBoost); } else { classBoosts.put(cl, singleBoost); } } } List<ClassificationResult<BytesRef>> returnList = new ArrayList<>(); List<ClassificationResult<BytesRef>> temporaryList = new ArrayList<>(); int sumdoc = 0; for (Map.Entry<BytesRef, Integer> entry : classCounts.entrySet()) { Integer count = entry.getValue(); Double normBoost = classBoosts.get(entry.getKey()) / count; //the boost is normalized to be 0<b<1 temporaryList.add(new ClassificationResult<>(entry.getKey().clone(), (count * normBoost) / (double) k)); sumdoc += count; } //correction if (sumdoc < k) { for (ClassificationResult<BytesRef> cr : temporaryList) { returnList.add( new ClassificationResult<>(cr.getAssignedClass(), cr.getScore() * k / (double) sumdoc)); } } else { returnList = temporaryList; } return returnList; }
From source file: com.github.tteofili.looseen.QueryingClassifier.java
License: Apache License
/**
 * Runs each per-class query against the index and assigns the class whose
 * query scores highest — by total hit count when {@code useCounts} is set,
 * otherwise by the best single-hit score.
 *
 * @param text unused here; classification is driven by the stored queries
 * @return the winning class and its score, or null if there are no queries
 * @throws IOException on search failure
 */
@Override
public ClassificationResult<BytesRef> assignClass(String text) throws IOException {
    ClassificationResult<BytesRef> best = null;
    for (Map.Entry<String, Query> entry : queriesPerClass.entrySet()) {
        TopDocs search = indexSearcher.search(entry.getValue(), 1);
        float score = useCounts ? search.totalHits : search.getMaxScore();
        if (best == null || score > best.getScore()) {
            best = new ClassificationResult<>(new BytesRef(entry.getKey()), score);
        }
    }
    return best;
}
From source file: com.github.tteofili.looseen.Test20NewsgroupsClassification.java
License: Apache License
/**
 * Indexes every newsgroup post found under {@code indexDir}, where each
 * subdirectory is a group and each file within it a post. Each document gets
 * the category (stored + sorted doc values), subject, and body, and a final
 * commit is issued regardless of how many posts were found.
 *
 * @throws IOException on parse or index-write failure
 */
void buildIndex(File indexDir, IndexWriter indexWriter) throws IOException {
    File[] groups = indexDir.listFiles();
    if (groups != null) {
        for (File group : groups) {
            String groupName = group.getName();
            File[] posts = group.listFiles();
            if (posts == null) {
                continue;
            }
            for (File postFile : posts) {
                NewsPost post = parse(postFile, groupName, postFile.getName());
                Document d = new Document();
                d.add(new StringField(CATEGORY_FIELD, post.getGroup(), Field.Store.YES));
                // Category is added again as doc values to support sorting/grouping.
                d.add(new SortedDocValuesField(CATEGORY_FIELD, new BytesRef(post.getGroup())));
                d.add(new TextField(SUBJECT_FIELD, post.getSubject(), Field.Store.YES));
                d.add(new TextField(BODY_FIELD, post.getBody(), Field.Store.YES));
                indexWriter.addDocument(d);
            }
        }
    }
    indexWriter.commit();
}
From source file: com.github.tteofili.looseen.TestWikipediaClassification.java
License: Apache License
/**
 * Streams a Wikipedia XML dump into the index, one document per &lt;page&gt;
 * element, extracting title, text, and the categories matched by the
 * class-level {@code pattern}. Commits every 100000 pages and once at the end,
 * printing progress to stdout.
 *
 * @param dump        the .xml dump file (any other extension is rejected)
 * @param indexWriter destination index writer
 * @throws Exception on XML or index failures
 */
private static void importWikipedia(File dump, IndexWriter indexWriter) throws Exception {
    long start = System.currentTimeMillis();
    int count = 0;
    System.out.format("Importing %s...%n", dump);
    // title/text/cats carry state between START_ELEMENT and the enclosing
    // page's END_ELEMENT; only cats is reset per page (title/text are simply
    // overwritten by the next page's elements).
    String title = null;
    String text = null;
    Set<String> cats = new HashSet<>();
    XMLInputFactory factory = XMLInputFactory.newInstance();
    StreamSource source;
    if (dump.getName().endsWith(".xml")) {
        source = new StreamSource(dump);
    } else {
        throw new RuntimeException("can index only wikipedia XML files");
    }
    XMLStreamReader reader = factory.createXMLStreamReader(source);
    while (reader.hasNext()) {
        // Safety valve against pathological dumps.
        if (count == Integer.MAX_VALUE) {
            break;
        }
        switch (reader.next()) {
        case XMLStreamConstants.START_ELEMENT:
            if ("title".equals(reader.getLocalName())) {
                title = reader.getElementText();
            } else if (TEXT_FIELD.equals(reader.getLocalName())) {
                text = reader.getElementText();
                // Scan the article text for category markup; each match may
                // contain several pipe-separated category names.
                Matcher matcher = pattern.matcher(text);
                int pos = 0;
                while (matcher.find(pos)) {
                    String group = matcher.group(1);
                    // Strip "|<space>" and "|*" decorations before splitting on '|'.
                    String catName = group.replaceAll("\\|\\s", "").replaceAll("\\|\\*", "");
                    Collections.addAll(cats, catName.split("\\|"));
                    pos = matcher.end();
                }
            }
            break;
        case XMLStreamConstants.END_ELEMENT:
            // A closing </page> flushes the accumulated state into one document.
            if ("page".equals(reader.getLocalName())) {
                Document page = new Document();
                if (title != null) {
                    page.add(new TextField(TITLE_FIELD, title, StoredField.Store.YES));
                }
                if (text != null) {
                    page.add(new TextField(TEXT_FIELD, text, StoredField.Store.YES));
                }
                for (String cat : cats) {
                    // Stored for retrieval plus doc values for faceting/sorting.
                    page.add(new StringField(CATEGORY_FIELD, cat, Field.Store.YES));
                    page.add(new SortedSetDocValuesField(CATEGORY_FIELD, new BytesRef(cat)));
                }
                indexWriter.addDocument(page);
                cats.clear();
                count++;
                // Periodic commit keeps memory bounded on large dumps.
                if (count % 100000 == 0) {
                    indexWriter.commit();
                    System.out.format("Committed %d pages%n", count);
                }
            }
            break;
        }
    }
    indexWriter.commit();
    long millis = System.currentTimeMillis() - start;
    // NOTE(review): if the dump contains zero pages, (double) millis / count
    // divides by zero (prints NaN/Infinity) — harmless but worth confirming.
    System.out.format("Imported %d pages in %d seconds (%.2fms/page)%n", count, millis / 1000,
            (double) millis / count);
}
From source file: com.hrhih.index.suggest.Input.java
License: Apache License
/**
 * Convenience constructor: encodes {@code term} to a {@link BytesRef}
 * (UTF-8, per the BytesRef(CharSequence) contract) and delegates.
 *
 * @param term    suggestion text
 * @param v       weight of the suggestion
 * @param payload opaque payload stored alongside the entry
 */
public Input(String term, long v, BytesRef payload) {
    this(new BytesRef(term), v, payload);
}
From source file: com.hrhih.index.suggest.Input.java
License: Apache License
/**
 * Convenience constructor for a contexts-only entry: encodes {@code term}
 * to a {@link BytesRef} and delegates with no payload.
 *
 * @param term     suggestion text
 * @param v        weight of the suggestion
 * @param contexts context set associated with the entry
 */
public Input(String term, long v, Set<BytesRef> contexts) {
    // Delegates with (payload=null, hasPayloads=false, contexts, hasContexts=true) —
    // NOTE(review): argument meaning inferred from the values; confirm against
    // the primary constructor, which is not visible here.
    this(new BytesRef(term), v, null, false, contexts, true);
}