Example usage for the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence)

Introduction

On this page you can find example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text)

Source Link

Document

Initializes the byte[] with the UTF-8 encoded bytes of the provided text.

Usage

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefArray} holding three distinct terms and checks the
 * total number of streamed items, that each term is seen exactly once, and
 * that no term other than the three appended ones is seen.
 */
@Test
public void testStream_bytesRefArray() throws Exception {
    // Terms the streamed items are compared against.
    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");
    final BytesRef baz = new BytesRef("baz");

    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("baz"));

    final long total = StreamUtils.stream(bArr).count();
    Assert.assertEquals("Not all items streamed.", 3L, total);

    final long fooCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(foo)).count();
    Assert.assertEquals("Term not found.", 1L, fooCount);

    final long barCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(bar)).count();
    Assert.assertEquals("Term not found.", 1L, barCount);

    final long bazCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(baz)).count();
    Assert.assertEquals("Term not found.", 1L, bazCount);

    // Anything that matches none of the three appended terms is unexpected.
    final long unknownCount = StreamUtils.stream(bArr)
            .filter(t -> !(t.bytesEquals(foo) || t.bytesEquals(bar) || t.bytesEquals(baz)))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknownCount);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefArray} that contains repeated terms ("foo" three
 * times, "bar" twice, "baz" once) and checks the total item count, the
 * per-term counts, and that no term other than these three is seen.
 */
@Test
public void testStream_bytesRefArray_nonUnique() throws Exception {
    // Terms the streamed items are compared against.
    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");
    final BytesRef baz = new BytesRef("baz");

    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("baz"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("foo"));

    final long total = StreamUtils.stream(bArr).count();
    Assert.assertEquals("Not all items streamed.", 6L, total);

    final long fooCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(foo)).count();
    Assert.assertEquals("Term count mismatch.", 3L, fooCount);

    final long barCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(bar)).count();
    Assert.assertEquals("Term count mismatch.", 2L, barCount);

    final long bazCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(baz)).count();
    Assert.assertEquals("Term not found.", 1L, bazCount);

    // Anything that matches none of the three appended terms is unexpected.
    final long unknownCount = StreamUtils.stream(bArr)
            .filter(t -> !(t.bytesEquals(foo) || t.bytesEquals(bar) || t.bytesEquals(baz)))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknownCount);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a minimal {@link TermsEnum} stub that iterates over three fixed
 * terms and checks the total term count, that each term is seen exactly once,
 * and that no other term is seen.
 */
@Test
public void testStream_termsEnum() throws Exception {
    final BytesRef[] terms = { new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz") };

    // Terms the streamed items are compared against.
    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");
    final BytesRef baz = new BytesRef("baz");

    /**
     * Stub enumeration: only {@link #next()} is implemented, every other
     * operation throws {@link UnsupportedOperationException}.
     */
    final class TEnum extends TermsEnum {
        // Position of the term returned by the next call to next().
        int idx = 0;

        @Nullable
        @Override
        public BytesRef next() {
            // null signals that all terms have been handed out.
            return this.idx < terms.length ? terms[this.idx++] : null;
        }

        @Override
        public SeekStatus seekCeil(final BytesRef text) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void seekExact(final long ord) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BytesRef term() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long ord() {
            throw new UnsupportedOperationException();
        }

        @Override
        public int docFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long totalTermFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public PostingsEnum postings(final Bits liveDocs, final PostingsEnum reuse, final int flags) {
            throw new UnsupportedOperationException();
        }
    }

    // A fresh TEnum is used per stream because the stub cannot be rewound.
    final long total = StreamUtils.stream(new TEnum()).count();
    Assert.assertEquals("Not all terms streamed.", (long) terms.length, total);

    final long fooCount = StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(foo)).count();
    Assert.assertEquals("Term count mismatch.", 1L, fooCount);

    final long barCount = StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(bar)).count();
    Assert.assertEquals("Term count mismatch.", 1L, barCount);

    final long bazCount = StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(baz)).count();
    Assert.assertEquals("Term count mismatch.", 1L, bazCount);

    final long unknownCount = StreamUtils.stream(new TEnum())
            .filter(t -> !(t.bytesEquals(foo) || t.bytesEquals(bar) || t.bytesEquals(baz)))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknownCount);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefHash} holding three distinct terms and checks the
 * total term count, that each term is seen exactly once, and that no term
 * other than the three added ones is seen.
 */
@Test
public void testStream_bytesRefHash() throws Exception {
    // Terms the streamed items are compared against.
    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");
    final BytesRef baz = new BytesRef("baz");

    final BytesRefHash brh = new BytesRefHash();
    brh.add(new BytesRef("foo"));
    brh.add(new BytesRef("bar"));
    brh.add(new BytesRef("baz"));

    final long total = StreamUtils.stream(brh).count();
    Assert.assertEquals("Not all terms streamed.", 3L, total);

    final long fooCount = StreamUtils.stream(brh).filter(br -> br.bytesEquals(foo)).count();
    Assert.assertEquals("Term not found.", 1L, fooCount);

    final long barCount = StreamUtils.stream(brh).filter(br -> br.bytesEquals(bar)).count();
    Assert.assertEquals("Term not found.", 1L, barCount);

    final long bazCount = StreamUtils.stream(brh).filter(br -> br.bytesEquals(baz)).count();
    Assert.assertEquals("Term not found.", 1L, bazCount);

    // Anything that matches none of the three added terms is unexpected.
    final long unknownCount = StreamUtils.stream(brh)
            .filter(t -> !(t.bytesEquals(foo) || t.bytesEquals(bar) || t.bytesEquals(baz)))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknownCount);
}

From source file:dk.netarkivet.common.distribute.arcrepository.ARCLookup.java

License:Open Source License

/**
 * Looks up a URI in the Lucene index.
 * <p>
 * The index is searched for documents whose {@code DigestIndexer.FIELD_URL} term equals the given URI. Hits
 * without an origin field are skipped; the first hit that has one determines the result.
 *
 * @param uri A URI to look for.
 * @return The file and offset where that URI can be found, or null if it doesn't exist.
 * @throws IllegalState if the origin of a hit does not consist of exactly two comma-separated parts.
 * @throws IOFailure if searching the index or reading a document fails with an IOException.
 */
private ARCKey luceneLookUp(String uri) {
    // BytesRef(CharSequence) always encodes the term as UTF-8. The former uri.getBytes() used the platform
    // default charset, so a non-ASCII URI could be encoded differently depending on the JVM's locale settings.
    BytesRef uriRef = new BytesRef(uri);

    // A range from uriRef to uriRef with both ends inclusive matches exactly that one term.
    Query query = new ConstantScoreQuery(
            new TermRangeFilter(DigestIndexer.FIELD_URL, uriRef, uriRef, true, true));

    try {
        AllDocsCollector allResultsCollector = new AllDocsCollector();
        luceneSearcher.search(query, allResultsCollector);
        List<ScoreDoc> hits = allResultsCollector.getHits();
        if (hits != null) {
            log.debug("Found {} hits for uri: {}", hits.size(), uri);
            int i = 0;
            for (ScoreDoc hit : hits) {
                Document doc = luceneSearcher.doc(hit.doc);
                String origin = doc.get(DigestIndexer.FIELD_ORIGIN);
                // Here is where we will handle multiple hits in the future
                if (origin == null) {
                    log.debug("No origin for URL '{}' hit {}", uri, i++);
                    continue;
                }
                // The origin is expected to have the form "<file>,<offset>".
                String[] originParts = origin.split(",");
                if (originParts.length != 2) {
                    throw new IllegalState("Bad origin for URL '" + uri + "': '" + origin + "'");
                }
                log.debug("Found document with origin: {}", origin);
                return new ARCKey(originParts[0], Long.parseLong(originParts[1]));
            }
        }
    } catch (IOException e) {
        throw new IOFailure("Fatal error looking up '" + uri + "'", e);
    }
    return null;
}

From source file:dk.netarkivet.harvester.indexserver.DedupCrawlLogIndexCacheTester.java

License:Open Source License

/**
 * Searches the index for every URL in the given map and asserts that each hit carries the origin the map
 * holds for the hit's URL. Verified entries are removed from the map.
 *
 * @param origins Expected origins keyed by URL; entries are removed as their hits are verified.
 * @param index The searcher to run the lookups against.
 * @throws IOException if searching or reading a document fails.
 */
private void verifySearchResult(Map<String, String> origins, IndexSearcher index) throws IOException {
    // Iterate over a snapshot of the keys, since entries are removed from the map inside the loop.
    Set<String> urls = new HashSet<String>(origins.keySet());

    for (String urlValue : urls) {
        BytesRef uriRef = new BytesRef(urlValue);
        TermRangeFilter exactUrl = new TermRangeFilter(DigestIndexer.FIELD_URL, uriRef, uriRef, true, true);
        Query q = new ConstantScoreQuery(exactUrl);

        AllDocsCollector collector = new AllDocsCollector();
        index.search(q, collector);

        for (ScoreDoc hit : collector.getHits()) {
            Document doc = index.doc(hit.doc);
            String url = doc.get("url");
            String origin = doc.get("origin");
            assertEquals("Should have correct origin for url " + url, origins.get(url), origin);
            // Ensure that each occurs only once: after removal, a further hit for the same url
            // is compared against null by the assertion above.
            origins.remove(url);
        }
    }
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

/**
 * Creates a test index containing a single document in {@code target/testindex.deletefreely}.
 * <p>
 * The document holds an ID field, a search-only field, a single-valued and a two-valued stored field, a long
 * field and a sorted doc-values field.
 *
 * @return the folder the index was written to.
 * @throws IOException if the index could not be created or written.
 */
public static File generateIndex() throws IOException {
    final File INDEX = new File("target/testindex.deletefreely");
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);

    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);

    // Indexed only, not stored.
    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);

    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);

    /*        final FieldType DOUBLE_F = new FieldType();
            DOUBLE_F.setIndexed(true);
            DOUBLE_F.setStored(true);
            DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);

            final FieldType FLOAT_F = new FieldType();
            FLOAT_F.setIndexed(true);
            FLOAT_F.setStored(true);
            FLOAT_F.setNumericType(FieldType.NumericType.FLOAT);
      */

    /*        final FieldType STR_DV = new FieldType();
            STR_DV.setIndexed(true);
            STR_DV.setStored(true);
            STR_DV.setDocValueType(FieldInfo.DocValuesType.SORTED);*/

    // try-with-resources closes the writer even when adding or committing the document fails.
    try (IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer))) {
        Document document = new Document();
        document.add(new Field(ID, "1", MULTI_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT, SEARCH_F));
        // SINGLE_F is configured identically to MULTI_F (indexed and stored).
        document.add(new Field(SINGLE, SINGLE_CONTENT, SINGLE_F));
        document.add(new Field(MULTI, MULTI_CONTENT_1, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_2, MULTI_F));
        document.add(new LongField(LONG, LONG_CONTENT, LONG_F));
        //            document.add(new DoubleField(DOUBLE, DOUBLE_CONTENT, DOUBLE_F));
        //            document.add(new FloatField(FLOAT, FLOAT_CONTENT, FLOAT_F));
        document.add(new SortedDocValuesField(DV, new BytesRef(DV_CONTENT)));
        indexWriter.addDocument(document);
        indexWriter.commit();
    }
    return INDEX;
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java

License:Apache License

/**
 * Collects the distinct stored values of the field from all documents with IDs from 0 up to (but excluding)
 * {@code reader.maxDoc()}.
 *
 * @return the distinct values as a list, in the natural order of {@link BytesRef}.
 * @throws IOException if a document could not be read.
 */
private List<BytesRef> fill() throws IOException {
    // The TreeSet both removes duplicates and keeps the values sorted.
    final SortedSet<BytesRef> unique = new TreeSet<>();
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        final String stored = reader.document(docID, FIELDS).get(field.getName());
        if (stored == null) {
            // The document has no stored value for the field.
            continue;
        }
        unique.add(new BytesRef(stored));
    }
    return new ArrayList<>(unique);
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java

License:Apache License

/**
 * Resolves the ordinal for the given document by reading its stored value for the field and locating that
 * value in {@code values} with a binary search.
 *
 * @param docID the ID of the document to resolve the ordinal for.
 * @return the index of the document's value in {@code values}, or -1 if the document has no stored value.
 * @throws IllegalStateException if the stored value is not present in {@code values}.
 * @throws RuntimeException if the document could not be read.
 */
@Override
public int getOrd(int docID) {
    tracker.ping(docID);

    final String value;
    try {
        value = reader.document(docID, FIELDS).get(field.getName());
    } catch (IOException e) {
        throw new RuntimeException("Unable to lookup docID=" + docID + ", field=" + field, e);
    }
    if (value == null) {
        return -1;
    }

    // A negative result from binarySearch means the value is not in the list.
    final int ord = Collections.binarySearch(values, new BytesRef(value));
    if (ord >= 0) {
        return ord;
    }
    throw new IllegalStateException("The ord for value '" + value + "' for docID " + docID
            + " in field '" + field + "' could not be located but should always be present");
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedSetDocValuesWrapper.java

License:Apache License

/**
 * Collects the distinct stored values of the (possibly multi-valued) field from all documents with IDs from 0
 * up to (but excluding) {@code reader.maxDoc()}.
 *
 * @return the distinct values as a list, in the natural order of {@link BytesRef}.
 * @throws IOException if a document could not be read.
 */
private List<BytesRef> fill() throws IOException {
    // TODO: Is this sort the same as the default BytesRef-based sort for DocValues?
    final SortedSet<BytesRef> unique = new TreeSet<>();
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        final String[] stored = reader.document(docID, FIELDS).getValues(field.getName());
        if (stored != null) {
            for (final String value : stored) {
                unique.add(new BytesRef(value));
            }
        }
    }
    return new ArrayList<>(unique);
}