Example usage for org.apache.lucene.util BytesRef BytesRef

List of usage examples for org.apache.lucene.util BytesRef BytesRef

Introduction

On this page you can find example usages of the org.apache.lucene.util.BytesRef constructor BytesRef(CharSequence).

Prototype

public BytesRef(CharSequence text) 

Source Link

Document

Initializes the byte[] with the UTF-8 encoded bytes of the provided text.

Usage

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefArray} holding three distinct terms and checks
 * that all three are streamed, each exactly once, and that no other term
 * shows up.
 */
@Test
public void testStream_bytesRefArray() throws Exception {
    final BytesRef[] expected = { new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz") };

    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    for (final BytesRef term : expected) {
        bArr.append(term);
    }

    final long total = StreamUtils.stream(bArr).count();
    Assert.assertEquals("Not all items streamed.", 3L, total);

    // Every expected term must occur exactly once in the stream.
    for (final BytesRef term : expected) {
        final long matches = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(term)).count();
        Assert.assertEquals("Term not found.", 1L, matches);
    }

    // Nothing but the expected terms may be streamed.
    final long unknown = StreamUtils.stream(bArr).filter(br -> {
        for (final BytesRef term : expected) {
            if (br.bytesEquals(term)) {
                return false;
            }
        }
        return true;
    }).count();
    Assert.assertEquals("Unknown term found.", 0L, unknown);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefArray} that contains repeated terms and checks
 * that duplicates are preserved: "foo" three times, "bar" twice and "baz"
 * once, six items in total, and no other term.
 */
@Test
public void testStream_bytesRefArray_nonUnique() throws Exception {
    final BytesRef foo = new BytesRef("foo");
    final BytesRef bar = new BytesRef("bar");
    final BytesRef baz = new BytesRef("baz");

    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    // Insertion order: foo, bar, foo, baz, bar, foo.
    for (final BytesRef term : new BytesRef[] { foo, bar, foo, baz, bar, foo }) {
        bArr.append(term);
    }

    final long total = StreamUtils.stream(bArr).count();
    Assert.assertEquals("Not all items streamed.", 6L, total);

    final long fooCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(foo)).count();
    Assert.assertEquals("Term count mismatch.", 3L, fooCount);

    final long barCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(bar)).count();
    Assert.assertEquals("Term count mismatch.", 2L, barCount);

    final long bazCount = StreamUtils.stream(bArr).filter(br -> br.bytesEquals(baz)).count();
    Assert.assertEquals("Term not found.", 1L, bazCount);

    // Anything that is none of the three known terms counts as unknown.
    final long unknown = StreamUtils.stream(bArr)
            .filter(br -> !(br.bytesEquals(foo) || br.bytesEquals(bar) || br.bytesEquals(baz)))
            .count();
    Assert.assertEquals("Unknown term found.", 0L, unknown);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a minimal {@link TermsEnum} stub that only supports
 * {@code next()} and checks that each of its three terms is streamed
 * exactly once and that no other term shows up.
 */
@Test
public void testStream_termsEnum() throws Exception {
    final BytesRef[] terms = { new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz") };

    /** Stub enum that walks {@code terms} in order; every other operation is unsupported. */
    final class TEnum extends TermsEnum {
        int cursor = 0;

        @Nullable
        @Override
        public BytesRef next() {
            // null signals that the enumeration is exhausted.
            return this.cursor < terms.length ? terms[this.cursor++] : null;
        }

        @Override
        public BytesRef term() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long ord() {
            throw new UnsupportedOperationException();
        }

        @Override
        public int docFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long totalTermFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public SeekStatus seekCeil(final BytesRef text) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void seekExact(final long ord) {
            throw new UnsupportedOperationException();
        }

        @Override
        public PostingsEnum postings(final Bits liveDocs, final PostingsEnum reuse, final int flags) {
            throw new UnsupportedOperationException();
        }
    }

    final long total = StreamUtils.stream(new TEnum()).count();
    Assert.assertEquals("Not all terms streamed.", (long) terms.length, total);

    // A fresh enum is needed for every pass, as the stub cannot be rewound.
    for (final BytesRef expected : terms) {
        final long matches = StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(expected)).count();
        Assert.assertEquals("Term count mismatch.", 1L, matches);
    }

    final long unknown = StreamUtils.stream(new TEnum()).filter(t -> {
        for (final BytesRef expected : terms) {
            if (t.bytesEquals(expected)) {
                return false;
            }
        }
        return true;
    }).count();
    Assert.assertEquals("Unknown term found.", 0L, unknown);
}

From source file:de.unihildesheim.iw.lucene.util.StreamUtilsTest.java

License:Open Source License

/**
 * Streams a {@link BytesRefHash} holding three distinct terms and checks
 * that all three are streamed, each exactly once, and that no other term
 * shows up.
 */
@Test
public void testStream_bytesRefHash() throws Exception {
    final BytesRef[] expected = { new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz") };

    final BytesRefHash brh = new BytesRefHash();
    for (final BytesRef term : expected) {
        brh.add(term);
    }

    final long total = StreamUtils.stream(brh).count();
    Assert.assertEquals("Not all terms streamed.", 3L, total);

    // Every expected term must occur exactly once in the stream.
    for (final BytesRef term : expected) {
        final long matches = StreamUtils.stream(brh).filter(br -> br.bytesEquals(term)).count();
        Assert.assertEquals("Term not found.", 1L, matches);
    }

    // Nothing but the expected terms may be streamed.
    final long unknown = StreamUtils.stream(brh).filter(br -> {
        for (final BytesRef term : expected) {
            if (br.bytesEquals(term)) {
                return false;
            }
        }
        return true;
    }).count();
    Assert.assertEquals("Unknown term found.", 0L, unknown);
}

From source file:dk.netarkivet.common.distribute.arcrepository.ARCLookup.java

License:Open Source License

/**
 * Looks up a URI in the Lucene index.
 * <p>
 * The index is queried for documents whose URL field equals the given URI.
 * Hits without an origin field are skipped; the first hit that has one
 * decides the result.
 *
 * @param uri A URI to look for.
 * @return The file and offset where that URI can be found, or null if it doesn't exist.
 * @throws IllegalState if the origin of a hit does not consist of exactly two comma-separated parts.
 * @throws IOFailure if searching the index or loading a document fails with an IOException.
 */
private ARCKey luceneLookUp(String uri) {
    // Encode the term as UTF-8 explicitly: BytesRef(CharSequence) always uses UTF-8,
    // whereas String.getBytes() depends on the platform default charset and would
    // produce different term bytes for non-ASCII URIs on differently configured hosts.
    BytesRef uriRef = new BytesRef(uri);

    // An inclusive range with identical lower and upper bound matches exactly this term.
    Query query = new ConstantScoreQuery(
            new TermRangeFilter(DigestIndexer.FIELD_URL, uriRef, uriRef, true, true));

    try {
        AllDocsCollector allResultsCollector = new AllDocsCollector();
        luceneSearcher.search(query, allResultsCollector);
        List<ScoreDoc> hits = allResultsCollector.getHits();
        if (hits != null) {
            log.debug("Found {} hits for uri: {}", hits.size(), uri);
            // Index of the current hit; only advanced when a hit is skipped, which is
            // sufficient because any hit with an origin ends the loop.
            int i = 0;
            for (ScoreDoc hit : hits) {
                Document doc = luceneSearcher.doc(hit.doc);
                String origin = doc.get(DigestIndexer.FIELD_ORIGIN);
                // Here is where we will handle multiple hits in the future
                if (origin == null) {
                    log.debug("No origin for URL '{}' hit {}", uri, i++);
                    continue;
                }
                String[] originParts = origin.split(",");
                if (originParts.length != 2) {
                    throw new IllegalState("Bad origin for URL '" + uri + "': '" + origin + "'");
                }
                log.debug("Found document with origin: {}", origin);
                return new ARCKey(originParts[0], Long.parseLong(originParts[1]));
            }
        }
    } catch (IOException e) {
        throw new IOFailure("Fatal error looking up '" + uri + "'", e);
    }
    return null;
}

From source file:dk.netarkivet.harvester.indexserver.DedupCrawlLogIndexCacheTester.java

License:Open Source License

/**
 * Searches the index for every URL in the given map and asserts that each
 * hit carries the origin the map associates with its URL. Entries are
 * removed from {@code origins} as their URLs are found.
 *
 * @param origins Expected origin per URL; modified by this method.
 * @param index The searcher to query.
 * @throws IOException If searching or loading a document fails.
 */
private void verifySearchResult(Map<String, String> origins, IndexSearcher index) throws IOException {
    // Iterate over a snapshot of the keys, since entries are removed from origins below.
    Set<String> wantedUrls = new HashSet<String>(origins.keySet());

    for (String wantedUrl : wantedUrls) {
        BytesRef term = new BytesRef(wantedUrl);
        // An inclusive range with identical bounds matches exactly this term.
        TermRangeFilter exactUrl = new TermRangeFilter(DigestIndexer.FIELD_URL, term, term, true, true);
        Query q = new ConstantScoreQuery(exactUrl);

        AllDocsCollector collector = new AllDocsCollector();
        index.search(q, collector);

        for (ScoreDoc hit : collector.getHits()) {
            Document found = index.doc(hit.doc);
            String url = found.get("url");
            String origin = found.get("origin");
            assertEquals("Should have correct origin for url " + url, origins.get(url), origin);
            // Ensure that each occurs only once.
            origins.remove(url);
        }
    }
}

From source file:dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java

License:Apache License

/**
 * Builds a Lucene test index at {@code target/testindex.deletefreely}
 * containing a single document with an ID field, a search-only field, a
 * single-valued field, a two-valued field, a long field and a sorted
 * DocValues field.
 *
 * @return The location of the generated index.
 * @throws IOException If the index could not be written.
 */
public static File generateIndex() throws IOException {
    final File INDEX = new File("target/testindex.deletefreely");
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);

    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);

    // Indexed only; setStored is not called for the search field.
    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);

    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);

    // NOTE: DOUBLE and FLOAT fields as well as a SORTED string DocValues FieldType
    // were previously sketched here as commented-out code; they are not part of
    // the generated index.

    // try-with-resources releases the writer (and its write lock) even when
    // adding the document or committing fails.
    try (IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer))) {
        Document document = new Document();
        document.add(new Field(ID, "1", MULTI_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT, SEARCH_F));
        // SINGLE_F is configured identically to MULTI_F (indexed and stored).
        document.add(new Field(SINGLE, SINGLE_CONTENT, SINGLE_F));
        document.add(new Field(MULTI, MULTI_CONTENT_1, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_2, MULTI_F));
        document.add(new LongField(LONG, LONG_CONTENT, LONG_F));
        document.add(new SortedDocValuesField(DV, new BytesRef(DV_CONTENT)));
        indexWriter.addDocument(document);
        indexWriter.commit();
    }
    return INDEX;
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java

License:Apache License

/**
 * Collects the stored value of the wrapped field from every document ID
 * below {@code reader.maxDoc()} into a sorted, duplicate-free list.
 * Documents without a value for the field are skipped.
 *
 * @return The distinct values in the natural order of {@link BytesRef}.
 * @throws IOException If a document could not be loaded.
 */
private List<BytesRef> fill() throws IOException {
    // The TreeSet both removes duplicates and keeps the values ordered.
    final SortedSet<BytesRef> ordered = new TreeSet<>();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
        final String stored = reader.document(doc, FIELDS).get(field.getName());
        if (stored == null) {
            continue;
        }
        ordered.add(new BytesRef(stored));
    }
    final List<BytesRef> result = new ArrayList<>(ordered);
    return result;
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java

License:Apache License

/**
 * Resolves the ordinal of the stored field value of the given document by
 * binary search in {@code values}.
 *
 * @param docID The document to resolve the ordinal for.
 * @return The position of the document's value in {@code values}, or -1 if
 *         the document has no value for the field.
 * @throws IllegalStateException If the value is not present in {@code values}.
 * @throws RuntimeException If the document could not be loaded.
 */
@Override
public int getOrd(int docID) {
    tracker.ping(docID);

    final String stored;
    try {
        // Loading the document is the only step that can fail with an IOException.
        stored = reader.document(docID, FIELDS).get(field.getName());
    } catch (IOException e) {
        throw new RuntimeException("Unable to lookup docID=" + docID + ", field=" + field, e);
    }

    if (stored == null) {
        return -1;
    }

    final int position = Collections.binarySearch(values, new BytesRef(stored));
    if (position >= 0) {
        return position;
    }
    throw new IllegalStateException("The ord for value '" + stored + "' for docID " + docID
            + " in field '" + field + "' could not be located but should always be present");
}

From source file:dk.statsbiblioteket.netark.dvenabler.wrapper.SortedSetDocValuesWrapper.java

License:Apache License

/**
 * Collects all stored values of the wrapped field from every document ID
 * below {@code reader.maxDoc()} into a sorted, duplicate-free list.
 * Documents for which no value array is returned are skipped.
 *
 * @return The distinct values in the natural order of {@link BytesRef}.
 * @throws IOException If a document could not be loaded.
 */
private List<BytesRef> fill() throws IOException {
    // TODO: Is this sort the same as the default BytesRef-based sort for DocValues?
    final SortedSet<BytesRef> ordered = new TreeSet<>();
    for (int doc = 0; doc < reader.maxDoc(); doc++) {
        final String[] entries = reader.document(doc, FIELDS).getValues(field.getName());
        if (entries != null) {
            for (int i = 0; i < entries.length; i++) {
                ordered.add(new BytesRef(entries[i]));
            }
        }
    }
    final List<BytesRef> result = new ArrayList<>(ordered);
    return result;
}