List of usage examples for the org.apache.lucene.util.BytesRef constructor
public BytesRef(CharSequence text)
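Before the project-specific examples, here is a minimal standalone sketch (not taken from any of the projects below) of what this constructor does: it copies the given text into the BytesRef as UTF-8 bytes, which can then be compared with bytesEquals or decoded back with utf8ToString, as the tests below rely on.

import org.apache.lucene.util.BytesRef;

public class BytesRefConstructorSketch {
    public static void main(String[] args) {
        // The CharSequence constructor stores the text as UTF-8 bytes.
        BytesRef ref = new BytesRef("foo");

        // bytesEquals compares the raw byte content (used throughout the tests below).
        System.out.println(ref.bytesEquals(new BytesRef("foo"))); // true
        System.out.println(ref.bytesEquals(new BytesRef("bar"))); // false

        // utf8ToString decodes the bytes back into a Java String.
        System.out.println(ref.utf8ToString()); // foo
    }
}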
From source file: de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License: Open Source License

@Test
public void testStream_bytesRefArray() throws Exception {
    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("baz"));

    Assert.assertEquals("Not all items streamed.", 3L, StreamUtils.stream(bArr).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("foo"))).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("bar"))).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("baz"))).count());
    Assert.assertEquals("Unknown term found.", 0L,
            StreamUtils.stream(bArr)
                    .filter(t -> !t.bytesEquals(new BytesRef("foo"))
                            && !t.bytesEquals(new BytesRef("bar"))
                            && !t.bytesEquals(new BytesRef("baz")))
                    .count());
}
From source file: de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License: Open Source License

@Test
public void testStream_bytesRefArray_nonUnique() throws Exception {
    final BytesRefArray bArr = new BytesRefArray(Counter.newCounter(false));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("foo"));
    bArr.append(new BytesRef("baz"));
    bArr.append(new BytesRef("bar"));
    bArr.append(new BytesRef("foo"));

    Assert.assertEquals("Not all items streamed.", 6L, StreamUtils.stream(bArr).count());
    Assert.assertEquals("Term count mismatch.", 3L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("foo"))).count());
    Assert.assertEquals("Term count mismatch.", 2L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("bar"))).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(bArr).filter(br -> br.bytesEquals(new BytesRef("baz"))).count());
    Assert.assertEquals("Unknown term found.", 0L,
            StreamUtils.stream(bArr)
                    .filter(t -> !t.bytesEquals(new BytesRef("foo"))
                            && !t.bytesEquals(new BytesRef("bar"))
                            && !t.bytesEquals(new BytesRef("baz")))
                    .count());
}
From source file: de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License: Open Source License

@Test
public void testStream_termsEnum() throws Exception {
    final BytesRef[] terms = { new BytesRef("foo"), new BytesRef("bar"), new BytesRef("baz") };

    final class TEnum extends TermsEnum {
        int idx = 0;

        @Override
        public SeekStatus seekCeil(final BytesRef text) {
            throw new UnsupportedOperationException();
        }

        @Override
        public void seekExact(final long ord) {
            throw new UnsupportedOperationException();
        }

        @Override
        public BytesRef term() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long ord() {
            throw new UnsupportedOperationException();
        }

        @Override
        public int docFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public long totalTermFreq() {
            throw new UnsupportedOperationException();
        }

        @Override
        public PostingsEnum postings(final Bits liveDocs, final PostingsEnum reuse, final int flags) {
            throw new UnsupportedOperationException();
        }

        @Nullable
        @Override
        public BytesRef next() {
            if (this.idx < terms.length) {
                return terms[this.idx++];
            }
            return null;
        }
    }

    Assert.assertEquals("Not all terms streamed.", (long) terms.length,
            StreamUtils.stream(new TEnum()).count());
    Assert.assertEquals("Term count mismatch.", 1L,
            StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(new BytesRef("foo"))).count());
    Assert.assertEquals("Term count mismatch.", 1L,
            StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(new BytesRef("bar"))).count());
    Assert.assertEquals("Term count mismatch.", 1L,
            StreamUtils.stream(new TEnum()).filter(t -> t.bytesEquals(new BytesRef("baz"))).count());
    Assert.assertEquals("Unknown term found.", 0L,
            StreamUtils.stream(new TEnum())
                    .filter(t -> !t.bytesEquals(new BytesRef("foo"))
                            && !t.bytesEquals(new BytesRef("bar"))
                            && !t.bytesEquals(new BytesRef("baz")))
                    .count());
}
From source file: de.unihildesheim.iw.lucene.util.StreamUtilsTest.java
License: Open Source License

@Test
public void testStream_bytesRefHash() throws Exception {
    final BytesRefHash brh = new BytesRefHash();
    brh.add(new BytesRef("foo"));
    brh.add(new BytesRef("bar"));
    brh.add(new BytesRef("baz"));

    Assert.assertEquals("Not all terms streamed.", 3L, StreamUtils.stream(brh).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(brh).filter(br -> br.bytesEquals(new BytesRef("foo"))).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(brh).filter(br -> br.bytesEquals(new BytesRef("bar"))).count());
    Assert.assertEquals("Term not found.", 1L,
            StreamUtils.stream(brh).filter(br -> br.bytesEquals(new BytesRef("baz"))).count());
    Assert.assertEquals("Unknown term found.", 0L,
            StreamUtils.stream(brh)
                    .filter(t -> !t.bytesEquals(new BytesRef("foo"))
                            && !t.bytesEquals(new BytesRef("bar"))
                            && !t.bytesEquals(new BytesRef("baz")))
                    .count());
}
From source file: dk.netarkivet.common.distribute.arcrepository.ARCLookup.java
License: Open Source License

/**
 * Lucene Lookup.
 *
 * @param uri A URI to look for.
 * @return The file and offset where that URI can be found, or null if it doesn't exist.
 */
private ARCKey luceneLookUp(String uri) {
    BytesRef uriRef = new BytesRef(uri.getBytes()); // Should we decide which charset?
    Query query = new ConstantScoreQuery(
            new TermRangeFilter(DigestIndexer.FIELD_URL, uriRef, uriRef, true, true));
    try {
        AllDocsCollector allResultsCollector = new AllDocsCollector();
        luceneSearcher.search(query, allResultsCollector);
        Document doc = null;
        List<ScoreDoc> hits = allResultsCollector.getHits();
        if (hits != null) {
            log.debug("Found {} hits for uri: {}", hits.size(), uri);
            int i = 0;
            for (ScoreDoc hit : hits) {
                int docId = hit.doc;
                doc = luceneSearcher.doc(docId);
                String origin = doc.get(DigestIndexer.FIELD_ORIGIN);
                // Here is where we will handle multiple hits in the future
                if (origin == null) {
                    log.debug("No origin for URL '{}' hit {}", uri, i++);
                    continue;
                }
                String[] originParts = origin.split(",");
                if (originParts.length != 2) {
                    throw new IllegalState("Bad origin for URL '" + uri + "': '" + origin + "'");
                }
                log.debug("Found document with origin: {}", origin);
                return new ARCKey(originParts[0], Long.parseLong(originParts[1]));
            }
        }
    } catch (IOException e) {
        throw new IOFailure("Fatal error looking up '" + uri + "'", e);
    }
    return null;
}
From source file: dk.netarkivet.harvester.indexserver.DedupCrawlLogIndexCacheTester.java
License: Open Source License

private void verifySearchResult(Map<String, String> origins, IndexSearcher index) throws IOException {
    Set<String> urls = new HashSet<String>(origins.keySet());
    for (String urlValue : urls) {
        BytesRef uriRef = new BytesRef(urlValue);
        Query q = new ConstantScoreQuery(
                new TermRangeFilter(DigestIndexer.FIELD_URL, uriRef, uriRef, true, true));
        AllDocsCollector collector = new AllDocsCollector();
        index.search(q, collector);
        List<ScoreDoc> hits = collector.getHits();
        for (ScoreDoc hit : hits) {
            int docID = hit.doc;
            Document doc = index.doc(docID);
            String url = doc.get("url");
            String origin = doc.get("origin");
            assertEquals("Should have correct origin for url " + url, origins.get(url), origin);
            // Ensure that each occurs only once.
            String removedValue = origins.remove(url);
            if (removedValue == null) {
                // System.out.println("'" + url + "' not found in origins map");
            } else {
                // System.out.println("'" + url + "' was found in origins map");
            }
        }
    }
}
From source file: dk.statsbiblioteket.netark.dvenabler.DVReaderTest.java
License: Apache License

public static File generateIndex() throws IOException {
    final File INDEX = new File("target/testindex.deletefreely");
    Analyzer analyzer = new StandardAnalyzer(LUCENE_VERSION);

    final FieldType SINGLE_F = new FieldType();
    SINGLE_F.setIndexed(true);
    SINGLE_F.setStored(true);

    final FieldType MULTI_F = new FieldType();
    MULTI_F.setIndexed(true);
    MULTI_F.setStored(true);

    final FieldType SEARCH_F = new FieldType();
    SEARCH_F.setIndexed(true);

    final FieldType LONG_F = new FieldType();
    LONG_F.setIndexed(true);
    LONG_F.setStored(true);
    LONG_F.setNumericType(FieldType.NumericType.LONG);

    /*
    final FieldType DOUBLE_F = new FieldType();
    DOUBLE_F.setIndexed(true);
    DOUBLE_F.setStored(true);
    DOUBLE_F.setNumericType(FieldType.NumericType.DOUBLE);

    final FieldType FLOAT_F = new FieldType();
    FLOAT_F.setIndexed(true);
    FLOAT_F.setStored(true);
    FLOAT_F.setNumericType(FieldType.NumericType.FLOAT);
    */

    /*
    final FieldType STR_DV = new FieldType();
    STR_DV.setIndexed(true);
    STR_DV.setStored(true);
    STR_DV.setDocValueType(FieldInfo.DocValuesType.SORTED);
    */

    IndexWriter indexWriter = new IndexWriter(MMapDirectory.open(INDEX),
            new IndexWriterConfig(LUCENE_VERSION, analyzer));
    {
        Document document = new Document();
        document.add(new Field(ID, "1", MULTI_F));
        document.add(new Field(SEARCH, SEARCH_CONTENT, SEARCH_F));
        document.add(new Field(SINGLE, SINGLE_CONTENT, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_1, MULTI_F));
        document.add(new Field(MULTI, MULTI_CONTENT_2, MULTI_F));
        document.add(new LongField(LONG, LONG_CONTENT, LONG_F));
        // document.add(new DoubleField(DOUBLE, DOUBLE_CONTENT, DOUBLE_F));
        // document.add(new FloatField(FLOAT, FLOAT_CONTENT, FLOAT_F));
        document.add(new SortedDocValuesField(DV, new BytesRef(DV_CONTENT)));
        indexWriter.addDocument(document);
    }
    indexWriter.commit();
    indexWriter.close();
    return INDEX;
}
From source file: dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java
License: Apache License

private List<BytesRef> fill() throws IOException {
    final SortedSet<BytesRef> values = new TreeSet<>();
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        String value = reader.document(docID, FIELDS).get(field.getName());
        //System.out.println(value);
        if (value != null) {
            values.add(new BytesRef(value));
        }
    }
    return new ArrayList<>(values);
}
From source file: dk.statsbiblioteket.netark.dvenabler.wrapper.SortedDocValuesWrapper.java
License: Apache License

@Override
public int getOrd(int docID) {
    tracker.ping(docID);
    try {
        String value = reader.document(docID, FIELDS).get(field.getName());
        if (value == null) {
            return -1;
        }
        int ord = Collections.binarySearch(values, new BytesRef(value));
        if (ord < 0) {
            throw new IllegalStateException("The ord for value '" + value + "' for docID " + docID
                    + " in field '" + field + "' could not be located but should always be present");
        }
        return ord;
    } catch (IOException e) {
        throw new RuntimeException("Unable to lookup docID=" + docID + ", field=" + field, e);
    }
}
From source file: dk.statsbiblioteket.netark.dvenabler.wrapper.SortedSetDocValuesWrapper.java
License: Apache License

private List<BytesRef> fill() throws IOException {
    // TODO: Is this sort the same as the default BytesRef-based sort for DocValues?
    final SortedSet<BytesRef> values = new TreeSet<>();
    for (int docID = 0; docID < reader.maxDoc(); docID++) {
        String[] stored = reader.document(docID, FIELDS).getValues(field.getName());
        if (stored == null) {
            continue;
        }
        for (String value : stored) {
            values.add(new BytesRef(value));
        }
    }
    return new ArrayList<>(values);
}