List of usage examples for org.apache.lucene.document.Field#tokenStreamValue()
public TokenStream tokenStreamValue()
From source file:engine.easy.indexer.writer.EasySearchIndexWriter.java
License:Apache License
/** * Read the extra data field information * /* w w w. j a va 2s . c om*/ * @return it returns the no: of token streams for the extra data field information. * @throws IOException if the file would have any IO operation. */ private int[] extraData(Field field, Analyzer analyzer) throws IOException { if (!field.isIndexed()) return null; if (!field.isTokenized()) return (new int[] { 1, 1 }); String strv = field.stringValue(); int v[]; if (strv == null) { Reader readerv = field.readerValue(); if (readerv == null) { TokenStream tsv = field.tokenStreamValue(); if (tsv == null) { throw new IllegalArgumentException( (new StringBuilder("Cannot obtain field value. field_name: ")).append(field.name()) .append(".").toString()); } else { v = countTokenStream(tsv); return v; } } strv = readAll(readerv); if (strv == null) throw new IllegalArgumentException((new StringBuilder("Cannot obtain field value. field_name: ")) .append(field.name()).append(".").toString()); field.setValue(strv); } BufferedReader reader = new BufferedReader(new StringReader(strv)); TokenStream ts = analyzer.tokenStream(field.name(), reader); v = countTokenStream(ts); ts.close(); reader.close(); return v; }
From source file:org.apache.solr.schema.JsonPreAnalyzedParser.java
License:Apache License
/**
 * Serializes a pre-analyzed {@link Field} to the JSON pre-analyzed format:
 * a map holding the format version, the stored string/binary value (only when
 * the field type is stored), and one map per token listing its attributes.
 *
 * NOTE(review): the token stream is consumed without reset()/end()/close();
 * Lucene's TokenStream workflow normally requires reset() before
 * incrementToken() — confirm the streams supplied here are pre-positioned.
 */
@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);
    if (f.fieldType().stored()) {
        // Stored content: emit string and/or binary value (binary is Base64-encoded).
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            // Term text may come from either a char-term or a bytes-ref attribute;
            // collect both and pick one after the attribute loop.
            String cTerm = null;
            String tTerm = null;
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    // Well-known attributes map to short keys; anything else is
                    // stored under its class name via toString().
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        // Empty payloads are skipped entirely.
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            // Char-term text wins over the bytes-ref form when both are present.
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}
From source file:org.apache.solr.schema.SimplePreAnalyzedParser.java
License:Apache License
@Override public String toFormattedString(Field f) throws IOException { StringBuilder sb = new StringBuilder(); sb.append(VERSION + " "); if (f.fieldType().stored()) { String s = f.stringValue(); if (s != null) { // encode the equals sign s = s.replaceAll("=", "\\="); sb.append('='); sb.append(s);//from w ww. jav a2s.c o m sb.append('='); } } TokenStream ts = f.tokenStreamValue(); if (ts != null) { StringBuilder tok = new StringBuilder(); boolean next = false; while (ts.incrementToken()) { if (next) { sb.append(' '); } else { next = true; } tok.setLength(0); Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator(); String cTerm = null; String tTerm = null; while (it.hasNext()) { Class<? extends Attribute> cl = it.next(); if (!ts.hasAttribute(cl)) { continue; } Attribute att = ts.getAttribute(cl); if (cl.isAssignableFrom(CharTermAttribute.class)) { CharTermAttribute catt = (CharTermAttribute) att; cTerm = escape(catt.buffer(), catt.length()); } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) { TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att; char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray(); tTerm = escape(tTermChars, tTermChars.length); } else { if (tok.length() > 0) tok.append(','); if (cl.isAssignableFrom(FlagsAttribute.class)) { tok.append("f=" + Integer.toHexString(((FlagsAttribute) att).getFlags())); } else if (cl.isAssignableFrom(OffsetAttribute.class)) { tok.append("s=" + ((OffsetAttribute) att).startOffset() + ",e=" + ((OffsetAttribute) att).endOffset()); } else if (cl.isAssignableFrom(PayloadAttribute.class)) { BytesRef p = ((PayloadAttribute) att).getPayload(); if (p != null && p.length > 0) { tok.append("p=" + bytesToHex(p.bytes, p.offset, p.length)); } else if (tok.length() > 0) { tok.setLength(tok.length() - 1); // remove the last comma } } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) { tok.append("i=" + ((PositionIncrementAttribute) att).getPositionIncrement()); } else if 
(cl.isAssignableFrom(TypeAttribute.class)) { tok.append("y=" + escape(((TypeAttribute) att).type())); } else { tok.append(cl.getName() + "=" + escape(att.toString())); } } } String term = null; if (cTerm != null) { term = cTerm; } else { term = tTerm; } if (term != null && term.length() > 0) { if (tok.length() > 0) { tok.insert(0, term + ","); } else { tok.insert(0, term); } } sb.append(tok); } } return sb.toString(); }
From source file:org.apache.solr.update.processor.PreAnalyzedUpdateProcessorTest.java
License:Apache License
/**
 * Runs one pre-analyzed-update-processor chain over two documents and checks
 * how each schema field is treated: "teststop" keeps both stored string and
 * token stream, "ssto" keeps only the stored part, "sind" keeps only the
 * indexed (token stream) part, and unknown fields are dropped.
 *
 * @param chain    name of the update processor chain under test
 * @param title    raw title values for doc 1 and doc 2
 * @param teststop pre-analyzed values for doc 1 and doc 2
 */
private void test(String chain, String[] title, String[] teststop) throws Exception {
    SolrInputDocument doc = processAdd(chain, doc(f("id", "1"), f("title", title[0]), f("teststop", teststop[0]),
            f("nonexistent", "foobar"), f("ssto", teststop[0]), f("sind", teststop[0])));
    assertEquals("title should be unchanged", title[0], doc.getFieldValue("title"));
    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
    Field f = (Field) doc.getFieldValue("teststop");
    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
    // check how SchemaField type affects stored/indexed part processing
    f = (Field) doc.getFieldValue("ssto");
    assertNotNull("should have ssto", f);
    assertNotNull("should have stringValue", f.stringValue());
    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
    f = (Field) doc.getFieldValue("sind");
    assertNotNull("should have sind", f);
    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
    // Second document: same expectations as the first, exercising the second
    // title/teststop variants.
    doc = processAdd(chain, doc(f("id", "2"), f("title", title[1]), f("teststop", teststop[1]),
            f("nonexistent", "foobar"), f("ssto", teststop[1]), f("sind", teststop[1])));
    assertTrue("title should be a Field", doc.getFieldValue("title") instanceof Field);
    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
    f = (Field) doc.getFieldValue("teststop");
    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
    // check how SchemaField type affects stored/indexed part processing
    f = (Field) doc.getFieldValue("ssto");
    assertNotNull("should have ssto", f);
    assertNotNull("should have stringValue", f.stringValue());
    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
    f = (Field) doc.getFieldValue("sind");
    assertNotNull("should have sind", f);
    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());
    // Commit and verify index-level behavior: stored-only fields are not
    // searchable, indexed-only fields are.
    assertU(commit());
    assertQ(req("teststop:\"one two three\""), "//str[@name='id'][.='1']",
            "//str[@name='teststop'][.='this is a test.']");
    assertQ(req("teststop:three"), "//*[@numFound='2']", "//result/doc[1]/str[@name='id'][.='1']",
            "//result/doc[1]/str[@name='title'][.='not pre-analyzed']", "//result/doc[2]/str[@name='id'][.='2']",
            "//result/doc[2]/arr[@name='title']/str[.='string value']");
    assertQ(req("ssto:three"), "//*[@numFound='0']");
    assertQ(req("sind:three"), "//*[@numFound='2']");
}
From source file:org.apache.uima.lucas.indexer.FieldBuilderTest.java
License:Apache License
@Test
public void testCreateFieldConcatenated() throws Exception {
    // Indexed field descriptions route the concatenated token streams
    // through the filter builder.
    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
    TokenStream expectedStream = createMock(TokenStream.class);
    expect(filterBuilder.filter(isA(TokenStreamConcatenator.class), isA(Collection.class)))
            .andReturn(expectedStream);
    replay(filterBuilder);

    Collection<Field> createdFields = fieldBuilder.createFields(tokenStreams, fieldDescription);

    verify(filterBuilder);
    // The first created field must carry the expected name and the filtered stream.
    Field firstField = createdFields.iterator().next();
    assertEquals("field1", firstField.name());
    assertEquals(expectedStream, firstField.tokenStreamValue());
}
From source file:org.apache.uima.lucas.indexer.FieldBuilderTest.java
License:Apache License
@Test
public void testCreateFieldMerged() throws Exception {
    // With merge enabled, the token streams are combined via a
    // TokenStreamMerger before the filter builder is applied.
    fieldDescription.setMerge(true);
    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
    TokenStream expectedStream = createMock(TokenStream.class);
    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class)))
            .andReturn(expectedStream);
    replay(filterBuilder);

    Collection<Field> createdFields = fieldBuilder.createFields(tokenStreams, fieldDescription);

    verify(filterBuilder);
    // The first created field must carry the expected name and the merged stream.
    Field firstField = createdFields.iterator().next();
    assertEquals("field1", firstField.name());
    assertEquals(expectedStream, firstField.tokenStreamValue());
}
From source file:org.apache.uima.lucas.ProspectiveSearchAE.java
License:Apache License
/**
 * Matches all configured search queries against the current CAS document
 * using an in-memory index; for each query scoring above the threshold,
 * records a search-result feature structure and (optionally) annotations
 * for the matching tokens so callers can highlight them.
 *
 * @param aCAS the CAS to index and annotate
 * @throws AnalysisEngineProcessException if token-stream consumption fails
 */
@Override
public void process(CAS aCAS) throws AnalysisEngineProcessException {
    // First create the index of the document text
    MemoryIndex index = new MemoryIndex();
    List fields = createDocument(aCAS).getFields();
    for (Iterator it = fields.iterator(); it.hasNext();) {
        Field field = (Field) it.next();
        // Only indexed fields with a pre-built token stream go into the index.
        if (field.isIndexed() && field.tokenStreamValue() != null) {
            index.addField(field.name(), field.tokenStreamValue());
        }
    }
    // Search all queries against the one document index
    for (SearchQuery query : searchQueryProvider.getSearchQueries(aCAS)) {
        float score = index.search(query.query());
        if (score > matchingThreshold) {
            // Add a FS to the CAS with the search result
            FeatureStructure searchResult = aCAS.createFS(searchResultType);
            searchResult.setLongValue(searchResultIdFeature, query.id());
            aCAS.addFsToIndexes(searchResult);
            // Find matching tokens and link their annotations
            // in case the user wants search term highlighting
            if (searchResultMatchingTextFeature != null) {
                // Re-create the document: the streams indexed above have
                // already been consumed.
                fields = createDocument(aCAS).getFields();
                for (Iterator it = fields.iterator(); it.hasNext();) {
                    Field field = (Field) it.next();
                    if (field.isIndexed() && field.tokenStreamValue() != null) {
                        TokenStream tokenStream = field.tokenStreamValue();
                        Collection<AnnotationFS> matchingTextAnnotations = new LinkedList<AnnotationFS>();
                        QueryScorer scorer = new QueryScorer(query.query(), field.name());
                        scorer.startFragment(new TextFragment(new StringBuffer(aCAS.getDocumentText()), 0, 0));
                        try {
                            scorer.init(tokenStream);
                            OffsetAttribute offsetAttr = null;
                            // Every token the scorer rates above zero becomes a
                            // matching-text annotation spanning its offsets.
                            while (tokenStream.incrementToken()) {
                                offsetAttr = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class);
                                float tokenScore = scorer.getTokenScore();
                                if (tokenScore > 0) {
                                    AnnotationFS annotation = aCAS.createAnnotation(matchingTextType,
                                            offsetAttr.startOffset(), offsetAttr.endOffset());
                                    matchingTextAnnotations.add(annotation);
                                }
                            }
                        } catch (IOException e) {
                            throw new AnalysisEngineProcessException(e);
                        }
                        // Copy matches into a CAS array and attach it to the result.
                        ArrayFS matchtingTextArray = aCAS.createArrayFS(matchingTextAnnotations.size());
                        int matchtingTextArrayIndex = 0;
                        for (AnnotationFS matchingTextAnnotation : matchingTextAnnotations) {
                            matchtingTextArray.set(matchtingTextArrayIndex++, matchingTextAnnotation);
                        }
                        searchResult.setFeatureValue(searchResultMatchingTextFeature, matchtingTextArray);
                    }
                }
            }
        }
    }
}
From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java
License:Apache License
@Test
public void testSimpleAllMappers() throws Exception {
    // Parse the mapping definition and run a sample document through it.
    String mappingSource = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/mapping.json");
    JsonDocumentMapper mapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(mappingSource);
    byte[] sourceBytes = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json");
    Document parsedDoc = mapper.parse(sourceBytes).doc();

    // The _all field's token stream must aggregate exactly the two
    // fields marked for inclusion.
    Field allField = parsedDoc.getField("_all");
    AllEntries entries = ((AllTokenFilter) allField.tokenStreamValue()).allEntries();
    assertThat(entries.fields().size(), equalTo(2));
    assertThat(entries.fields().contains("name.last"), equalTo(true));
    assertThat(entries.fields().contains("simple1"), equalTo(true));
}
From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java
License:Apache License
@Test public void testSimpleAllMappersWithReparse() throws Exception { String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/mapping.json"); JsonDocumentMapper docMapper = (JsonDocumentMapper) new JsonDocumentMapperParser( new AnalysisService(new Index("test"))).parse(mapping); String builtMapping = docMapper.buildSource(); // System.out.println(builtMapping); // reparse it JsonDocumentMapper builtDocMapper = (JsonDocumentMapper) new JsonDocumentMapperParser( new AnalysisService(new Index("test"))).parse(builtMapping); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json"); Document doc = builtDocMapper.parse(json).doc(); Field field = doc.getField("_all"); AllEntries allEntries = ((AllTokenFilter) field.tokenStreamValue()).allEntries(); assertThat(allEntries.fields().size(), equalTo(2)); assertThat(allEntries.fields().contains("name.last"), equalTo(true)); assertThat(allEntries.fields().contains("simple1"), equalTo(true)); }
From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java
License:Apache License
@Test
public void testSimpleAllMappersWithStore() throws Exception {
    // Use the store-enabled mapping so _all keeps a stored string value.
    String mappingSource = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/store-mapping.json");
    JsonDocumentMapper mapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(mappingSource);
    byte[] sourceBytes = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json");
    Document parsedDoc = mapper.parse(sourceBytes).doc();

    // Token stream must aggregate exactly the two included fields.
    Field allField = parsedDoc.getField("_all");
    AllEntries entries = ((AllTokenFilter) allField.tokenStreamValue()).allEntries();
    assertThat(entries.fields().size(), equalTo(2));
    assertThat(entries.fields().contains("name.last"), equalTo(true));
    assertThat(entries.fields().contains("simple1"), equalTo(true));

    // The stored value must equal the concatenated text of those entries.
    String storedText = allField.stringValue();
    assertThat(storedText, equalTo(entries.buildText()));
}