List of usage examples for org.apache.solr.schema.IndexSchema#getFieldOrNull
public SchemaField getFieldOrNull(String fieldName)
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
/**
 * Decides whether the FastVectorHighlighter may be used for the given field.
 *
 * <p>Requires that the field exists in the schema, that FVH was explicitly requested via
 * {@link HighlightParams#USE_FVH}, and that the field stores both term positions and term
 * offsets. When FVH was requested but the term-vector data is missing, a warning is logged
 * and the plain Highlighter will be used instead.
 */
private boolean useFastVectorHighlighter(SolrParams params, IndexSchema schema, String fieldName) {
    final SchemaField field = schema.getFieldOrNull(fieldName);
    if (field == null) {
        return false;
    }
    // FVH must be opted into per field; the default is the plain Highlighter.
    if (!params.getFieldBool(fieldName, HighlightParams.USE_FVH, false)) {
        return false;
    }
    final boolean hasPositionsAndOffsets = field.storeTermPositions() && field.storeTermOffsets();
    if (!hasPositionsAndOffsets) {
        log.warn(
                "Solr will use Highlighter instead of FastVectorHighlighter because {} field does not store TermPositions and TermOffsets.",
                fieldName);
    }
    return hasPositionsAndOffsets;
}
From source file:com.o19s.solr.swan.highlight.SwanHighlighter.java
License:Apache License
private void doHighlightingByHighlighter(Query query, SolrQueryRequest req, NamedList docSummaries, int docId, Document doc, String fieldName) throws IOException { final SolrIndexSearcher searcher = req.getSearcher(); final IndexSchema schema = searcher.getSchema(); // TODO: Currently in trunk highlighting numeric fields is broken (Lucene) - // so we disable them until fixed (see LUCENE-3080)! // BEGIN: Hack final SchemaField schemaField = schema.getFieldOrNull(fieldName); if (schemaField != null && ((schemaField.getType() instanceof org.apache.solr.schema.TrieField) || (schemaField.getType() instanceof org.apache.solr.schema.TrieDateField))) return;//from www .j a v a2 s. com // END: Hack SolrParams params = req.getParams(); IndexableField[] docFields = doc.getFields(fieldName); List<String> listFields = new ArrayList<String>(); for (IndexableField field : docFields) { listFields.add(field.stringValue()); } String[] docTexts = listFields.toArray(new String[listFields.size()]); // according to Document javadoc, doc.getValues() never returns null. check empty instead of null if (docTexts.length == 0) return; TokenStream tokenStream; int numFragments = getMaxSnippets(fieldName, params); boolean mergeContiguousFragments = isMergeContiguousFragments(fieldName, params); List<TextFragment> frags = new ArrayList<TextFragment>(); TermOffsetsTokenStream tots = null; // to be non-null iff we're using TermOffsets optimization try { // TokenStream tvStream = TokenSources.getTokenStream(searcher.getIndexReader(), docId, fieldName); // if (tvStream != null) { // tots = new TermOffsetsTokenStream(tvStream); // } } catch (IllegalArgumentException e) { // No problem. But we can't use TermOffsets optimization. } for (int j = 0; j < docTexts.length; j++) { if (tots != null) { // if we're using TermOffsets optimization, then get the next // field value's TokenStream (i.e. 
get field j's TokenStream) from tots: tokenStream = tots.getMultiValuedTokenStream(docTexts[j].length()); } else { // fall back to analyzer tokenStream = createAnalyzerTStream(schema, fieldName, docTexts[j]); } int maxCharsToAnalyze = params.getFieldInt(fieldName, HighlightParams.MAX_CHARS, Highlighter.DEFAULT_MAX_CHARS_TO_ANALYZE); Highlighter highlighter; if (Boolean.valueOf(req.getParams().get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true"))) { if (maxCharsToAnalyze < 0) { tokenStream = new CachingTokenFilter(tokenStream); } else { tokenStream = new CachingTokenFilter( new OffsetLimitTokenFilter(tokenStream, maxCharsToAnalyze)); } // get highlighter highlighter = getPhraseHighlighter(query, fieldName, req, (CachingTokenFilter) tokenStream); // after highlighter initialization, reset tstream since construction of highlighter already used it tokenStream.reset(); } else { // use "the old way" highlighter = getHighlighter(query, fieldName, req); } if (maxCharsToAnalyze < 0) { highlighter.setMaxDocCharsToAnalyze(docTexts[j].length()); } else { highlighter.setMaxDocCharsToAnalyze(maxCharsToAnalyze); } try { TextFragment[] bestTextFragments = highlighter.getBestTextFragments(tokenStream, docTexts[j], mergeContiguousFragments, numFragments); for (int k = 0; k < bestTextFragments.length; k++) { if ((bestTextFragments[k] != null) && (bestTextFragments[k].getScore() > 0)) { frags.add(bestTextFragments[k]); } } } catch (InvalidTokenOffsetsException e) { throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, e); } } // sort such that the fragments with the highest score come first Collections.sort(frags, new Comparator<TextFragment>() { public int compare(TextFragment arg0, TextFragment arg1) { return Math.round(arg1.getScore() - arg0.getScore()); } }); // convert fragments back into text // TODO: we can include score and position information in output as snippet attributes String[] summaries = null; if (frags.size() > 0) { ArrayList<String> fragTexts = new 
ArrayList<String>(); for (TextFragment fragment : frags) { if ((fragment != null) && (fragment.getScore() > 0)) { fragTexts.add(fragment.toString()); } if (fragTexts.size() >= numFragments) break; } summaries = (String[]) fragTexts.toArray(); if (summaries.length > 0) docSummaries.add(fieldName, summaries); } // no summeries made, copy text from alternate field if (summaries == null || summaries.length == 0) { alternateField(docSummaries, params, doc, fieldName); } }
From source file:com.sindicetech.siren.solr.facet.SirenFacetProcessor.java
License:Open Source License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { SolrCore core = cmd.getReq().getCore(); IndexSchema schema = core.getLatestSchema(); if (!schema.isMutable()) { throw new SolrException(BAD_REQUEST, String.format("This IndexSchema, of core %s, is not mutable.", core.getName())); }//from w w w . j a v a 2 s.c o m SolrInputDocument doc = cmd.getSolrInputDocument(); extractor.setSchema(schema); List<SirenFacetEntry> entries = extractor.extractFacets(doc); // update schema // use Sets so that we add a fieldname only once even if it is generated multiple times (for // multiple paths) Set<SchemaField> newFields = new HashSet<SchemaField>(); for (SirenFacetEntry entry : entries) { // skip entry if the field is already in the schema if (schema.getFieldOrNull(entry.toFieldName()) != null) { continue; } TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype); // skip facet values that are too large altogether - they don't make sense for faceting if (entry.value instanceof String && ((String) entry.value) .length() > (typeMapping.maxFieldSize != null ? 
typeMapping.maxFieldSize : DEFAULT_MAX_FACET_VALUE_LENGTH)) { continue; } String fieldTypeName = getTypeMappingValueClass(entry.datatype.xsdDatatype).fieldType; Map<String, Boolean> options = new HashMap<String, Boolean>(); // see FieldProperties.propertyNames[] options.put("indexed", false); options.put("stored", false); options.put("docValues", true); options.put("multiValued", true); newFields.add(schema.newField(entry.toFieldName(), fieldTypeName, options)); } if (!newFields.isEmpty()) { IndexSchema newSchema = schema.addFields(newFields); cmd.getReq().getCore().setLatestSchema(newSchema); cmd.getReq().updateSchemaToLatest(); logger.debug("Successfully added field(s) to the schema."); } // update document for (SirenFacetEntry entry : entries) { TypeMapping typeMapping = getTypeMappingValueClass(entry.datatype.xsdDatatype); // skip facet values that are too large altogether - they don't make sense for faceting if (entry.value instanceof String && ((String) entry.value) .length() > (typeMapping.maxFieldSize != null ? typeMapping.maxFieldSize : DEFAULT_MAX_FACET_VALUE_LENGTH)) { continue; } doc.addField(entry.toFieldName(), entry.value); } // call the next one in chain super.processAdd(cmd); }
From source file:com.sindicetech.siren.solr.facet.TestSirenFacetProcessorFactory.java
License:Open Source License
public void testStringField() throws Exception { String json = "{\"knows\": [{\"name\":\"josef\"}, {\"name\":\"szymon\"}]}"; IndexSchema schema = h.getCore().getLatestSchema(); SolrInputDocument d = processAdd("generate-facets-processor", doc(f("id", "1"), f("json", json))); assertNotNull(d);//from w ww . j a v a 2s . co m schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); json = "{\"knows\": [{\"name\": null}]}"; d = processAdd("generate-facets-processor", doc(f("id", "2"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); json = "{\"knows\": {\"name\": true}}"; d = processAdd("generate-facets-processor", doc(f("id", "3"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("string.json.knows.name")); assertEquals("string", schema.getFieldType("string.json.knows.name").getTypeName()); json = "{\"age\": 1}"; d = processAdd("generate-facets-processor", doc(f("id", "4"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("long.json.age")); assertEquals("tlong", schema.getFieldType("long.json.age").getTypeName()); json = "{\"length\": 18.9}"; d = processAdd("generate-facets-processor", doc(f("id", "5"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.length")); assertEquals("tdouble", schema.getFieldType("double.json.length").getTypeName()); // no facets should be generated for fields with values longer than SirenFacetProcessor.MAX_FACET_VALUE_LENGTH String tooLongValue = "Every night and every morn Some to misery are born, " + "Every morn and every night Some 
are born to sweet delight. Some are born to sweet" + " delight, Some are born to endless night."; json = "{\"description\": \"" + tooLongValue + "\"}"; SirenFacetProcessorFactory factory = (SirenFacetProcessorFactory) h.getCore() .getUpdateProcessingChain("generate-facets-processor").getFactories()[0]; TypeMapping stringTypeMapping = factory.getTypeMappingValueClass(FacetDatatype.STRING.xsdDatatype); assertTrue("Bad test. Test value has to be longer than maxFieldSize for TypeMapping of string = " + stringTypeMapping.maxFieldSize + " but its length is only " + tooLongValue.length() + ". You should check the size of " + "SirenFacetProcessor.DEFAULT_MAX_FACET_VALUE_LENGTH or solrconfig*.xml for the maxFieldSize setting of the string typeMapping " + " of the updateRequestProcessorChain", tooLongValue.length() > stringTypeMapping.maxFieldSize); d = processAdd("generate-facets-processor", doc(f("id", "6"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNull(schema.getFieldOrNull("string.json.description")); assertNull(d.getFieldValue("string.json.description")); }
From source file:com.sindicetech.siren.solr.facet.TestSirenFacetProcessorFactory.java
License:Open Source License
public void testCustomDatatypeField() throws Exception { String json = "{\"rating\": {\"_datatype_\": \"http://www.w3.org/2001/XMLSchema#double\", \"_value_\":\"5.4\"}}"; IndexSchema schema = h.getCore().getLatestSchema(); SolrInputDocument d = processAdd("generate-facets-processor", doc(f("id", "1"), f("json", json))); assertNotNull(d);/*w w w .j ava 2 s . c o m*/ schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.rating")); assertEquals("tdouble", schema.getFieldType("double.json.rating").getTypeName()); assertTrue((5.4 - (double) d.getFieldValue("double.json.rating")) < 0.01); getWrapper().add(d); // add so that we can test facets by querying this.commit(); SolrQuery q = new SolrQuery(); q.setRequestHandler("keyword"); q.setParam("nested", "{!lucene} *:*"); q.setFacet(true); q.addFacetField("double.json.rating"); QueryResponse r = getWrapper().getServer().query(q); // we know there is only one facet field with one value assertEquals(1, r.getFacetFields().get(0).getValues().get(0).getCount()); json = "{\"rating\": {\"_datatype_\": \"http://www.w3.org/2001/XMLSchema#float\", \"_value_\":\"-8.4\"}}"; schema = h.getCore().getLatestSchema(); d = processAdd("generate-facets-processor", doc(f("id", "2"), f("json", json))); assertNotNull(d); schema = h.getCore().getLatestSchema(); assertNotNull(schema.getFieldOrNull("double.json.rating")); assertEquals("tdouble", schema.getFieldType("double.json.rating").getTypeName()); assertTrue((-8.4 + (double) d.getFieldValue("double.json.rating")) < 0.01); getWrapper().add(d); // add so that we can test facets by querying this.commit(); r = getWrapper().getServer().query(q); // there is only one facet field with two different values each with a single count assertEquals(1, r.getFacetFields().get(0).getValues().get(0).getCount()); assertEquals(1, r.getFacetFields().get(0).getValues().get(1).getCount()); }
From source file:com.sindicetech.siren.solr.response.SirenProjectionTransformer.java
License:Open Source License
@Override public void transform(SolrDocument doc, int docid) throws IOException { Query query = context.query;/*from w ww .java 2 s . co m*/ SimpleJsonByQueryExtractor extractor = new SimpleJsonByQueryExtractor(); try { IndexSchema schema = context.req.getSchema(); for (String fieldName : doc.getFieldNames()) { FieldType ft = schema.getFieldOrNull(fieldName).getType(); if (ft instanceof ExtendedJsonField) { String sirenField = (String) doc.getFieldValue(fieldName); String json = extractor.extractAsString(sirenField, query); if (json == null) { // query doesn't contain variables, no transformation is necessary continue; } doc.setField(fieldName, json); } } } catch (ProjectionException e) { throw new IOException( String.format("Problem while projecting (extracting variables from matched document id %s", doc.getFieldValue("id")), e); } }
From source file:net.yacy.cora.federate.solr.responsewriter.EnhancedXMLResponseWriter.java
License:Open Source License
private static final void writeDoc(final Writer writer, final IndexSchema schema, final String name, final List<IndexableField> fields, final float score, final boolean includeScore) throws IOException { startTagOpen(writer, "doc", name); if (includeScore) { writeTag(writer, "float", "score", Float.toString(score), false); }//from ww w. j a va2s . com int sz = fields.size(); int fidx1 = 0, fidx2 = 0; while (fidx1 < sz) { IndexableField value = fields.get(fidx1); String fieldName = value.name(); fidx2 = fidx1 + 1; while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) { fidx2++; } SchemaField sf = schema == null ? null : schema.getFieldOrNull(fieldName); if (sf == null) { sf = new SchemaField(fieldName, new TextField()); } FieldType type = sf.getType(); if (fidx1 + 1 == fidx2) { if (sf.multiValued()) { startTagOpen(writer, "arr", fieldName); writer.write(lb); String sv = value.stringValue(); writeField(writer, type.getTypeName(), null, sv); //sf.write(this, null, f1); writer.write("</arr>"); } else { writeField(writer, type.getTypeName(), value.name(), value.stringValue()); //sf.write(this, f1.name(), f1); } } else { startTagOpen(writer, "arr", fieldName); writer.write(lb); for (int i = fidx1; i < fidx2; i++) { String sv = fields.get(i).stringValue(); writeField(writer, type.getTypeName(), null, sv); //sf.write(this, null, (Fieldable)this.tlst.get(i)); } writer.write("</arr>"); writer.write(lb); } fidx1 = fidx2; } writer.write("</doc>"); writer.write(lb); }
From source file:net.yacy.cora.federate.solr.responsewriter.FlatJSONResponseWriter.java
License:Open Source License
private static final void writeDoc(final Writer writer, final IndexSchema schema, final String name, final List<IndexableField> fields, final float score, final boolean includeScore) throws IOException { JSONObject json = new JSONObject(true); int sz = fields.size(); int fidx1 = 0, fidx2 = 0; while (fidx1 < sz) { IndexableField value = fields.get(fidx1); String fieldName = value.name(); fidx2 = fidx1 + 1;/*from w w w . j av a2 s. co m*/ while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) { fidx2++; } SchemaField sf = schema == null ? null : schema.getFieldOrNull(fieldName); if (sf == null) { sf = new SchemaField(fieldName, new TextField()); } FieldType type = sf.getType(); if (fidx1 + 1 == fidx2) { if (sf.multiValued()) { JSONArray a = new JSONArray(); json.put(fieldName, a); JSONObject j = new JSONObject(); String sv = value.stringValue(); setValue(j, type.getTypeName(), "x", sv); //sf.write(this, null, f1); a.add(j.get("x")); } else { setValue(json, type.getTypeName(), value.name(), value.stringValue()); } } else { JSONArray a = new JSONArray(); json.put(fieldName, a); for (int i = fidx1; i < fidx2; i++) { String sv = fields.get(i).stringValue(); JSONObject j = new JSONObject(); setValue(j, type.getTypeName(), "x", sv); //sf.write(this, null, f1); a.add(j.get("x")); } } fidx1 = fidx2; } writer.write(json.toString()); writer.write(lb); }
From source file:net.yacy.cora.federate.solr.responsewriter.HTMLResponseWriter.java
License:Open Source License
public static final LinkedHashMap<String, String> translateDoc(final IndexSchema schema, final Document doc) { List<IndexableField> fields = doc.getFields(); int sz = fields.size(); int fidx1 = 0, fidx2 = 0; LinkedHashMap<String, String> kv = new LinkedHashMap<String, String>(); while (fidx1 < sz) { IndexableField value = fields.get(fidx1); String fieldName = value.name(); fidx2 = fidx1 + 1;//from w w w.j a v a 2 s . c o m while (fidx2 < sz && fieldName.equals(fields.get(fidx2).name())) { fidx2++; } SchemaField sf = schema.getFieldOrNull(fieldName); if (sf == null) sf = new SchemaField(fieldName, new TextField()); FieldType type = sf.getType(); if (fidx1 + 1 == fidx2) { if (sf.multiValued()) { String sv = value.stringValue(); kv.put(fieldName, field2string(type, sv)); } else { kv.put(fieldName, field2string(type, value.stringValue())); } } else { int c = 0; for (int i = fidx1; i < fidx2; i++) { String sv = fields.get(i).stringValue(); kv.put(fieldName + "_" + c++, field2string(type, sv)); } } fidx1 = fidx2; } return kv; }
From source file:org.alfresco.solr.LegacySolrInformationServer.java
License:Open Source License
/**
 * Converts a SolrInputDocument into a Lucene Document using the given schema, applying
 * per-field boosts, binary-field handling, the legacy DateField conversion hack, and all
 * configured copyField directives; finally validates required fields (adding defaults
 * where configured).
 *
 * @param doc    the incoming Solr input document
 * @param schema the index schema resolving field definitions and copy rules
 * @param model  Alfresco data model used for term-vector/norms settings on Reader fields
 * @return the populated Lucene Document
 * @throws SolrException (BAD_REQUEST) on multiple values for a single-valued field, on a
 *         value for a field unknown to the schema, or on a missing required field with no
 *         default value
 */
public static Document toDocument(SolrInputDocument doc, IndexSchema schema, AlfrescoSolrDataModel model) {
    Document out = new Document();
    out.setBoost(doc.getDocumentBoost());

    // Load fields from SolrDocument to Document
    for (SolrInputField field : doc) {
        String name = field.getName();
        SchemaField sfield = schema.getFieldOrNull(name);
        // 'used' tracks whether any destination (direct or copyField) accepted the value
        boolean used = false;
        float boost = field.getBoost();

        // Make sure it has the correct number
        if (sfield != null && !sfield.multiValued() && field.getValueCount() > 1) {
            String id = "";
            SchemaField sf = schema.getUniqueKeyField();
            if (sf != null) {
                // include the unique key in the error so the offending document is identifiable
                id = "[" + doc.getFieldValue(sf.getName()) + "] ";
            }
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                    "ERROR: " + id + "multiple values encountered for non multiValued field " + sfield.getName()
                            + ": " + field.getValue());
        }

        // load each field value
        boolean hasField = false;
        for (Object v : field) {
            // TODO: Sort out null
            if (v == null) {
                continue;
            }
            String val = null;
            hasField = true;
            boolean isBinaryField = false;
            if (sfield != null && sfield.getType() instanceof BinaryField) {
                // binary fields are created by the type itself; no string conversion
                isBinaryField = true;
                BinaryField binaryField = (BinaryField) sfield.getType();
                Field f = binaryField.createField(sfield, v, boost);
                if (f != null)
                    out.add(f);
                used = true;
            } else {
                // TODO!!! HACK -- date conversion
                if (sfield != null && v instanceof Date && sfield.getType() instanceof DateField) {
                    // NOTE(review): toInternal already yields the internal form; the extra 'Z'
                    // suffix appears deliberate here — confirm against the DateField version in use.
                    DateField df = (DateField) sfield.getType();
                    val = df.toInternal((Date) v) + 'Z';
                } else if (v != null) {
                    val = v.toString();
                }
                if (sfield != null) {
                    if (v instanceof Reader) {
                        // Reader values are streamed into the field with model-driven
                        // term-vector/norms settings rather than converted to a String
                        used = true;
                        Field f = new Field(field.getName(), (Reader) v, model.getFieldTermVec(sfield));
                        f.setOmitNorms(model.getOmitNorms(sfield));
                        f.setOmitTermFreqAndPositions(sfield.omitTf());
                        if (f != null) {
                            // null fields are not added
                            out.add(f);
                        }
                    } else {
                        used = true;
                        Field f = sfield.createField(val, boost);
                        if (f != null) {
                            // null fields are not added
                            out.add(f);
                        }
                    }
                }
            }

            // Check if we should copy this field to any other fields.
            // This could happen whether it is explicit or not.
            List<CopyField> copyFields = schema.getCopyFieldsList(name);
            for (CopyField cf : copyFields) {
                SchemaField destinationField = cf.getDestination();
                // check if the copy field is a multivalued or not
                if (!destinationField.multiValued() && out.get(destinationField.getName()) != null) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                            "ERROR: multiple values encountered for non multiValued copy field "
                                    + destinationField.getName() + ": " + val);
                }
                used = true;
                Field f = null;
                if (isBinaryField) {
                    // binary values only copy into binary destinations; others are skipped
                    if (destinationField.getType() instanceof BinaryField) {
                        BinaryField binaryField = (BinaryField) destinationField.getType();
                        f = binaryField.createField(destinationField, v, boost);
                    }
                } else {
                    // getLimitedValue applies the copyField's maxChars truncation
                    f = destinationField.createField(cf.getLimitedValue(val), boost);
                }
                if (f != null) {
                    // null fields are not added
                    out.add(f);
                }
            }

            // In lucene, the boost for a given field is the product of the
            // document boost and *all* boosts on values of that field.
            // For multi-valued fields, we only want to set the boost on the
            // first field.
            boost = 1.0f;
        }

        // make sure the field was used somehow...
        if (!used && hasField) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ERROR:unknown field '" + name + "'");
        }
    }

    // Now validate required fields or add default values
    // fields with default values are defacto 'required'
    for (SchemaField field : schema.getRequiredFields()) {
        if (out.getField(field.getName()) == null) {
            if (field.getDefaultValue() != null) {
                out.add(field.createField(field.getDefaultValue(), 1.0f));
            } else {
                String id = schema.printableUniqueKey(out);
                String msg = "Document [" + id + "] missing required field: " + field.getName();
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, msg);
            }
        }
    }
    return out;
}