List of usage examples for org.apache.solr.common SolrInputDocument getFieldValue
@Override
public Object getFieldValue(String name)
From source file:actors.SolrActor.java
License:Apache License
public void indexUpdated(SolrIndexEvent msg) { try {/*from www .j a v a 2s . c o m*/ System.out.println("SolrIndexEvent"); SolrInputDocument doc = msg.getDocuement(); //Making realtime GET System.out.println("GET"); SolrQuery parameters = new SolrQuery(); parameters.setRequestHandler("/get"); String f1 = doc.getFieldValue("literal.id").toString(); String f2 = doc.getFieldValue("literal.rev").toString(); parameters.set("id", f1); parameters.set("rev", f2); //System.out.println(parameters); QueryResponse response = server.query(parameters); NamedList<Object> result = response.getResponse(); //System.out.println(response.getResponse()); //System.out.println(result.size() ); //System.out.println(); //System.out.println(result); //validate the doc exists if (result == null || result.get("doc") == null) { System.out.println("/update/extract"); ContentStreamUpdateRequest req = new ContentStreamUpdateRequest("/update/extract"); // url dropbox URL url = new URL(doc.getFieldValue("literal.links").toString()); ContentStreamBase content = new ContentStreamBase.URLStream(url); System.out.println("ContentStreamBase"); req.addContentStream(content); // Adittionall metadata req.setParam("literal.id", doc.getFieldValue("literal.id").toString()); req.setParam("literal.title", doc.getFieldValue("literal.title").toString()); req.setParam("literal.rev", doc.getFieldValue("literal.rev").toString()); req.setParam("literal.when", doc.getFieldValue("literal.when").toString()); req.setParam("literal.path", doc.getFieldValue("literal.path").toString()); req.setParam("literal.icon", doc.getFieldValue("literal.icon").toString()); req.setParam("literal.size", doc.getFieldValue("literal.size").toString()); req.setParam("literal.url", doc.getFieldValue("literal.links").toString()); req.setParam("uprefix", "attr_"); req.setParam("fmap.content", "attr_content"); req.setAction(AbstractUpdateRequest.ACTION.COMMIT, true, true); //Requesting Solr result = server.request(req); 
//System.out.println("Result: " + result.toString()); } else { System.out.println("It's already update"); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:at.newmedialab.lmf.search.services.indexing.SolrCoreRuntime.java
License:Apache License
/** * Queue the input document in the document queue of this SolrCoreRuntime and check whether it is * necessary to commit.//from w w w . j a v a 2 s. c om * * @param doc the document to be added to the Solr Core */ public void queueInputDocument(SolrInputDocument doc) { if (doc != null) { serverLock.lock(); try { final Object fv = doc.getFieldValue("id"); if (fv != null) { UpdateRequest update = new UpdateRequest(); update.setCommitWithin(10000); update.add(doc); //update.setAction(ACTION.COMMIT, false, false); server.request(update); } else { log.warn("({}) rejected document without 'id' for update", config.getName()); } } catch (IOException e) { log.warn("I/O exception while adding SOLR document to index", e); } catch (SolrServerException e) { log.warn("server exception while adding SOLR document to index", e); } finally { serverLock.unlock(); } } }
From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java
License:Apache License
/** Verifies that write() maps bean properties onto the matching Solr document fields. */
@Test
public void testWrite() {
    final ConvertableBean bean = new ConvertableBean("j73x73r", 1979);
    final SolrInputDocument target = new SolrInputDocument();

    converter.write(bean, target);

    Assert.assertEquals(bean.getStringProperty(), target.getFieldValue("stringProperty"));
    Assert.assertEquals(bean.getIntProperty(), target.getFieldValue("intProperty"));
}
From source file:com.cloudera.cdk.morphline.solrcell.SolrCellMorphlineTest.java
License:Apache License
/** * Test that the ContentHandler properly strips the illegal characters *//*from w w w . j av a2 s.c o m*/ @Test public void testTransformValue() { String fieldName = "user_name"; assertFalse("foobar".equals(getFoobarWithNonChars())); Metadata metadata = new Metadata(); // load illegal char string into a metadata field and generate a new document, // which will cause the ContentHandler to be invoked. metadata.set(fieldName, getFoobarWithNonChars()); StripNonCharSolrContentHandlerFactory contentHandlerFactory = new StripNonCharSolrContentHandlerFactory( DateUtil.DEFAULT_DATE_FORMATS); IndexSchema schema = h.getCore().getLatestSchema(); SolrContentHandler contentHandler = contentHandlerFactory.createSolrContentHandler(metadata, new MapSolrParams(new HashMap()), schema); SolrInputDocument doc = contentHandler.newDocument(); String foobar = doc.getFieldValue(fieldName).toString(); assertTrue("foobar".equals(foobar)); }
From source file:com.cominvent.solr.update.processor.MappingUpdateProcessor.java
License:Apache License
@Override public void processAdd(AddUpdateCommand cmd) throws IOException { if (isEnabled()) { SolrInputDocument doc = cmd.getSolrInputDocument(); // Fetch document id String docId = ""; if (doc.containsKey(docIdField)) docId = (String) doc.getFieldValue(docIdField); String inValue = (String) doc.getFieldValue(inputField); String outValue;/*from w ww .j a va 2s . co m*/ if (map.containsKey(inValue)) { outValue = map.get(inValue); } else { outValue = fallbackValue; } if (outValue != null && outValue.length() > 0) { log.debug("Mapping done for document " + docId + ": " + inValue + " => " + outValue); doc.setField(outputField, outValue); } } else { log.debug("MappingUpdateProcessor is not enabled. Skipping"); } super.processAdd(cmd); }
From source file:com.francelabs.datafari.updateprocessor.DatafariUpdateProcessor.java
License:Apache License
/**
 * Normalizes crawled documents before indexing: deduplicates id and
 * last_modified values injected by Tika, then derives the url, title, source
 * and extension fields from the document id (which is the document URL),
 * before handing the command to the rest of the processor chain.
 *
 * @param cmd the add command whose SolrInputDocument is rewritten in place
 * @throws IOException from URL parsing or the downstream processor chain
 */
@Override
public void processAdd(final AddUpdateCommand cmd) throws IOException {
    final SolrInputDocument doc = cmd.getSolrInputDocument();

    // Sometimes Tika put several ids so we keep the first one which is
    // always the right one (getFieldValue returns the first value).
    if (doc.getFieldValues("id").size() > 1) {
        final Object id = doc.getFieldValue("id");
        doc.remove("id");
        doc.addField("id", id);
    }

    // If present, ignored_filelastmodified overrides last_modified.
    if (doc.getFieldValue("ignored_filelastmodified") != null) {
        final Object last_modified = doc.getFieldValue("ignored_filelastmodified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    // Sometimes Tika put several last_modified dates, so we keep the first
    // one which is always the right one.
    if ((doc.getFieldValues("last_modified") != null) && (doc.getFieldValues("last_modified").size() > 1)) {
        final Object last_modified = doc.getFieldValue("last_modified");
        doc.remove("last_modified");
        doc.addField("last_modified", last_modified);
    }

    // The document id doubles as its URL.
    final String url = (String) doc.getFieldValue("id");

    // NOTE(review): only referenced by the disabled path-hierarchy facet code
    // below; kept so that feature can be re-enabled without other changes.
    final List<String> urlHierarchy = new ArrayList<>();
    // (Disabled) path-hierarchy facet: the removed code tokenized the path and
    // prefixed each token with its depth (0/home, 1/home/project, ...), then
    // called doc.addField("urlHierarchy", urlHierarchy).

    doc.addField("url", url);

    // Derive the file name: prefer the stream name Tika recorded, otherwise
    // take everything after the last '/' of the URL.
    String filename = "";
    final SolrInputField streamNameField = doc.get("ignored_stream_name");
    if (streamNameField != null) {
        filename = (String) streamNameField.getFirstValue();
    } else {
        final Pattern pattern = Pattern.compile("[^/]*$");
        final Matcher matcher = pattern.matcher(url);
        if (matcher.find()) {
            filename = matcher.group();
        }
    }

    // Web documents keep their crawled title (falling back to the file name);
    // file documents always use the file name as title.
    if (url.startsWith("http")) {
        if (doc.get("title") == null) {
            doc.addField("title", filename);
        }
        doc.addField("source", "web");
    }
    if (url.startsWith("file")) {
        doc.removeField("title");
        doc.addField("title", filename);
        doc.addField("source", "file");
    }

    // Determine the file extension, from the URL path or from the MIME type
    // Tika detected, depending on the extensionFromName setting. A candidate
    // is only trusted when its length is a plausible 2-4 characters.
    String extension = "";
    URL urlObject = new URL(url);
    String path = urlObject.getPath();
    final SolrInputField mimeTypeField = doc.get("ignored_content_type");
    String nameExtension = FilenameUtils.getExtension(path);
    String tikaExtension = mimeTypeField == null ? "" : extensionFromMimeTypeField(mimeTypeField);
    if (extensionFromName) {
        extension = nameExtension.length() > 1 && nameExtension.length() < 5 ? nameExtension : tikaExtension;
    } else {
        extension = tikaExtension.length() > 1 && tikaExtension.length() < 5 ? tikaExtension : nameExtension;
    }
    // (Disabled) an older extension-resolution strategy mixing name- and
    // MIME-based detection with "bin"/"html" fallbacks was commented out here.

    // NOTE(review): toLowerCase() uses the default locale — Locale.ROOT would
    // be safer for a schema field; confirm before changing.
    doc.addField("extension", extension.toLowerCase());

    super.processAdd(cmd);
}
From source file:com.github.le11.nls.solr.UIMAAsyncUpdateRequestProcessor.java
License:Apache License
private String[] getTextsToAnalyze(SolrInputDocument solrInputDocument) { String[] fieldsToAnalyze = solrUIMAConfiguration.getFieldsToAnalyze(); boolean merge = solrUIMAConfiguration.isFieldsMerging(); String[] textVals;//from w ww . j av a 2 s . c o m if (merge) { StringBuilder unifiedText = new StringBuilder(""); for (int i = 0; i < fieldsToAnalyze.length; i++) { unifiedText.append(String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i]))); } textVals = new String[1]; textVals[0] = unifiedText.toString(); } else { textVals = new String[fieldsToAnalyze.length]; for (int i = 0; i < fieldsToAnalyze.length; i++) { textVals[i] = String.valueOf(solrInputDocument.getFieldValue(fieldsToAnalyze[i])); } } return textVals; }
From source file:com.gu.solr.MergeUtils.java
License:Apache License
/**
 * Returns a field-by-field copy of the given document.
 *
 * <p>Fix: the original copied only {@code getFieldValue(name)} — the FIRST
 * value of each field — silently dropping the remaining values of any
 * multi-valued field. Copying {@code getFieldValues(name)} preserves every
 * value, because {@code addField} adds each element of a Collection
 * individually.
 *
 * @param document the source document; not modified
 * @return a new SolrInputDocument containing every value of every field
 */
private static SolrInputDocument copy(SolrInputDocument document) {
    SolrInputDocument copy = new SolrInputDocument();
    for (String name : document.getFieldNames()) {
        // getFieldValues returns all values of the (possibly multi-valued) field.
        copy.addField(name, document.getFieldValues(name));
    }
    return copy;
}
From source file:com.ibm.watson.developer_cloud.professor_languo.ingestion.indexing.RetrieveAndRankIndexerTest.java
License:Open Source License
/** * check if the deserialized candidate answers are exactly the same as the candidate answers we * expected. Since the query is "question Title: what is right", the expected candidate answers * should be the entire corpus.// www . j a va 2s.c om * * @throws IngestionException */ private void compare_indexed_records_to_corpus() throws IngestionException { indexdCorpus = corpusBuilder.getUniqueThreadSetFromBinFiles(); // Check that the size of the corpus is the same as the size of the // indexed documents assertTrue("Wrong number of documents indexed", indexedRecords.size() == indexdCorpus.size()); // Check that the indexed document in the corpus is in the index File serFile = new File(corpusBuilder.getUniqueThreadDirPath()).listFiles()[0]; StackExchangeThread thread = StackExchangeThreadSerializer.deserializeThreadFromBinFile(serFile.getPath()); final Document luceneDoc = new LuceneDocumentMapper().createDocument(thread); SolrInputDocument recordDoc = indexedRecords.get(0); for (IndexableField field : luceneDoc.getFields()) { BytesRef bin = luceneDoc.getBinaryValue(field.name()); // Check that indexed fields (title and id) are indexed correctly if (field.name().equals(IndexDocumentFieldName.THREAD_TITLE.toString()) || field.name().equals(IndexDocumentFieldName.THREAD_POST_ID.toString())) { String value = luceneDoc.get(field.name()); assertEquals(value, recordDoc.getFieldValue(field.name()).toString()); } // Check that indexed serialized field is indexed correctly if (bin != null) { BytesRef recordbin = new BytesRef((byte[]) recordDoc.getFieldValue(field.name())); assertEquals(bin, recordbin); } } }
From source file:com.ngdata.hbaseindexer.indexer.FusionDocumentWriter.java
License:Apache License
/** * shs: This method was modified from its original to add the input parameters 'parent' and 'docCount'. This was done * to enable recursion to be used to find all parent/child relationships to any level. The method will merge the * fields in the parent document into the child document and will then convert that merged document into JSON * format and return that JSON document to the caller. * @param parent The parent document for the child document being passed in. Parent may be null if the child being * passed in is a member of the initial documents submitted. * @param child This is the child document. It will have the parent's fields merged into it. * @param docCount This is a count of the number of documents that have been added in this processing. * @return The merged parent and child documents as a JSON formatted document, in a format acceptable to * Fusion.//from ww w.j a v a 2 s .c o m */ protected Map<String, Object> doc2json(SolrInputDocument parent, SolrInputDocument child, int docCount) { Map<String, Object> json = new HashMap<String, Object>(); if (child != null) { String docId = (String) child.getFieldValue("id"); if (docId == null) { if (parent != null) { String parentId = (String) parent.getFieldValue("id"); docId = parentId + "-" + docCount; } if (docId == null) throw new IllegalStateException("Couldn't resolve the id for document: " + child); } json.put("id", docId); List fields = new ArrayList(); if (parent != null) { if (log.isDebugEnabled()) log.debug("Method:doc2json - Merging parent and child docs, parent:[" + parent.toString() + "]; child[" + child.toString() + "]."); // have a parent doc ... 
flatten by adding all parent doc fields to the child with prefix _p_ for (String f : parent.getFieldNames()) { if ("id".equals(f)) { fields.add(mapField("_p_id", null /* field name prefix */, parent.getField("id").getFirstValue())); } else { appendField(parent, f, "_p_", fields); } } } for (String f : child.getFieldNames()) { if (!"id".equals(f)) { // id already added appendField(child, f, null, fields); } } // keep track of the time we saw this doc on the hbase side String tdt = DateUtil.getThreadLocalDateFormat().format(new Date()); fields.add(mapField("_hbasets_tdt", null, tdt)); if (log.isDebugEnabled()) log.debug(strIndexName + " Reconcile id = " + docId + " and timestamp = " + tdt); json.put("fields", fields); } else { log.warn("method:doc2json - Input parameter 'child' was null."); } return json; }