List of usage examples for the org.apache.solr.common.SolrInputDocument constructor, SolrInputDocument().
From source file:WikipediaModel.java
License:Open Source License
/**
 * Builds a Solr input document from this model's current field values.
 *
 * @return a {@link SolrInputDocument} carrying id, title, title annotation,
 *         text, text length ({@code text_count}) and last-modified timestamp
 */
public SolrInputDocument getDocument() {
    final SolrInputDocument document = new SolrInputDocument();
    document.addField("id", this.id);
    document.addField("title", this.title);
    document.addField("title_annotation", this.titleAnnotation);
    document.addField("text", this.text);
    // text_count mirrors the raw character length of the indexed text
    document.addField("text_count", this.text.length());
    document.addField("last_modified", this.lastModified);
    return document;
}
From source file:SolrUpdate.java
License:Apache License
/**
 * Populates {@code metadoc} with the metadata of the currently selected
 * publication and connects {@code server} to the local Solr instance.
 * <p>
 * NOTE(review): the Solr URL is hard-coded to localhost:8983 — presumably a
 * development default; confirm whether it should come from configuration.
 */
public void setSOLRMetadata() {
    server = new HttpSolrServer("http://localhost:8983/solr");
    metadoc = new SolrInputDocument();
    metadoc.addField("pmid", selectedPub.getPmid());
    metadoc.addField("abstract", selectedPub.getAbstract());
    metadoc.addField("publicationdate_year", selectedPub.getYear());
    metadoc.addField("doi", selectedPub.getDoi());
    metadoc.addField("journalvolume", selectedPub.getJournalvolume());
    metadoc.addField("journalissue", selectedPub.getJournalissue());
    metadoc.addField("journalmonth", selectedPub.getJournalmonth());
    metadoc.addField("journalyear", selectedPub.getJournalyear());
    metadoc.addField("journalday", selectedPub.getJournalday());
    metadoc.addField("journalname", selectedPub.getJournalname());
    metadoc.addField("journalpage", selectedPub.getJournalstartpg());
    metadoc.addField("epubday", selectedPub.getEpubday());
    metadoc.addField("epubmonth", selectedPub.getEpubmonth());
    metadoc.addField("epubyear", selectedPub.getEpubyear());
    metadoc.addField("author_fullname_list", selectedPub.getAuthorfull());
    metadoc.addField("completion", selectedPub.getCompletion());
    metadoc.addField("draftpoint", selectedPub.getDraftpoint());
    metadoc.addField("lruid", selectedPub.getLruid());
    metadoc.addField("ptitle", selectedPub.getTitle());
    // FIX: iterate only over the paired range. The original indexed
    // getLauthors() with getFauthors()'s size, which throws
    // IndexOutOfBoundsException whenever the two lists differ in length.
    final int authorCount = Math.min(selectedPub.getFauthors().size(), selectedPub.getLauthors().size());
    for (int i = 0; i < authorCount; i++) {
        metadoc.addField("author_firstname", selectedPub.getFauthors().get(i));
        metadoc.addField("author_lastname", selectedPub.getLauthors().get(i));
    }
    for (String fileInfo : selectedPub.getFilesanddata()) {
        metadoc.addField("pfileinfo", fileInfo);
    }
}
From source file:OSMHandler.java
License:Apache License
/**
 * SAX callback that indexes OpenStreetMap elements into Solr documents.
 * <p>
 * "node" and "way" elements each start a new document ({@code current});
 * "nd" and "tag" elements enrich whichever document is currently open.
 * Completed documents are appended to {@code docBuffer}.
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException {
    if (qName.equals("node")) {
        current = new SolrInputDocument();
        long id = Long.parseLong(attributes.getValue("id"));
        current.addField("id", id);
        current.addField("type", "node");
        // NOTE: I'm doing this on the client side of Solr for demonstration purposes.
        // A more effective solution, coming in Solr 1.5, will handle indexing on the server side.
        double latitude = Double.parseDouble(attributes.getValue("lat"));
        double longitude = Double.parseDouble(attributes.getValue("lon"));
        current.addField("lat", latitude);
        current.addField("lon", longitude);
        // Should use a Solr copyField for this, but do the degree->radian conversion here.
        current.addField("lat_rad", latitude * TO_RADS);
        current.addField("lon_rad", longitude * TO_RADS);
        // See http://en.wikipedia.org/wiki/Geohash
        String geoHash = GeoHashUtils.encode(latitude, longitude);
        current.addField("geohash", geoHash);
        // Cartesian tiers: each deeper level has more precision.
        int tier = START_TIER;
        for (CartesianTierPlotter plotter : plotters) {
            current.addField("tier_" + tier, plotter.getTierBoxId(latitude, longitude));
            tier++;
        }
        int version = Integer.parseInt(attributes.getValue("version"));
        current.addField("version", version);
        String user = attributes.getValue("user");
        current.addField("user", user);
        String timestamp = attributes.getValue("timestamp");
        current.addField("timestamp", timestamp);
        addFacts(id);
        docBuffer.add(current);
    } else if (qName.equals("way")) {
        // A "way" is a bunch of nodes linked together. Sample:
        // <way id="29124014" visible="true" timestamp="2008-12-21T04:44:36Z" version="2"
        //      changeset="514025" user="Shadow" uid="24160">
        //   <nd ref="320408502"/>
        //   <nd ref="320408503"/>
        //   <tag k="name" v="Gopher Way"/>
        //   <tag k="highway" v="footway"/>
        // </way>
        current = new SolrInputDocument();
        long id = Long.parseLong(attributes.getValue("id"));
        addFacts(id);
        current.addField("type", "way");
        current.addField("id", id);
        docBuffer.add(current);
    } else if (current != null && qName.equals("nd")) {
        // Node reference inside a way: record membership on the open document.
        long refId = Long.parseLong(attributes.getValue("ref"));
        current.addField("way_id", refId);
    } else if (current != null && qName.equals("tag")) {
        // Is it a specific key we care about? If not, add it as a dynamic field.
        // TODO: make this scalable; could be made pluggable so users handle tags their own way.
        String key = attributes.getValue("k");
        String value = attributes.getValue("v");
        if (key.equals("ele")) {
            key = "ele"; // no-op: key already equals "ele"; kept for symmetry with other branches
        } else if ((key.indexOf("County") != -1 || key.indexOf("county") != -1)
                && key.equals("gnis:County_num") == false) {
            // Any county-ish key (except the GNIS numeric id) is normalized to "county".
            key = "county";
        } else if (key.equals("source") || key.equals("name") || key.equals("population")) {
            // fall through: these keys are indexed under their own names
        } else if (key.equals("is_in")) {
            // NOTE(review): original comment said "reverse the array and add from
            // large to small", but the code only joins the comma-split parts with
            // spaces in their original order — confirm intended behavior.
            String[] splits = value.split(",");
            StringBuilder newV = new StringBuilder(value.length());
            for (int i = 0; i < splits.length; i++) {
                newV.append(splits[i]);
                if (i < splits.length - 1) {
                    newV.append(' ');
                }
            }
            value = newV.toString();
        } else {
            // Unknown keys are indexed as dynamic string fields.
            key += "_s";
        }
        current.addField(key, value);
    } else if (current != null && qName.equals("bounds")) {
        // Intentionally ignored: unclear whether bounds should be indexed onto every doc.
    }
}
From source file:at.newmedialab.lmf.search.services.indexing.SolrIndexingServiceImpl.java
License:Apache License
@Override public void indexResource(Resource resource, SolrCoreRuntime runtime) { Program<Value> program = runtime.getConfiguration().getProgram(); if (program == null) { try {/* w w w.j a v a2 s.c o m*/ program = solrProgramService .parseProgram(new StringReader(runtime.getConfiguration().getProgramString())); runtime.getConfiguration().setProgram(program); } catch (LDPathParseException e) { log.error("error parsing path program for engine {}", runtime.getConfiguration().getName(), e); return; } } if (resource == null) return; final String coreName = runtime.getConfiguration().getName(); final String rID = getResourceId(resource); try { final RepositoryConnection connection = sesameService.getConnection(); try { connection.begin(); //if (resource instanceof KiWiResource && ((KiWiResource) resource).isDeleted()) { // runtime.queueDeletion(rID); //} //FIXME: find a proper way to do this with the new api boolean deleted = true; RepositoryResult<Statement> statements = connection.getStatements(resource, null, null, false); while (statements.hasNext()) { if (!ResourceUtils.isDeleted(connection, statements.next())) { deleted = false; break; } } if (deleted) { runtime.queueDeletion(rID); } final Resource[] contexts; if (program.getGraphs().isEmpty()) { contexts = new Resource[0]; } else { contexts = Collections2.transform(program.getGraphs(), new Function<java.net.URI, URI>() { @Override public URI apply(java.net.URI in) { return connection.getValueFactory().createURI(in.toString()); } }).toArray(new Resource[0]); } final SesameConnectionBackend backend = ContextAwareSesameConnectionBackend .withConnection(connection, contexts); if (program.getFilter() != null && !program.getFilter().apply(backend, resource, Collections.singleton((Value) resource))) { if (log.isDebugEnabled()) { log.debug("({}) <{}> does not match filter '{}', ignoring", coreName, resource, program.getFilter().getPathExpression(backend)); } // Some resources might be still in the index, so delete it. 
runtime.queueDeletion(rID); connection.commit(); return; } else if (log.isTraceEnabled() && program.getFilter() != null) { log.trace("({}) <{}> matches filter '{}', indexing...", coreName, resource, program.getFilter().getPathExpression(backend)); } SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", rID); doc.addField("lmf.indexed", new Date()); if (resource instanceof KiWiUriResource) { doc.addField("lmf.created", ((KiWiUriResource) resource).getCreated()); } if (resource instanceof URI) { URI r = (URI) resource; doc.addField("lmf.uri", r.stringValue()); } else if (resource instanceof BNode) { BNode r = (BNode) resource; doc.addField("lmf.anon_id", r.getID()); } else { // This should not happen, but never the less... log.warn("Tried to index a Resource that is neither a URI nor BNode: {}", resource); runtime.queueDeletion(rID); connection.rollback(); return; } for (Resource type : getTypes(connection, resource)) { if (type instanceof KiWiUriResource) { doc.addField("lmf.type", type.stringValue()); } } // Set the document boost if (program.getBooster() != null) { final Collection<Float> boostValues = program.getBooster().getValues(backend, resource); if (boostValues.size() > 0) { final Float docBoost = boostValues.iterator().next(); if (boostValues.size() > 1) { log.warn("found more than one boostFactor for <{}>, using {}", resource, docBoost); } doc.setDocumentBoost(docBoost); } } // set shortcut fields Set<Value> dependencies = new HashSet<Value>(); for (FieldMapping<?, Value> rule : program.getFields()) { // Map<Value, List<Value>> paths = new HashMap<Value, List<Value>>(); // Collection<?> values = rule.getValues(backend, resource, paths); //FIXME: Temporary fixing due LDPath reverse properties selector bug Map<Value, List<Value>> paths = null; Collection<?> values = null; if (runtime.getConfiguration().isUpdateDependencies()) { paths = new HashMap<Value, List<Value>>(); values = rule.getValues(backend, resource, paths); } else { values = 
rule.getValues(backend, resource); } // try { final boolean isSinge = !isMultiValuedField(rule); for (Object value : values) { if (value != null) { doc.addField(rule.getFieldName(), value); if (isSinge) { break; } } } if (rule.getFieldConfig() != null) { final String b = rule.getFieldConfig().get("boost"); try { if (b != null) { doc.getField(rule.getFieldName()).setBoost(Float.parseFloat(b)); } } catch (NumberFormatException e) { throw new NumberFormatException("could not parse boost value: '" + b + "'"); } } } catch (Exception ex) { log.error("({}) exception while building path indexes for <{}>, field {}: {}", coreName, resource, rule.getFieldName(), ex.getMessage()); log.debug("(" + coreName + ") stacktrace", ex); } if (runtime.getConfiguration().isUpdateDependencies()) { for (List<Value> path : paths.values()) { dependencies.addAll(path); } } } if (runtime.getConfiguration().isUpdateDependencies()) { for (Value dependency : dependencies) { if (dependency instanceof URI && !dependency.equals(resource)) { doc.addField("lmf.dependencies", dependency.stringValue()); } } } runtime.queueInputDocument(doc); connection.commit(); } finally { connection.close(); } } catch (RepositoryException e) { log.warn("Could not build index document for " + resource.stringValue(), e); } catch (Throwable t) { log.error("unknown error while indexing document", t); } }
From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java
License:Apache License
/**
 * Verifies that writing a bean through the converter copies its string and
 * int properties into the corresponding Solr document fields.
 */
@Test
public void testWrite() {
    ConvertableBean bean = new ConvertableBean("j73x73r", 1979);
    SolrInputDocument document = new SolrInputDocument();

    converter.write(bean, document);

    Assert.assertEquals(bean.getStringProperty(), document.getFieldValue("stringProperty"));
    Assert.assertEquals(bean.getIntProperty(), document.getFieldValue("intProperty"));
}
From source file:at.pagu.soldockr.core.SolrTemplate.java
License:Apache License
/**
 * Converts an arbitrary bean into a {@link SolrInputDocument} via the
 * configured converter. A bean that already is a document is returned as-is.
 *
 * @param bean the object to convert
 * @return the populated Solr input document
 */
@Override
public SolrInputDocument convertBeanToSolrInputDocument(Object bean) {
    // Short-circuit: nothing to convert if the caller already handed us a document.
    if (bean instanceof SolrInputDocument) {
        return (SolrInputDocument) bean;
    }
    SolrInputDocument target = new SolrInputDocument();
    getConverter().write(bean, target);
    return target;
}
From source file:au.edu.aekos.shared.solr.index.SubmissionSolrDocumentBuilder.java
/**
 * Creates the base SHaRED submission Solr document with the common
 * identification, title, abstract and study-area visit-date fields.
 *
 * @param id                     document id
 * @param doi                    dataset DOI
 * @param title                  formal dataset title (also stored as datasetFormalName_t)
 * @param datasetNameForCitation citation form of the dataset name
 * @param datasetAbstract        dataset abstract (also stored as description)
 * @param firstVisit             first study-area visit date
 * @param lastVisit              last study-area visit date
 * @return the initialised {@link SolrInputDocument}
 */
public static SolrInputDocument initialiseSHaREDDocument(String id, String doi, String title,
        String datasetNameForCitation, String datasetAbstract, Date firstVisit, Date lastVisit) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", id);
    doc.addField("doi", doi);
    doc.addField("title", title);
    doc.addField("datasetFormalName_t", title);
    doc.addField("datasetNameForCitation_t", datasetNameForCitation);
    doc.addField("description", datasetAbstract);
    doc.addField("datasetAbstract_t", datasetAbstract);
    doc.addField("firstStudyAreaVisitDate_dt", firstVisit);
    // BUG FIX: this field previously received firstVisit, silently ignoring
    // the lastVisit parameter entirely.
    doc.addField("lastStudyAreaVisitDate_dt", lastVisit);
    return doc;
}
From source file:au.org.ala.bhl.service.IndexingService.java
License:Open Source License
/** * Send a page of text to SOLR for indexing * /*from w w w.j a va 2 s. c o m*/ * @param item * @param pageId * @param pageText * @param server */ private void indexPage(ItemDescriptor item, String pageId, String pageText, SolrServer server) { if (!StringUtils.isEmpty(pageText)) { SolrInputDocument doc = new SolrInputDocument(); doc.addField("id", pageId, 1.0f); doc.addField("name", item.getTitle(), 1.0f); doc.addField("text", pageText); doc.addField("internetArchiveId", item.getInternetArchiveId()); doc.addField("itemId", item.getItemId()); doc.addField("pageId", pageId, 1.0f); doc.addField("pageUrl", String.format("http://bhl.ala.org.au/pageimage/%s", pageId)); JsonNode metadata = _docCache.getItemMetaData(item); if (metadata != null) { addItemMetadata(doc, metadata); JsonNode titleData = _docCache.getTitleMetaData(item); if (titleData != null) { addTitleMetadata(doc, titleData); } } try { server.add(doc); } catch (Exception ex) { throw new RuntimeException(ex); } } }
From source file:au.org.intersect.dms.catalogue.db.DatasetIndexer.java
License:Open Source License
public void indexDatasets(Collection<DbDataset> datasets) { List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(); dataset: for (DbDataset dataset : datasets) { LOGGER.debug("Indexing dataset with ID {}", dataset.getId()); SolrInputDocument sid = new SolrInputDocument(); addBasicFields(dataset, sid);/* w w w. jav a 2 s . c o m*/ StringBuilder summary = new StringBuilder(); summary.append(dataset.getCreationDate()).append(SPACE).append(dataset.getId()).append(SPACE) .append(dataset.getOwner()).append(SPACE).append(dataset.getUrl()); String metadata; try { metadata = indexMetadata(dataset, sid); } catch (IOException e) { LOGGER.error("Exception during indexing. Skipping this dataset.", e); continue dataset; } catch (TransformerException e) { LOGGER.error("Exception during indexing. Skipping this dataset.", e); continue dataset; } if (metadata.length() > 0) { sid.addField("dataset.metadata_t", metadata.toString()); summary.append(SPACE).append(metadata); } // Add summary field to allow searching documents for objects of this type sid.addField("dataset_solrsummary_t", summary); documents.add(sid); } try { SolrServer solrServer = DbDataset.solrServer(); solrServer.add(documents); solrServer.commit(); } catch (SolrServerException e) { LOGGER.error(INDEX_EXCEPTION_MESSAGE, e); } catch (IOException e) { LOGGER.error(INDEX_EXCEPTION_MESSAGE, e); } }
From source file:bamboo.trove.rule.RuleRecheckWorker.java
License:Apache License
@Override public void run() { Timer.Context context = manager.getTimer(manager.getName() + ".worker").time(); SolrInputDocument update = processResultsRecheckRule(); if (update != null) { manager.update(update);//from w ww .j a va 2s . c o m workLog.wroteDocument(); } else { // No write activity... but we still acknowledge it update = new SolrInputDocument(); update.addField(SolrEnum.ID.toString(), id); manager.acknowledge(update); } context.stop(); }