Example usage for org.apache.solr.common SolrInputDocument SolrInputDocument

List of usage examples for org.apache.solr.common SolrInputDocument SolrInputDocument

Introduction

On this page you can find example usage of org.apache.solr.common SolrInputDocument SolrInputDocument.

Prototype

SolrInputDocument

Source Link

Usage

From source file:WikipediaModel.java

License:Open Source License

/**
 * Builds a Solr input document from this Wikipedia page model.
 * <p>
 * Indexes the id, title, annotated title, body text (plus its character
 * count) and the last-modified value of this instance.
 *
 * @return a freshly populated {@link SolrInputDocument}; never null
 */
public SolrInputDocument getDocument() {
    final SolrInputDocument document = new SolrInputDocument();
    document.addField("id", this.id);
    document.addField("title", this.title);
    document.addField("title_annotation", this.titleAnnotation);
    document.addField("text", this.text);
    // text_count mirrors the body length so the index can filter on it directly
    document.addField("text_count", this.text.length());
    document.addField("last_modified", this.lastModified);
    return document;
}

From source file:SolrUpdate.java

License:Apache License

/**
 * Populates {@code metadoc} with the metadata of the currently selected
 * publication ({@code selectedPub}) and initialises {@code server} to point
 * at the local Solr instance. This method only builds the document; it does
 * not submit it to the server.
 */
public void setSOLRMetadata() {
    // NOTE(review): Solr endpoint is hard-coded — consider making it configurable.
    server = new HttpSolrServer("http://localhost:8983/solr");

    metadoc = new SolrInputDocument();
    metadoc.addField("pmid", selectedPub.getPmid());

    metadoc.addField("abstract", selectedPub.getAbstract());
    metadoc.addField("publicationdate_year", selectedPub.getYear());
    metadoc.addField("doi", selectedPub.getDoi());
    metadoc.addField("journalvolume", selectedPub.getJournalvolume());
    metadoc.addField("journalissue", selectedPub.getJournalissue());
    metadoc.addField("journalmonth", selectedPub.getJournalmonth());
    metadoc.addField("journalyear", selectedPub.getJournalyear());
    metadoc.addField("journalday", selectedPub.getJournalday());
    metadoc.addField("journalname", selectedPub.getJournalname());
    // journalpage is filled from the publication's start page
    metadoc.addField("journalpage", selectedPub.getJournalstartpg());
    metadoc.addField("epubday", selectedPub.getEpubday());
    metadoc.addField("epubmonth", selectedPub.getEpubmonth());
    metadoc.addField("epubyear", selectedPub.getEpubyear());
    metadoc.addField("author_fullname_list", selectedPub.getAuthorfull());
    metadoc.addField("completion", selectedPub.getCompletion());
    metadoc.addField("draftpoint", selectedPub.getDraftpoint());

    metadoc.addField("lruid", selectedPub.getLruid());
    metadoc.addField("ptitle", selectedPub.getTitle());

    // First/last author names are parallel lists paired by index.
    // NOTE(review): assumes getLauthors() has at least as many entries as
    // getFauthors(); otherwise this throws IndexOutOfBoundsException — verify.
    for (int i = 0; i < selectedPub.getFauthors().size(); i++) {
        metadoc.addField("author_firstname", selectedPub.getFauthors().get(i));
        metadoc.addField("author_lastname", selectedPub.getLauthors().get(i));
    }

    // Each attached file/data descriptor becomes one multi-valued field entry.
    for (String currstring : selectedPub.getFilesanddata()) {
        metadoc.addField("pfileinfo", currstring);
    }

}

From source file:OSMHandler.java

License:Apache License

/**
 * SAX callback that maps OpenStreetMap XML elements onto Solr input documents.
 * <p>
 * "node" and "way" elements each start a new document (added to
 * {@code docBuffer}); "nd" and "tag" elements add fields to the document
 * currently being built ({@code current}).
 *
 * @param uri        namespace URI (unused)
 * @param localName  local element name (unused)
 * @param qName      qualified element name; drives the dispatch below
 * @param attributes XML attributes of the current element
 * @throws SAXException declared by the SAX contract; not thrown directly here
 */
@Override
public void startElement(String uri, String localName, String qName, Attributes attributes)
        throws SAXException {
    if (qName.equals("node")) {
        current = new SolrInputDocument();
        long id = Long.parseLong(attributes.getValue("id"));
        current.addField("id", id);
        current.addField("type", "node");
        //NOTE: I'm doing this on the client side of Solr for demonstration purposes.  A more effective solution, coming in Solr 1.5, will handle indexing on the server side.
        double latitude = Double.parseDouble(attributes.getValue("lat"));
        double longitude = Double.parseDouble(attributes.getValue("lon"));
        current.addField("lat", latitude);
        current.addField("lon", longitude);
        //should use copy field, but do the conversion here
        current.addField("lat_rad", latitude * TO_RADS);
        current.addField("lon_rad", longitude * TO_RADS);

        //See http://en.wikipedia.org/wiki/Geohash
        String geoHash = GeoHashUtils.encode(latitude, longitude);
        current.addField("geohash", geoHash);
        //Cartesian Tiers: each deeper tier level has more precision
        int tier = START_TIER;
        for (CartesianTierPlotter plotter : plotters) {
            current.addField("tier_" + tier, plotter.getTierBoxId(latitude, longitude));
            tier++;
        }
        int version = Integer.parseInt(attributes.getValue("version"));
        current.addField("version", version);
        String user = attributes.getValue("user");
        current.addField("user", user);
        String timestamp = attributes.getValue("timestamp");
        current.addField("timestamp", timestamp);

        addFacts(id);
        docBuffer.add(current);

    } else if (qName.equals("way")) {
        //A "way" is a bunch of nodes linked together, see the sample at the bottom.
        //For ways, get the tag info just the same.
        /*
         <way id="29124014" visible="true" timestamp="2008-12-21T04:44:36Z" version="2" changeset="514025" user="Shadow" uid="24160">
        <nd ref="320408502"/>
        <nd ref="320408503"/>
        <tag k="name" v="Gopher Way"/>
        <tag k="created_by" v="Potlatch 0.10f"/>
        <tag k="highway" v="footway"/>
        <tag k="source" v="knowledge; image"/>
        <tag k="layer" v="2"/>
        </way>
         */
        current = new SolrInputDocument();
        long id = Long.parseLong(attributes.getValue("id"));
        addFacts(id);
        current.addField("type", "way");
        current.addField("id", id);
        docBuffer.add(current);

    } else if (current != null && qName.equals("nd")) {
        // Node reference inside a way: record the linked node id.
        long refId = Long.parseLong(attributes.getValue("ref"));
        current.addField("way_id", refId);

    } else if (current != null && qName.equals("tag")) {
        String key = attributes.getValue("k");
        String value = attributes.getValue("v");
        //is it a specific one we care about?  If not, just add as a dynamic field
        //TODO: make this scalable
        //Could likely make this more pluggable, too, to give people a chance to deal with the tags in their own way
        if (key.equals("ele")) {
            //elevation: keep its own field name (was a no-op self-assignment)
        } else if ((key.contains("County") || key.contains("county"))
                && !key.equals("gnis:County_num")) {
            key = "county";
        } else if (key.equals("source") || key.equals("name") || key.equals("population")) {
            //fall through: indexed under the original key
        } else if (key.equals("is_in")) {
            //Normalize the comma-separated containment list into a space-separated string.
            //(An earlier comment claimed the array was reversed here; it never was.)
            String[] splits = value.split(",");
            StringBuilder newV = new StringBuilder(value.length());
            for (int i = 0; i < splits.length; i++) {
                newV.append(splits[i]);
                if (i < splits.length - 1) {
                    newV.append(' ');
                }
            }
            value = newV.toString();

        } else {
            //Unknown keys are indexed as dynamic string fields
            key += "_s";
        }
        current.addField(key, value);
    } else if (current != null && qName.equals("bounds")) {
        //Intentionally ignored; could alternatively be indexed onto every doc.
    }
}

From source file:at.newmedialab.lmf.search.services.indexing.SolrIndexingServiceImpl.java

License:Apache License

/**
 * Builds a Solr index document for a single RDF resource and queues it on the
 * given core runtime; resources that are deleted, filtered out, or of an
 * unexpected type are queued for deletion from the index instead.
 * <p>
 * The whole read happens inside one repository transaction; the connection is
 * committed on success, rolled back for unindexable resource types, and always
 * closed. Repository errors are logged, never rethrown.
 *
 * @param resource the resource to (re-)index; a null resource is a no-op
 * @param runtime  the Solr core runtime holding the LDPath configuration and
 *                 the queues this method writes to
 */
@Override
public void indexResource(Resource resource, SolrCoreRuntime runtime) {
    Program<Value> program = runtime.getConfiguration().getProgram();
    if (program == null) {
        // Lazily parse and cache the LDPath program from its string form.
        try {
            program = solrProgramService
                    .parseProgram(new StringReader(runtime.getConfiguration().getProgramString()));
            runtime.getConfiguration().setProgram(program);
        } catch (LDPathParseException e) {
            log.error("error parsing path program for engine {}", runtime.getConfiguration().getName(), e);
            return;
        }
    }

    if (resource == null)
        return;
    final String coreName = runtime.getConfiguration().getName();
    final String rID = getResourceId(resource);

    try {
        final RepositoryConnection connection = sesameService.getConnection();
        try {
            connection.begin();

            //if (resource instanceof KiWiResource && ((KiWiResource) resource).isDeleted()) {
            //    runtime.queueDeletion(rID);
            //}
            //FIXME: find a proper way to do this with the new api
            // A resource counts as deleted when every statement it occurs in is deleted.
            boolean deleted = true;
            RepositoryResult<Statement> statements = connection.getStatements(resource, null, null, false);
            while (statements.hasNext()) {
                if (!ResourceUtils.isDeleted(connection, statements.next())) {
                    deleted = false;
                    break;
                }
            }
            if (deleted) {
                runtime.queueDeletion(rID);
            }

            // Restrict evaluation to the graphs named in the program, if any.
            final Resource[] contexts;
            if (program.getGraphs().isEmpty()) {
                contexts = new Resource[0];
            } else {
                contexts = Collections2.transform(program.getGraphs(), new Function<java.net.URI, URI>() {
                    @Override
                    public URI apply(java.net.URI in) {
                        return connection.getValueFactory().createURI(in.toString());
                    }
                }).toArray(new Resource[0]);
            }

            final SesameConnectionBackend backend = ContextAwareSesameConnectionBackend
                    .withConnection(connection, contexts);
            if (program.getFilter() != null
                    && !program.getFilter().apply(backend, resource, Collections.singleton((Value) resource))) {
                if (log.isDebugEnabled()) {
                    log.debug("({}) <{}> does not match filter '{}', ignoring", coreName, resource,
                            program.getFilter().getPathExpression(backend));
                }
                // Some resources might be still in the index, so delete it.
                runtime.queueDeletion(rID);
                connection.commit();
                return;
            } else if (log.isTraceEnabled() && program.getFilter() != null) {
                log.trace("({}) <{}> matches filter '{}', indexing...", coreName, resource,
                        program.getFilter().getPathExpression(backend));
            }

            SolrInputDocument doc = new SolrInputDocument();

            doc.addField("id", rID);
            doc.addField("lmf.indexed", new Date());
            if (resource instanceof KiWiUriResource) {
                doc.addField("lmf.created", ((KiWiUriResource) resource).getCreated());
            }

            // Record either the URI or the blank-node id as the document identity.
            if (resource instanceof URI) {
                URI r = (URI) resource;

                doc.addField("lmf.uri", r.stringValue());
            } else if (resource instanceof BNode) {
                BNode r = (BNode) resource;
                doc.addField("lmf.anon_id", r.getID());
            } else {
                // This should not happen, but never the less...
                log.warn("Tried to index a Resource that is neither a URI nor BNode: {}", resource);
                runtime.queueDeletion(rID);
                connection.rollback();
                return;
            }

            for (Resource type : getTypes(connection, resource)) {
                if (type instanceof KiWiUriResource) {
                    doc.addField("lmf.type", type.stringValue());
                }
            }

            // Set the document boost
            if (program.getBooster() != null) {
                final Collection<Float> boostValues = program.getBooster().getValues(backend, resource);
                if (boostValues.size() > 0) {
                    final Float docBoost = boostValues.iterator().next();
                    if (boostValues.size() > 1) {
                        log.warn("found more than one boostFactor for <{}>, using {}", resource, docBoost);
                    }
                    doc.setDocumentBoost(docBoost);
                }
            }

            // set shortcut fields
            Set<Value> dependencies = new HashSet<Value>();
            for (FieldMapping<?, Value> rule : program.getFields()) {
                //                    Map<Value, List<Value>> paths = new HashMap<Value, List<Value>>();
                //                    Collection<?> values = rule.getValues(backend, resource, paths);
                //FIXME: Temporary fixing due LDPath reverse properties selector bug
                // Paths are only collected when dependency tracking is enabled.
                Map<Value, List<Value>> paths = null;
                Collection<?> values = null;
                if (runtime.getConfiguration().isUpdateDependencies()) {
                    paths = new HashMap<Value, List<Value>>();
                    values = rule.getValues(backend, resource, paths);
                } else {
                    values = rule.getValues(backend, resource);
                }
                //
                try {
                    // NOTE(review): "isSinge" is a typo for "isSingle" — single-valued
                    // fields only take the first non-null value.
                    final boolean isSinge = !isMultiValuedField(rule);
                    for (Object value : values) {
                        if (value != null) {
                            doc.addField(rule.getFieldName(), value);
                            if (isSinge) {
                                break;
                            }
                        }
                    }
                    if (rule.getFieldConfig() != null) {
                        final String b = rule.getFieldConfig().get("boost");
                        try {
                            if (b != null) {
                                doc.getField(rule.getFieldName()).setBoost(Float.parseFloat(b));
                            }
                        } catch (NumberFormatException e) {
                            throw new NumberFormatException("could not parse boost value: '" + b + "'");
                        }
                    }
                } catch (Exception ex) {
                    // A failing rule skips only that field, not the whole document.
                    log.error("({}) exception while building path indexes for <{}>, field {}: {}", coreName,
                            resource, rule.getFieldName(), ex.getMessage());
                    log.debug("(" + coreName + ") stacktrace", ex);
                }
                if (runtime.getConfiguration().isUpdateDependencies()) {
                    for (List<Value> path : paths.values()) {
                        dependencies.addAll(path);
                    }
                }
            }

            // Record every other URI this document's values were derived from,
            // so changes to those resources can trigger re-indexing.
            if (runtime.getConfiguration().isUpdateDependencies()) {
                for (Value dependency : dependencies) {
                    if (dependency instanceof URI && !dependency.equals(resource)) {
                        doc.addField("lmf.dependencies", dependency.stringValue());
                    }
                }
            }

            runtime.queueInputDocument(doc);

            connection.commit();
        } finally {
            connection.close();
        }
    } catch (RepositoryException e) {
        log.warn("Could not build index document for " + resource.stringValue(), e);
    } catch (Throwable t) {
        log.error("unknown error while indexing document", t);
    }
}

From source file:at.pagu.soldockr.core.convert.MappingSolrConverterTest.java

License:Apache License

/**
 * Verifies that writing a bean through the converter copies its simple
 * properties into the matching Solr document fields.
 */
@Test
public void testWrite() {
    ConvertableBean bean = new ConvertableBean("j73x73r", 1979);
    SolrInputDocument document = new SolrInputDocument();

    converter.write(bean, document);

    Assert.assertEquals(bean.getStringProperty(), document.getFieldValue("stringProperty"));
    Assert.assertEquals(bean.getIntProperty(), document.getFieldValue("intProperty"));
}

From source file:at.pagu.soldockr.core.SolrTemplate.java

License:Apache License

/**
 * Converts an arbitrary bean into a {@link SolrInputDocument} using the
 * configured converter. A bean that already is a {@code SolrInputDocument}
 * is returned unchanged.
 *
 * @param bean the object to convert
 * @return the resulting Solr input document
 */
@Override
public SolrInputDocument convertBeanToSolrInputDocument(Object bean) {
    // Short-circuit: nothing to convert if it is already a Solr document.
    if (bean instanceof SolrInputDocument) {
        return (SolrInputDocument) bean;
    }

    final SolrInputDocument doc = new SolrInputDocument();
    getConverter().write(bean, doc);
    return doc;
}

From source file:au.edu.aekos.shared.solr.index.SubmissionSolrDocumentBuilder.java

/**
 * Creates a base Solr document for a SHaRED dataset submission with the
 * standard identity, description and study-area-visit-date fields.
 *
 * @param id                     unique document id
 * @param doi                    dataset DOI
 * @param title                  dataset title (also indexed as the formal name)
 * @param datasetNameForCitation name used when citing the dataset
 * @param datasetAbstract        dataset abstract (also indexed as description)
 * @param firstVisit             date of the first study-area visit
 * @param lastVisit              date of the last study-area visit
 * @return the initialised {@link SolrInputDocument}
 */
public static SolrInputDocument initialiseSHaREDDocument(String id, String doi, String title,
        String datasetNameForCitation, String datasetAbstract, Date firstVisit, Date lastVisit) {
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField("id", id);
    doc.addField("doi", doi);
    doc.addField("title", title);
    doc.addField("datasetFormalName_t", title);
    doc.addField("datasetNameForCitation_t", datasetNameForCitation);
    doc.addField("description", datasetAbstract);
    doc.addField("datasetAbstract_t", datasetAbstract);
    doc.addField("firstStudyAreaVisitDate_dt", firstVisit);
    // BUG FIX: previously indexed firstVisit here as well, leaving the
    // lastVisit parameter entirely unused.
    doc.addField("lastStudyAreaVisitDate_dt", lastVisit);
    return doc;
}

From source file:au.org.ala.bhl.service.IndexingService.java

License:Open Source License

/**
 * Send a page of text to SOLR for indexing
 * /*from  w  w w.j  a  va  2 s. c  o  m*/
 * @param item
 * @param pageId
 * @param pageText
 * @param server
 */
private void indexPage(ItemDescriptor item, String pageId, String pageText, SolrServer server) {
    if (!StringUtils.isEmpty(pageText)) {
        SolrInputDocument doc = new SolrInputDocument();
        doc.addField("id", pageId, 1.0f);
        doc.addField("name", item.getTitle(), 1.0f);
        doc.addField("text", pageText);
        doc.addField("internetArchiveId", item.getInternetArchiveId());
        doc.addField("itemId", item.getItemId());
        doc.addField("pageId", pageId, 1.0f);
        doc.addField("pageUrl", String.format("http://bhl.ala.org.au/pageimage/%s", pageId));

        JsonNode metadata = _docCache.getItemMetaData(item);
        if (metadata != null) {
            addItemMetadata(doc, metadata);
            JsonNode titleData = _docCache.getTitleMetaData(item);
            if (titleData != null) {
                addTitleMetadata(doc, titleData);
            }
        }

        try {
            server.add(doc);
        } catch (Exception ex) {
            throw new RuntimeException(ex);
        }
    }
}

From source file:au.org.intersect.dms.catalogue.db.DatasetIndexer.java

License:Open Source License

/**
 * Builds one Solr document per dataset and submits the whole batch to Solr.
 * <p>
 * Datasets whose metadata extraction fails are skipped with an error log
 * entry; indexing errors at submit time are logged, never rethrown.
 *
 * @param datasets the datasets to (re-)index
 */
public void indexDatasets(Collection<DbDataset> datasets) {
    List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>();

    // (removed unnecessary "dataset:" loop label — the loop is not nested)
    for (DbDataset dataset : datasets) {
        LOGGER.debug("Indexing dataset with ID {}", dataset.getId());
        SolrInputDocument sid = new SolrInputDocument();
        addBasicFields(dataset, sid);

        // Seed the free-text summary with the basic attributes; metadata text
        // is appended below once extracted.
        StringBuilder summary = new StringBuilder();
        summary.append(dataset.getCreationDate()).append(SPACE).append(dataset.getId()).append(SPACE)
                .append(dataset.getOwner()).append(SPACE).append(dataset.getUrl());

        String metadata;
        try {
            metadata = indexMetadata(dataset, sid);
        } catch (IOException e) {
            LOGGER.error("Exception during indexing. Skipping this dataset.", e);
            continue;
        } catch (TransformerException e) {
            LOGGER.error("Exception during indexing. Skipping this dataset.", e);
            continue;
        }

        if (metadata.length() > 0) {
            // (removed redundant toString() — metadata already is a String)
            sid.addField("dataset.metadata_t", metadata);
            summary.append(SPACE).append(metadata);
        }

        // Add summary field to allow searching documents for objects of this type.
        // Pass the String form explicitly rather than the StringBuilder itself.
        sid.addField("dataset_solrsummary_t", summary.toString());

        documents.add(sid);
    }

    try {
        SolrServer solrServer = DbDataset.solrServer();
        solrServer.add(documents);
        solrServer.commit();
    } catch (SolrServerException e) {
        LOGGER.error(INDEX_EXCEPTION_MESSAGE, e);
    } catch (IOException e) {
        LOGGER.error(INDEX_EXCEPTION_MESSAGE, e);
    }
}

From source file:bamboo.trove.rule.RuleRecheckWorker.java

License:Apache License

/**
 * Re-checks one rule result, writing an update when anything changed and
 * acknowledging with an ID-only document otherwise. Worker time is recorded
 * against the manager's ".worker" timer.
 */
@Override
public void run() {
    Timer.Context context = manager.getTimer(manager.getName() + ".worker").time();

    SolrInputDocument update = processResultsRecheckRule();
    if (update == null) {
        // No write activity — acknowledge with a minimal ID-only document.
        update = new SolrInputDocument();
        update.addField(SolrEnum.ID.toString(), id);
        manager.acknowledge(update);
    } else {
        manager.update(update);
        workLog.wroteDocument();
    }

    context.stop();
}