Java tutorial
/** * Copyright 2015 DuraSpace, Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.fcrepo.indexer.sparql; import static com.google.common.util.concurrent.MoreExecutors.listeningDecorator; import static com.hp.hpl.jena.sparql.util.Context.emptyContext; import static com.hp.hpl.jena.update.UpdateExecutionFactory.createRemoteForm; import static java.util.concurrent.Executors.newFixedThreadPool; import static org.fcrepo.indexer.Indexer.IndexerType.RDF; import static org.slf4j.LoggerFactory.getLogger; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.OutputStream; import java.net.URI; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.concurrent.Callable; import com.google.common.annotations.VisibleForTesting; import com.google.common.util.concurrent.ListenableFutureTask; import com.google.common.util.concurrent.ListeningExecutorService; import com.hp.hpl.jena.rdf.model.Model; import com.hp.hpl.jena.rdf.model.StmtIterator; import com.hp.hpl.jena.graph.Node_URI; import com.hp.hpl.jena.graph.Triple; import com.hp.hpl.jena.sparql.engine.http.QueryEngineHTTP; import com.hp.hpl.jena.sparql.modify.UpdateProcessRemote; import com.hp.hpl.jena.sparql.modify.request.QuadDataAcc; import com.hp.hpl.jena.sparql.modify.request.UpdateDataInsert; import com.hp.hpl.jena.update.UpdateProcessor; import com.hp.hpl.jena.update.UpdateRequest; import org.apache.jena.atlas.io.IndentedWriter; import 
org.fcrepo.indexer.AsynchIndexer;
import org.slf4j.Logger;

/**
 * Indexes triples from Fedora into a triplestore using SPARQL Update.
 *
 * @author Esm Cowles
 * @author ajs6f
 * @since Aug 19, 2013
 **/
public class SparqlIndexer extends AsynchIndexer<Model, Void> {

    private String queryBase;

    private String updateBase;

    private boolean formUpdates = false;

    private static final Logger LOGGER = getLogger(SparqlIndexer.class);

    /**
     * Number of threads to use for operating against the triplestore.
     */
    private static final int THREAD_POOL_SIZE = 5;

    private ListeningExecutorService executorService =
            listeningDecorator(newFixedThreadPool(THREAD_POOL_SIZE));

    /**
     * Remove any current triples about the Fedora object and replace them with
     * the provided content.
     *
     * @param pid URI of the object being (re)indexed
     * @param model RDF statements to insert for the object
     * @return the callable produced for the insert request
     **/
    @Override
    public Callable<Void> updateSynch(final URI pid, final Model model) {
        LOGGER.debug("Received update for: {}", pid);

        // Non-recursive, blocking removal: existing triples are deleted on the
        // calling thread before the insert request is built.
        removeSynch(pid, false, true);

        // build a list of triples
        final StmtIterator triples = model.listStatements();
        final QuadDataAcc add = new QuadDataAcc();
        while (triples.hasNext()) {
            add.addTriple(triples.nextStatement().asTriple());
        }

        // send update to server
        LOGGER.debug("Sending update request for pid: {}", pid);
        return exec(new UpdateRequest(new UpdateDataInsert(add)));
    }

    /**
     * Perform a DESCRIBE query for triples about the Fedora object and remove
     * all triples with subjects starting with the same subject.
     *
     * @param subject URI of the object whose triples are removed
     * @return the callable produced for the delete request
     **/
    @Override
    public Callable<Void> removeSynch(final URI subject) {
        return removeSynch(subject, true, false);
    }

    /**
     * Describe the subject, collect every URI in the result that
     * {@link #matches(URI, String) matches} it, and issue one
     * {@code DELETE WHERE} command per collected URI.
     *
     * @param subject URI of the object whose triples are removed
     * @param recursive when true, object URIs of the described triples are
     *        considered in addition to subject URIs
     * @param blocking when true, the delete is run on the calling thread
     *        before this method returns; otherwise it is submitted to the
     *        executor service
     * @return the callable produced for the delete request
     */
    @VisibleForTesting
    protected Callable<Void> removeSynch(final URI subject, final boolean recursive,
            final boolean blocking) {
        LOGGER.debug("Received remove for: {}", subject);

        // find triples/quads to delete
        final String describeQuery = "DESCRIBE <" + subject.toString() + ">";
        final QueryEngineHTTP qexec = buildQueryEngineHTTP(describeQuery);

        // build list of triples to delete
        final Set<String> uris = new HashSet<>();
        try {
            final Iterator<Triple> results = qexec.execDescribeTriples();
            while (results.hasNext()) {
                final Triple triple = results.next();

                // add subject uri, if it is part of this object
                if (triple.getSubject().isURI()) {
                    final String uri = ((Node_URI) triple.getSubject()).getURI();
                    if (matches(subject, uri)) {
                        uris.add(uri);
                    }
                }

                // add object uri, if it is part of this object
                if (recursive && triple.getObject().isURI()) {
                    final String uri = ((Node_URI) triple.getObject()).getURI();
                    if (matches(subject, uri)) {
                        uris.add(uri);
                    }
                }
            }
        } finally {
            // Release the query execution even when the describe call or the
            // iteration over its results fails.
            qexec.close();
        }

        // build update commands
        final UpdateRequest del = buildUpdateRequest();
        for (final String uri : uris) {
            final String cmd = "DELETE WHERE { <" + uri + "> ?p ?o }";
            LOGGER.debug("Executing: {}", cmd);
            del.add(cmd);
        }

        // send updates
        return exec(del, blocking);
    }

    /**
     * Determine whether arg candidate is a sub-URI of arg resource, defined as
     * candidate-URI starting with resource-URI, plus an optional suffix
     * starting with a hash (#) or slash (/).
     *
     * @param resource the resource being removed
     * @param candidate a subject or object URI found in the describe result
     * @return true if candidate equals resource or extends it with a
     *         {@code /} or {@code #} suffix
     **/
    private boolean matches(final URI resource, final String candidate) {
        // All triples that will match this logic are ones that:
        // - have a candidate subject or object that equals the target resource of removal, or
        // - have a candidate subject or object that is prefixed with the resource of removal
        //   (therefore catching all children).
        final String base = resource.toString();
        return base.equals(candidate)
                || candidate.startsWith(base + "/")
                || candidate.startsWith(base + "#");
    }

    /**
     * Execute the update without blocking the calling thread.
     *
     * @param update the update request to send
     * @return the callable that performs the update
     */
    private Callable<Void> exec(final UpdateRequest update) {
        return exec(update, false);
    }

    /**
     * Build a callable that sends the update to {@code updateBase} and either
     * run it immediately (blocking) or submit it to the executor service.
     * An update with no operations yields a no-op callable and nothing is sent.
     *
     * NOTE(review): in the non-blocking case the callable is submitted here
     * and is also returned; whether callers execute the returned callable
     * again cannot be determined from this class - confirm against
     * AsynchIndexer.
     *
     * @param update the update request to send
     * @param blocking when true, run on the calling thread and log (rather
     *        than propagate) any exception
     * @return the callable that performs the update
     */
    private Callable<Void> exec(final UpdateRequest update, final boolean blocking) {
        if (update.getOperations().isEmpty()) {
            LOGGER.debug("Received empty update/remove operation.");
            return new Callable<Void>() {

                @Override
                public Void call() {
                    return null;
                }
            };
        }

        final Callable<Void> callable = new Callable<Void>() {

            @Override
            public Void call() {
                if (formUpdates) {
                    // form updates; exceptions from this branch propagate to the caller
                    final UpdateProcessor proc = createRemoteForm(update, updateBase);
                    proc.execute();
                } else {
                    // normal SPARQL updates; failures are logged and not rethrown
                    final UpdateProcessRemote proc =
                            new UpdateProcessRemote(update, updateBase, emptyContext);
                    try {
                        proc.execute();
                    } catch (final Exception e) {
                        LOGGER.error("Error executing Sparql update/remove!", e);
                    }
                }
                return null;
            }
        };

        if (blocking) {
            try {
                callable.call();
            } catch (final Exception e) {
                // The trailing throwable argument makes the logger record the
                // stack trace in addition to the formatted message.
                LOGGER.error("Error calling Sparql update/remove!, {}", e.getMessage(), e);
            }
        } else {
            final ListenableFutureTask<Void> task = ListenableFutureTask.create(callable);
            task.addListener(new Runnable() {

                @Override
                public void run() {
                    LOGGER.debug("Completed Sparql update/removal.");
                    if (LOGGER.isTraceEnabled()) {
                        try (final OutputStream buffer = new ByteArrayOutputStream()) {
                            final IndentedWriter out = new IndentedWriter(buffer);
                            update.output(out);
                            // The writer may hold output in its own buffer;
                            // flush so the stream is complete before it is read.
                            out.flush();
                            LOGGER.trace("Executed update/remove operation:\n{}",
                                    buffer.toString());
                            out.close();
                        } catch (final IOException e) {
                            LOGGER.error("Couldn't retrieve execution of update/remove operation!", e);
                        }
                    }
                }
            }, executorService);
            executorService.submit(task);
        }

        return callable;
    }

    /**
     * @return {@code RDF}, the indexer type of this indexer
     */
    @Override
    public IndexerType getIndexerType() {
        return RDF;
    }

    /**
     * Set whether to use SPARQL Update or form updates.
     * @param b boolean value for whether to use SPARQL update or form update
     **/
    public void setFormUpdates(final boolean b) {
        this.formUpdates = b;
    }

    /**
     * Set base URL for SPARQL Query requests.
     * @param url base URL for SPARQL query request
     **/
    public void setQueryBase(final String url) {
        this.queryBase = url;
    }

    /**
     * Set base URL for SPARQL Update requests.
     * @param url base URL for SPARQL update requests
     **/
    public void setUpdateBase(final String url) {
        this.updateBase = url;
    }

    /**
     * @return the executor service used for non-blocking updates
     */
    @Override
    public ListeningExecutorService executorService() {
        return executorService;
    }

    /**
     * Note: Protected for Unit Tests to overwrite.
     * @param describeQuery the describe query
     * @return the query engine http
     */
    protected QueryEngineHTTP buildQueryEngineHTTP(final String describeQuery) {
        return new QueryEngineHTTP(queryBase, describeQuery);
    }

    /**
     * Note: Protected for Unit Tests to overwrite.
     * @return update request
     */
    protected UpdateRequest buildUpdateRequest() {
        return new UpdateRequest();
    }
}