Java tutorial
/******************************************************************************* * Copyright (c) 2013 * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Public License v2.0 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html * * Contributors: * Lautaro Matas (lmatas@gmail.com) - Desarrollo e implementación * Emiliano Marmonti(emarmonti@gmail.com) - Coordinación del componente III * * Este software fue desarrollado en el marco de la consultoría "Desarrollo e implementación de las soluciones - Prueba piloto del Componente III -Desarrollador para las herramientas de back-end" del proyecto "Estrategia Regional y Marco de Interoperabilidad y Gestión para una Red Federada Latinoamericana de Repositorios Institucionales de Documentación Científica" financiado por Banco Interamericano de Desarrollo (BID) y ejecutado por la Cooperación Latino Americana de Redes Avanzadas, CLARA. ******************************************************************************/ package org.lareferencia.backend.indexer; import java.io.File; import org.apache.commons.codec.digest.DigestUtils; import java.io.IOException; import java.io.StringWriter; import java.security.MessageDigest; import java.util.List; import javax.xml.transform.OutputKeys; import javax.xml.transform.Result; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerConfigurationException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.transform.stream.StreamSource; import org.apache.solr.client.solrj.SolrServerException; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.DirectXmlRequest; import org.lareferencia.backend.domain.NetworkSnapshot; import org.lareferencia.backend.domain.OAIRecord; import 
org.lareferencia.backend.domain.RecordStatus;
import org.lareferencia.backend.harvester.OAIRecordMetadata;
import org.lareferencia.backend.repositories.NetworkSnapshotRepository;
import org.lareferencia.backend.repositories.OAIRecordRepository;
import org.lareferencia.backend.util.MedatadaDOMHelper;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.PageRequest;

/**
 * Indexes the valid records of a network snapshot into Solr.
 *
 * <p>Each record is run through the configured XSLT stylesheet and the resulting
 * fragments are posted to the Solr {@code /update} handler in batches of
 * {@link #PAGE_SIZE} records.
 */
public class IndexerImpl implements IIndexer {

    /** Number of records read from the repository and sent to Solr per batch. */
    private static final int PAGE_SIZE = 1000;

    private static final TransformerFactory xformFactory = TransformerFactory.newInstance();

    /** XSLT stylesheet applied to every record before it is sent to Solr. */
    private final File stylesheet;

    /** URL passed to every {@link HttpSolrServer} created by this indexer. */
    private final String solrURL;

    @Autowired
    private OAIRecordRepository recordRepository;

    @Autowired
    private NetworkSnapshotRepository networkSnapshotRepository;

    /**
     * Creates an indexer for the given stylesheet and Solr endpoint.
     *
     * @param xslFileName path of the XSLT stylesheet file
     * @param solrURL     URL of the Solr server that receives the updates
     * @throws IndexerException declared for callers; not thrown by this constructor body
     */
    public IndexerImpl(String xslFileName, String solrURL) throws IndexerException {
        this.stylesheet = new File(xslFileName);
        this.solrURL = solrURL;
    }

    /**
     * Builds a transformer for the configured stylesheet whose output has no XML
     * declaration, no indentation and UTF-8 encoding.
     *
     * @return a freshly configured transformer
     * @throws IndexerException if the stylesheet cannot be turned into a transformer
     */
    private Transformer buildTransformer() throws IndexerException {
        try {
            // NOTE(review): the transformer created from xformFactory is discarded
            // right away in favour of the one returned by MedatadaDOMHelper, so the
            // stylesheet is compiled twice. Kept because the helper's throws clause
            // is not visible from this file; confirm and drop the first build.
            StreamSource stylesource = new StreamSource(stylesheet);
            Transformer trf = xformFactory.newTransformer(stylesource);
            trf = MedatadaDOMHelper.buildXSLTTransformer(stylesheet);

            trf.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
            trf.setOutputProperty(OutputKeys.INDENT, "no");
            trf.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            return trf;
        } catch (TransformerConfigurationException e) {
            // Chain the exception itself so its stack trace is not lost.
            throw new IndexerException(e.getMessage(), e);
        }
    }

    /**
     * Replaces the Solr documents of the snapshot's country with the valid records
     * of the snapshot.
     *
     * <p>The method is synchronized so that two indexing runs on this instance do
     * not overlap: Solr commits are not isolated, so interleaved runs would commit
     * each other's partial updates.
     *
     * <p>The steps are: delete every document whose {@code country_iso} matches the
     * snapshot's network, add the transformed records page by page, then commit.
     * On any failure a rollback is requested and {@code false} is returned.
     *
     * @param snapshot snapshot whose valid records are indexed
     * @return {@code true} if every update and the final commit were sent without
     *         error, {@code false} otherwise
     */
    public synchronized boolean index(NetworkSnapshot snapshot) {
        try {
            String countryISO = snapshot.getNetwork().getCountryISO();

            // The stylesheet and the country parameters are the same for every page,
            // so the transformer is built once, before anything is sent to Solr.
            Transformer trf = buildTransformer();
            trf.setParameter("country_iso", countryISO);
            trf.setParameter("country", snapshot.getNetwork().getName());

            // Remove the documents of the snapshot's country.
            sendUpdateToSolr("<delete><query>country_iso:" + countryISO + "</query></delete>");

            // Add the records PAGE_SIZE at a time.
            Page<OAIRecord> page = recordRepository.findBySnapshotIdAndStatus(snapshot.getId(),
                    RecordStatus.VALID, new PageRequest(0, PAGE_SIZE));
            int totalPages = page.getTotalPages();

            for (int i = 0; i < totalPages; i++) {
                // Page 0 was already loaded to learn the page count.
                if (i > 0) {
                    page = recordRepository.findBySnapshotIdAndStatus(snapshot.getId(),
                            RecordStatus.VALID, new PageRequest(i, PAGE_SIZE));
                }

                System.out.println("Indexando Snapshot: " + snapshot.getId() + " de: "
                        + snapshot.getNetwork().getName() + " pgina: " + i + " de: " + totalPages);

                // All records of the page are written into one buffer; the XML
                // declaration is omitted, so the fragments can be concatenated.
                StringWriter addPayload = new StringWriter();
                List<OAIRecord> records = page.getContent();

                for (OAIRecord record : records) {
                    OAIRecordMetadata domRecord =
                            new OAIRecordMetadata(record.getIdentifier(), record.getPublishedXML());
                    Result output = new StreamResult(addPayload);

                    // Unique but mutable id for Solr.
                    trf.setParameter("solr_id", countryISO + "_" + snapshot.getId().toString()
                            + "_" + record.getId().toString());
                    // Permanent id for VuFind, derived from the published XML.
                    trf.setParameter("vufind_id",
                            countryISO + "_" + DigestUtils.md5Hex(record.getPublishedXML()));
                    // Header id for staff.
                    trf.setParameter("header_id", record.getIdentifier());

                    trf.transform(new DOMSource(domRecord.getDOMDocument()), output);
                }

                sendUpdateToSolr("<add>" + addPayload.toString() + "</add>");
            }

            // Commit the changes.
            sendUpdateToSolr("<commit/>");
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            rollbackQuietly();
            return false;
        }
    }

    /** Requests a Solr rollback; a failure of the rollback itself is only reported. */
    private void rollbackQuietly() {
        try {
            sendUpdateToSolr("<rollback/>");
        } catch (SolrServerException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Posts an XML update message to the Solr {@code /update} handler.
     *
     * @param data XML update message (for example {@code <add>}, {@code <delete>},
     *             {@code <commit/>})
     * @throws SolrServerException if the Solr client reports an error
     * @throws IOException         if communication with the server fails
     */
    private void sendUpdateToSolr(String data) throws SolrServerException, IOException {
        HttpSolrServer server = new HttpSolrServer(solrURL);
        try {
            server.request(new DirectXmlRequest("/update", data));
        } finally {
            // A new client is created per call, so release its HTTP resources here.
            server.shutdown();
        }
    }
}