// Java tutorial
/* * To change this license header, choose License Headers in Project Properties. * To change this template file, choose Tools | Templates * and open the template in the editor. */ package it.intecs.pisa.openCatalogue.solr.ingester; import com.google.gson.JsonArray; import com.google.gson.JsonObject; import it.intecs.pisa.gis.util.CoordinatesUtil; import it.intecs.pisa.log.Log; import it.intecs.pisa.metadata.filesystem.AbstractFilesystem; import it.intecs.pisa.metadata.filesystem.FileFilesystem; import it.intecs.pisa.metadata.filters.EndsWith; import it.intecs.pisa.metadata.filters.NotEndsWith; import it.intecs.pisa.metadata.filters.RegExpression; import it.intecs.pisa.openCatalogue.saxon.SaxonDocument; import it.intecs.pisa.openCatalogue.solr.SolrHandler; import it.intecs.pisa.util.schemas.SchemaCache; import it.intecs.pisa.util.schemas.SchemasUtil; import it.intecs.pisa.util.schematron.Schematron; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.StringWriter; import java.util.HashMap; import net.sf.saxon.s9api.SaxonApiException; import org.apache.velocity.VelocityContext; import org.apache.velocity.app.VelocityEngine; import org.apache.velocity.runtime.RuntimeConstants; import org.apache.velocity.tools.generic.DateTool; import org.apache.velocity.tools.generic.MathTool; import org.jdom2.Document; import org.jdom2.Element; import org.jdom2.filter.Filters; import org.jdom2.output.XMLOutputter; import org.jdom2.xpath.XPathExpression; import org.jdom2.xpath.XPathFactory; import org.xml.sax.SAXException; /** * * @author massi */ public abstract class BaseIngester { public static final String FILENAME = "filename"; public static final String HARVEST_FILE_NAME = "*.index"; public static final String METADATA_REPORT_TEMPLATE = "metadataReportsType.vm"; public static final String SLASH = "/"; public static final String STRING_PERIOD = "##PERIOD##"; public static final String TAG_DEFAULT_VALUE = "defaultValue"; public static final String 
TAG_INDEX_FIELD_NAME = "indexFieldName"; public static final String VELOCITY_DATE = "date"; public static final String VELOCITY_MATH = "math"; public static final String VELOCITY_METADATA_LIST = "metadata"; public static final String VELOCITY_PERIOD_END = "PERIOD_END"; public static final String VELOCITY_PERIOD_START = "PERIOD_START"; public static final String VELOCITY_PLATFORM_SHORT_NAME = "PLATFORM_SHORT_NAME"; public static final String VELOCITY_INSTRUMENT_SHORT_NAME = "INSTRUMENT_SHORT_NAME"; public static final String VELOCITY_OPERATIONAL_MODE = "OPERATIONAL_MODE"; public static final String VELOCITY_PRODUCT_TYPE = "PRODUCT_TYPE"; public static final String BROWSE_FROM_HARVEST = "browse_from_harvest"; protected static final String INGESTION_ITEM_STATUS_SUCCESS = "success"; protected static final String INGESTION_ITEM_STATUS_FAILURE = "failure"; protected static final String INGESTION_ITEM_STATUS_FAILURE_ON_VALLIDATION = "Item is not schema or schematron valid."; protected AbstractFilesystem metadataRepository; protected VelocityEngine ve; protected SaxonDocument configDocument; protected String sensorType; protected String dateTimeFormat; protected String elements_separator; protected SolrHandler solr = null; private Schematron schematron = null; private AbstractFilesystem schemaRoot = null; private AbstractFilesystem schemaFile = null; private static SchemaCache schemaCache = null; /* These field are being set during initialization through reflection */ public String idXPath; public String solrRequestTemplate; public String format; public void setConfiguration(AbstractFilesystem configDirectory) throws SAXException, IOException, SaxonApiException, Exception { ve = new VelocityEngine(); ve.setProperty(RuntimeConstants.RESOURCE_LOADER, "file"); ve.setProperty(RuntimeConstants.FILE_RESOURCE_LOADER_PATH, configDirectory.getAbsolutePath()); } public JsonObject ingestData(AbstractFilesystem file, HashMap<String, String> queryHeaders) throws Exception { if 
(file.isFile()) { Log.debug("Trying to ingest file " + file.getName()); String fileName = file.getName(); Document[] metadata = parse(file, queryHeaders); HashMap<String, String> ingestionStatuses = new HashMap<String, String>(); for (Document doc : metadata) { boolean isValid = true; String status = INGESTION_ITEM_STATUS_FAILURE_ON_VALLIDATION; isValid = validateMetadata(doc); if (isValid) { isValid = validateThroughSchematron(); storeMetadata(doc, isValid); if (isValid) { status = uploadMetadataToSolr(doc); } else { Log.error("Metadata is not schematron valid."); } } else { Log.error("Metadata file " + fileName + " (or part of it) is not valid. Ingestion is skipped."); } String itemId = getItemId(doc); ingestionStatuses.put(itemId, status); } return createIngestionResponse(ingestionStatuses); } return null; } public void ingestDataFromDir(AbstractFilesystem dir, HashMap<String, String> queryHeaders, Boolean includeSubdirs, String fileFilter) throws Exception { //AbstractFilesystem[] files = dir.list(false, null); AbstractFilesystem[] files = null; if (null != fileFilter) { NotEndsWith filters = new NotEndsWith(fileFilter); files = dir.list(includeSubdirs, filters); } else files = dir.list(includeSubdirs, null); int fileNo = getFilesNumber(files); Log.info("Going to process " + fileNo + " file(s)"); int totalingested = 0; int failed = 0; for (AbstractFilesystem file : files) { try { if (file.isFile()) { fileNo--; ingestData(file, queryHeaders); Log.info("[" + fileNo + "] files remaining"); totalingested++; } } catch (Exception e) { failed++; Log.error("Failed to ingest " + file.getName()); Log.error(e.getMessage()); } } Log.info(" ************************************************************************"); Log.info(" * Total processed " + totalingested); Log.info(" * No of failure " + failed); Log.info(" ************************************************************************"); } protected String uploadMetadataToSolr(org.jdom2.Document metadata) throws 
IOException, SaxonApiException, Exception { int retValue = 500; if (solr != null) { VelocityContext context = new VelocityContext(); context.put(VELOCITY_DATE, new DateTool()); context.put(VELOCITY_MATH, new MathTool()); context.put("coordinates", new CoordinatesUtil()); context.put("metadataDocument", metadata); StringWriter swOut = new StringWriter(); ve.getTemplate(solrRequestTemplate).merge(context, swOut); retValue = solr.postDocument(swOut.toString()); // only for debug .... TODO - remove it String key = getItemId(metadata); saveSolrfile(swOut.toString(), key); // only for debug .... TODO - remove it swOut.close(); } return retValue == 200 ? INGESTION_ITEM_STATUS_SUCCESS : INGESTION_ITEM_STATUS_FAILURE; } private boolean validateThroughSchematron() { //temporarily disabled /* * if (metadataRepository != null) { AbstractFilesystem metadataFile = * storeMetadata(metadata, isValid); if (schematron != null) { * schematron.Validate(metadataFile); } } */ return true; } protected JsonObject createIngestionResponse(HashMap<String, String> ingestionStatuses) { JsonObject response = new JsonObject(); JsonArray array = new JsonArray(); int success = 0; int failure = 0; int total = ingestionStatuses.size(); for (String key : ingestionStatuses.keySet()) { String itemStatus = ingestionStatuses.get(key); JsonObject item = new JsonObject(); item.addProperty("id", key); item.addProperty("status", ingestionStatuses.get(key)); array.add(item); if (itemStatus.equals("success") == false) { failure++; } else success++; } response.addProperty("total", total); response.addProperty("success", success); response.addProperty("failure", failure); if (array.size() > 0) response.add("report", array); return response; } private void saveSolrfile(String swOut, String key) throws IOException { Log.debug(swOut); if (metadataRepository != null) { FileFilesystem fs = new FileFilesystem(metadataRepository.getAbsolutePath() + "/" + key + ".slr"); fs.getOutputStream().write(swOut.getBytes()); } } 
private FileFilesystem storeMetadata(org.jdom2.Document metadata, boolean isValid) throws IOException { if (metadataRepository != null) { XPathExpression<Element> xpath = XPathFactory.instance().compile(idXPath, Filters.element()); String key = xpath.evaluateFirst(metadata.getRootElement()).getTextTrim().replace(":", "_").replace(".", "_"); FileFilesystem fs = null; if (isValid) { fs = new FileFilesystem(metadataRepository.getAbsolutePath() + "/" + key + ".xml"); } else { fs = new FileFilesystem(metadataRepository.getAbsolutePath() + "/" + key + ".notValid.xml"); } XMLOutputter outputter = new XMLOutputter(); String metadataString = outputter.outputString(metadata); byte[] b = metadataString.getBytes(); fs.getOutputStream().write(b); return fs; } else { return null; } } private boolean validateMetadata(Document metadata) { if (schemaRoot != null && schemaFile != null) { try { XMLOutputter outputter = new XMLOutputter(); String metadataString = outputter.outputString(metadata); byte[] b = metadataString.getBytes(); return SchemasUtil.SAXvalidate(new ByteArrayInputStream(b), schemaFile, schemaRoot, schemaCache); } catch (Exception e) { return false; } } else { return true; } } public void setSchema(AbstractFilesystem schemaFolder, AbstractFilesystem schema) { schemaRoot = schemaFolder; schemaFile = schema; if (schemaRoot != null && schemaFile != null) { schemaCache = new SchemaCache(schemaRoot); } } public void setSolrURL(String solrURL) { if (solrURL != null && !solrURL.equals("")) { solr = new SolrHandler(solrURL); } } public void setSchematronURI(String schematronURI) { if (schematronURI != null && !schematronURI.equals("")) { schematron = new Schematron(schematronURI); } } public void setRunsOnTomcat() { if (ve != null) { ve.setProperty("runtime.log.logsystem.class", "org.apache.velocity.runtime.log.NullLogSystem"); } } public void setMetedateRepository(AbstractFilesystem repo) { if (repo != null) { this.metadataRepository = repo; 
this.metadataRepository.mkdirs(); } } protected String getItemId(Document doc) { XPathExpression<Element> xpath = XPathFactory.instance().compile(idXPath, Filters.element()); String key = xpath.evaluateFirst(doc.getRootElement()).getTextTrim(); return key; } /** * This method parses the input file and provides an OEM version of data * * @param file * @return */ protected abstract Document[] parse(AbstractFilesystem file, HashMap<String, String> httpQueryData); public void install(AbstractFilesystem folder) { } private Integer getFilesNumber(AbstractFilesystem[] files) { Integer fn = 0; for (AbstractFilesystem file : files) { if (file.isFile()) fn++; } return fn; } }