List of usage examples for org.xml.sax.helpers.AttributesImpl.addAttribute
public void addAttribute(String uri, String localName, String qName, String type, String value)
From source file:org.apache.tika.parser.pdf.PDF2XHTML.java
private void handleSignature(AttributesImpl parentAttributes, PDSignatureField sigField, XHTMLContentHandler handler) throws SAXException { PDSignature sig = sigField.getSignature(); if (sig == null) { return;//from www.j a v a2 s. c o m } Map<String, String> vals = new TreeMap<>(); vals.put("name", sig.getName()); vals.put("contactInfo", sig.getContactInfo()); vals.put("location", sig.getLocation()); vals.put("reason", sig.getReason()); Calendar cal = sig.getSignDate(); if (cal != null) { dateFormat.setTimeZone(cal.getTimeZone()); vals.put("date", dateFormat.format(cal.getTime())); } //see if there is any data int nonNull = 0; for (String val : vals.keySet()) { if (val != null && !val.equals("")) { nonNull++; } } //if there is, process it if (nonNull > 0) { handler.startElement("li", parentAttributes); AttributesImpl attrs = new AttributesImpl(); attrs.addAttribute("", "type", "type", "CDATA", "signaturedata"); handler.startElement("ol", attrs); for (Map.Entry<String, String> e : vals.entrySet()) { if (e.getValue() == null || e.getValue().equals("")) { continue; } attrs = new AttributesImpl(); attrs.addAttribute("", "signdata", "signdata", "CDATA", e.getKey()); handler.startElement("li", attrs); handler.characters(e.getValue()); handler.endElement("li"); } handler.endElement("ol"); handler.endElement("li"); } }
From source file:org.apache.tika.parser.pdf18.PDF2XHTML.java
private void extractImages(PDResources resources, Set<COSBase> seenThisPage) throws SAXException { if (resources == null || config.getExtractInlineImages() == false) { return;// ww w . j a va 2s.c o m } Map<String, PDXObject> xObjects = resources.getXObjects(); if (xObjects == null) { return; } for (Map.Entry<String, PDXObject> entry : xObjects.entrySet()) { PDXObject object = entry.getValue(); if (object == null) { continue; } COSBase cosObject = object.getCOSObject(); if (seenThisPage.contains(cosObject)) { //avoid infinite recursion TIKA-1742 continue; } seenThisPage.add(cosObject); if (object instanceof PDXObjectForm) { extractImages(((PDXObjectForm) object).getResources(), seenThisPage); } else if (object instanceof PDXObjectImage) { PDXObjectImage image = (PDXObjectImage) object; Metadata metadata = new Metadata(); String extension = ""; if (image instanceof PDJpeg) { metadata.set(Metadata.CONTENT_TYPE, "image/jpeg"); extension = ".jpg"; } else if (image instanceof PDCcitt) { metadata.set(Metadata.CONTENT_TYPE, "image/tiff"); extension = ".tif"; } else if (image instanceof PDPixelMap) { metadata.set(Metadata.CONTENT_TYPE, "image/png"); extension = ".png"; } Integer imageNumber = processedInlineImages.get(entry.getKey()); if (imageNumber == null) { imageNumber = inlineImageCounter++; } String fileName = "image" + imageNumber + extension; metadata.set(Metadata.RESOURCE_NAME_KEY, fileName); // Output the img tag AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "src", "src", "CDATA", "embedded:" + fileName); attr.addAttribute("", "alt", "alt", "CDATA", fileName); handler.startElement("img", attr); handler.endElement("img"); //Do we only want to process unique COSObject ids? //If so, have we already processed this one? 
if (config.getExtractUniqueInlineImagesOnly() == true) { String cosObjectId = entry.getKey(); if (processedInlineImages.containsKey(cosObjectId)) { continue; } processedInlineImages.put(cosObjectId, imageNumber); } metadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE, TikaCoreProperties.EmbeddedResourceType.INLINE.toString()); EmbeddedDocumentExtractor extractor = getEmbeddedDocumentExtractor(); if (extractor.shouldParseEmbedded(metadata)) { ByteArrayOutputStream buffer = new ByteArrayOutputStream(); try { image.write2OutputStream(buffer); image.clear(); extractor.parseEmbedded(new ByteArrayInputStream(buffer.toByteArray()), new EmbeddedContentHandler(handler), metadata, false); } catch (IOException e) { // could not extract this image, so just skip it... } } } } resources.clear(); }
From source file:org.apache.tika.parser.pdf18.PDF2XHTML.java
private void addFieldString(PDField field, XHTMLContentHandler handler) throws SAXException { //Pick partial name to present in content and altName for attribute //Ignoring FullyQualifiedName for now String partName = field.getPartialName(); String altName = field.getAlternateFieldName(); StringBuilder sb = new StringBuilder(); AttributesImpl attrs = new AttributesImpl(); if (partName != null) { sb.append(partName).append(": "); }//from w ww .j a va 2 s .c o m if (altName != null) { attrs.addAttribute("", "altName", "altName", "CDATA", altName); } //return early if PDSignature field if (field instanceof PDSignatureField) { handleSignature(attrs, (PDSignatureField) field, handler); return; } try { //getValue can throw an IOException if there is no value String value = field.getValue(); if (value != null && !value.equals("null")) { sb.append(value); } } catch (IOException e) { //swallow } if (attrs.getLength() > 0 || sb.length() > 0) { handler.startElement("li", attrs); handler.characters(sb.toString()); handler.endElement("li"); } }
From source file:org.apache.tika.parser.pdf18.PDF2XHTML.java
private void handleSignature(AttributesImpl parentAttributes, PDSignatureField sigField, XHTMLContentHandler handler) throws SAXException { PDSignature sig = sigField.getSignature(); if (sig == null) { return;// w w w. j a v a 2s .c o m } Map<String, String> vals = new TreeMap<String, String>(); vals.put("name", sig.getName()); vals.put("contactInfo", sig.getContactInfo()); vals.put("location", sig.getLocation()); vals.put("reason", sig.getReason()); Calendar cal = sig.getSignDate(); if (cal != null) { dateFormat.setTimeZone(cal.getTimeZone()); vals.put("date", dateFormat.format(cal.getTime())); } //see if there is any data int nonNull = 0; for (String val : vals.keySet()) { if (val != null && !val.equals("")) { nonNull++; } } //if there is, process it if (nonNull > 0) { handler.startElement("li", parentAttributes); AttributesImpl attrs = new AttributesImpl(); attrs.addAttribute("", "type", "type", "CDATA", "signaturedata"); handler.startElement("ol", attrs); for (Map.Entry<String, String> e : vals.entrySet()) { if (e.getValue() == null || e.getValue().equals("")) { continue; } attrs = new AttributesImpl(); attrs.addAttribute("", "signdata", "signdata", "CDATA", e.getKey()); handler.startElement("li", attrs); handler.characters(e.getValue()); handler.endElement("li"); } handler.endElement("ol"); handler.endElement("li"); } }
From source file:org.apache.tika.parser.pkg.PackageParser.java
/**
 * Builds the metadata for one package entry and, when the entry has a name, writes an
 * empty {@code <div class="embedded" id="NAME">} marker for it.
 *
 * <p>Backslashes in the name are replaced with forward slashes before the name is used
 * as resource name, element id and embedded relationship id.
 *
 * @param name       entry name; null or empty means no name-related output
 * @param createAt   creation date, or null to omit
 * @param modifiedAt modification date, or null to omit
 * @param size       entry size, or null to omit
 * @param xhtml      handler receiving the XHTML events
 * @return the populated metadata, never null
 * @throws SAXException if the handler rejects an event
 */
protected static Metadata handleEntryMetadata(String name, Date createAt, Date modifiedAt, Long size,
        XHTMLContentHandler xhtml) throws SAXException, IOException, TikaException {
    Metadata entrydata = new Metadata();

    if (createAt != null) {
        entrydata.set(TikaCoreProperties.CREATED, createAt);
    }
    if (modifiedAt != null) {
        entrydata.set(TikaCoreProperties.MODIFIED, modifiedAt);
    }
    if (size != null) {
        entrydata.set(Metadata.CONTENT_LENGTH, Long.toString(size));
    }

    // Unnamed entries carry only the date/size metadata.
    if (name == null || name.isEmpty()) {
        return entrydata;
    }

    String normalizedName = name.replace('\\', '/');
    entrydata.set(Metadata.RESOURCE_NAME_KEY, normalizedName);

    AttributesImpl divAttrs = new AttributesImpl();
    divAttrs.addAttribute("", "class", "class", "CDATA", "embedded");
    divAttrs.addAttribute("", "id", "id", "CDATA", normalizedName);
    xhtml.startElement("div", divAttrs);
    xhtml.endElement("div");

    entrydata.set(Metadata.EMBEDDED_RELATIONSHIP_ID, normalizedName);
    return entrydata;
}
From source file:org.apache.tika.parser.pot.PooledTimeSeriesParser.java
/**
 * Reads the given stream as UTF-8 text and writes it to the XHTML content handler as an
 * {@code <h3>} title followed by a {@code <table>}: every input line becomes a
 * {@code <tr>} and every space-separated token on it a {@code <td>}. The stream is
 * closed once fully processed.
 *
 * @param stream     text stream holding one table row per line
 * @param xhtml      XHTML content handler
 * @param tableTitle the name of the matrix/table to display
 * @param frames     value of the table's {@code rows} attribute (number of frames read
 *                   from the video)
 * @param vecSize    value of the table's {@code cols} attribute (size of the OF or HOG
 *                   vector)
 * @throws SAXException if the XHTML SAX events could not be handled
 * @throws IOException  if an input error occurred
 */
private void doExtract(InputStream stream, XHTMLContentHandler xhtml, String tableTitle, String frames,
        String vecSize) throws SAXException, IOException {
    try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, UTF_8))) {
        AttributesImpl tableAttrs = new AttributesImpl();
        tableAttrs.addAttribute("", "", "rows", "CDATA", frames);
        tableAttrs.addAttribute("", "", "cols", "CDATA", vecSize);

        xhtml.startElement("h3");
        xhtml.characters(tableTitle);
        xhtml.endElement("h3");

        xhtml.startElement("table", tableAttrs);
        for (String row = reader.readLine(); row != null; row = reader.readLine()) {
            xhtml.startElement("tr");
            String[] cells = row.split(" ");
            for (String cell : cells) {
                xhtml.startElement("td");
                xhtml.characters(cell);
                xhtml.endElement("td");
            }
            xhtml.endElement("tr");
        }
        xhtml.endElement("table");
    }
}
From source file:org.cloudata.core.rest.CloudataRestService.java
private static void makeRowXml(Row row, XmlWriter resultDoc) throws SAXException { AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "key", "", "", row.getKey().toString()); resultDoc.startElement("", "row", "", attr); attr.removeAttribute(0);// ww w. j a v a 2s . c o m String[] columns = row.getColumnNames(); for (int i = 0; i < columns.length; i++) { makeColumnXml(row, columns[i], resultDoc); } resultDoc.endElement("row"); }
From source file:org.cloudata.core.rest.CloudataRestService.java
private static void makeColumnXml(Row row, String columnName, XmlWriter resultDoc) throws SAXException { List<Cell> columnCells = row.getCellList(columnName); AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "name", "", "", columnName); resultDoc.startElement("", "column", "", attr); attr.removeAttribute(0);// ww w .j a v a2 s . c o m for (Cell cell : columnCells) { attr.addAttribute("", "key", "", "", cell.getKey().toString()); resultDoc.startElement("", "cell", "", attr); attr.removeAttribute(0); attr.addAttribute("", "timestamp", "", "", Long.toString(cell.getValue().getTimestamp())); resultDoc.dataElement("", "value", "", attr, cell.getValue().getValueAsString()); attr.removeAttribute(0); /* * without timeStamp resultDoc.dataElement("value", * cell.getValue().getValueAsString()); resultDoc.endElement("cell"); */ resultDoc.endElement("cell"); } resultDoc.endElement("column"); }
From source file:org.cloudata.core.rest.CloudataRestService.java
public String getResponseMessage(String message, boolean error) { try {//from w ww. j ava2s . com StringWriter buffer = new StringWriter(); XmlWriter resultDoc = getXmlWriter(buffer); AttributesImpl attr = new AttributesImpl(); resultDoc.startDocument(); resultDoc.startElement("cloudata"); if (error) { attr.addAttribute("", "error", "", "", "yes"); resultDoc.startElement("", "message", "", attr); } else { resultDoc.startElement("message"); } resultDoc.characters(message); resultDoc.endElement("message"); resultDoc.endElement("cloudata"); resultDoc.endDocument(); resultDoc.flush(); return buffer.toString(); } catch (Exception e) { LOG.error(e.getMessage(), e); return e.getMessage(); } }
From source file:org.cloudata.core.rest.CloudataRestService.java
private static void makeTableSchemaXml(TableSchema tableSchema, XmlWriter resultDoc) throws IOException { try {//from w w w. j a v a 2 s.c o m AttributesImpl attr = new AttributesImpl(); attr.addAttribute("", "name", "", "", tableSchema.getTableName()); resultDoc.startElement("", "table", "", attr); resultDoc.startElement("description"); resultDoc.characters(tableSchema.getDescription()); resultDoc.endElement("description"); for (ColumnInfo column : tableSchema.getColumnInfos()) { attr = new AttributesImpl(); attr.addAttribute("", "name", "", "", column.getColumnName()); attr.addAttribute("", "type", "", "", column.getColumnType() == TableSchema.BLOB_TYPE ? "blob" : "normal"); resultDoc.startElement("", "column", "", attr); resultDoc.endElement("column"); } resultDoc.endElement("table"); } catch (Exception e) { throw new IOException(e.getMessage(), e); } }