// Java tutorial
/* * @author Jon Deering Copyright 2011 Saint Louis University. Licensed under the Educational Community License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.osedu.org/licenses/ECL-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ package textdisplay; import com.lowagie.text.Chunk; import com.lowagie.text.Document; import com.lowagie.text.DocumentException; import com.lowagie.text.Element; import com.lowagie.text.Font; import com.lowagie.text.Paragraph; import com.lowagie.text.pdf.BaseFont; import com.lowagie.text.pdf.PdfWriter; import com.tutego.jrtf.Rtf; import com.tutego.jrtf.RtfPara; import com.tutego.jrtf.RtfText; import static com.tutego.jrtf.Rtf.rtf; import static com.tutego.jrtf.RtfDocfmt.*; import static com.tutego.jrtf.RtfHeader.*; import static com.tutego.jrtf.RtfInfo.*; import static com.tutego.jrtf.RtfFields.*; import static com.tutego.jrtf.RtfPara.*; import static com.tutego.jrtf.RtfSectionFormatAndHeaderFooter.*; import static com.tutego.jrtf.RtfText.*; import static com.tutego.jrtf.RtfUnit.*; import java.io.BufferedReader; import java.io.File; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.FileReader; import java.io.FileWriter; import java.io.IOException; import java.io.OutputStream; import java.io.Writer; import java.util.Enumeration; import java.util.Hashtable; import java.util.Stack; import java.util.logging.Level; import java.util.logging.Logger; /** *A class to provide output formatting based on xml tags in a document. 
*/ public class TagFilter { private String text; public TagFilter(String text) { this.text = text; } /** * Build an array of tag names that are present in the document. * @return an array of tags with no brackets or parameters */ public String[] getTags() { //this method does not use existing xml libraries because there is no presumption that the document is a well formed xml document //it can be a single page from a document, or just have some tags wrapping certain items that the user intends to style. String[] parts = text.split("<"); Hashtable h = new Hashtable(); Stack<String> inOrder = new Stack(); for (int i = 0; i < parts.length; i++) { String[] tmp = parts[i].split(">"); //if there was a > tag, then this is an actual tag, not a random angle bracket, so add it to the list if (tmp.length > 1 || parts[i].endsWith(">")) { String thisTag = tmp[0]; if (thisTag.endsWith("/")) { thisTag = thisTag.split(" ")[0]; } thisTag = thisTag.split(" ")[0]; if (h.contains(thisTag)) { } else { //find the matching end tag before adding this Boolean b = false; inOrder.add(thisTag); h.put(thisTag, thisTag); } } } String[] toret = new String[0]; while (!inOrder.isEmpty()) { //only add the tag to the tag list if it is self closing or has a closing tag Boolean addThis = false; String theTag = inOrder.pop(); if (theTag.endsWith("/")) addThis = true; if (h.contains("/" + theTag)) { addThis = true; } if (addThis) { String[] tmp = new String[toret.length + 1]; for (int i = 0; i < toret.length; i++) tmp[i] = toret[i]; tmp[tmp.length - 1] = theTag; toret = tmp; } } return toret; } /**Remove tags in the along with any text or other tags inside these tags*/ public String removeTagsAndContents(String[] tagsToExclude) { //this method does not use existing xml libraries because there is no presumption that the document is a well formed xml document //it can be a single page from a document, or just have some tags wrapping certain items that the user intends to style. 
if (tagsToExclude.length == 0) return text; String content = text; for (int i = 0; i < tagsToExclude.length; i++) { if (tagsToExclude[i] != null && tagsToExclude[i].compareTo("") != 0) { String[] parts = text.split("<" + tagsToExclude[i] + " .*?>"); content = ""; content += parts[0]; for (int j = 1; j < parts.length; j++) { String[] tmp = parts[j].split("</" + tagsToExclude[i] + ">"); if (tmp.length == 2) { content += tmp[1]; } } } } return content; } /**Remove these tags along with any text or other tags inside these tags*/ public String stripTags(String[] tagsToExclude) { //this method does not use existing xml libraries because there is no presumption that the document is a well formed xml document //it can be a single page from a document, or just have some tags wrapping certain items that the user intends to style. if (tagsToExclude.length == 0) return text; String content = text; for (int i = 0; i < tagsToExclude.length; i++) { if (tagsToExclude[i] != null && tagsToExclude[i].compareTo("") != 0) { String[] parts = content.split("<" + tagsToExclude[i] + ">|<" + tagsToExclude[i] + " +.*?>"); //System.out.print("tag is "+ tagsToExclude[i]+"\n"); //for(int j=0;j<parts.length;j++) // System.out.print("part "+j+" is: "+parts[j]+"\n"); content = ""; content += parts[0]; for (int j = 1; j < parts.length; j++) { String[] tmp = parts[j].split("</" + tagsToExclude[i] + ">"); if (tmp.length == 2) { content += tmp[0] + tmp[1]; } else { if (tmp.length < 2) { System.out.print("Missed closing for " + tagsToExclude[i] + "\n" + tmp[0] + "\n"); content += tmp[0]; } } } } } return content; } public enum styles { italic, bold, underlined, superscript, none, remove, paragraph }; public enum noteStyles { footnote, endnote, sidebyside, inline, remove }; public void replaceTagsWithPDFEncoding(String[] tags, styles[] tagStyles, OutputStream os) throws DocumentException { // FileWriter w = null; try { BaseFont bf = BaseFont.createFont("/usr/Junicode.ttf", BaseFont.IDENTITY_H, 
BaseFont.EMBEDDED); Document doc = new Document(); PdfWriter p = PdfWriter.getInstance(doc, os); doc.open(); Paragraph para = new Paragraph(); para.setFont(new Font(bf, 12, Font.NORMAL)); //doc.add(para); Font italic = new Font(bf, 12, Font.ITALIC); Font bold = new Font(bf, 12, Font.BOLD); Font underlined = new Font(bf, 12, Font.UNDERLINE); StringBuilder chunkBuffer = new StringBuilder(""); //holds the next bit of content that will be added to the pdf as a chunk styles chunkStyle = null; //the style to be applied to chunkBuffer when it gets added to the document String chunkTag = ""; Stack<String> wrappingTags = new Stack(); Stack<styles> wrappingStyles = new Stack(); String content = text; Boolean inTag = false; //is this inside a tag, meaning between the < and > String tagTextBuffer = ""; //the text of the tag, including name and any parameters Boolean beingTagged = false; //Is the parser currently reading character data that is surrounded by a tag that demands styling for (int charCounter = 0; charCounter < this.text.length(); charCounter++) { if (text.charAt(charCounter) == '>') { inTag = false; //if this was a self closing tag, dont do anything if (tagTextBuffer.contains("/>")) { tagTextBuffer = ""; } else { //this is a closing tag, save the chunk and pop the tag and style off of the stack if (tagTextBuffer.startsWith("/")) { if (chunkStyle != null) System.out.print(" closing tag " + tagTextBuffer + " with style " + chunkStyle.name() + "\n"); else System.out.print(" closing tag " + tagTextBuffer + " with style null" + "\n"); if (chunkStyle == styles.paragraph) chunkBuffer = new StringBuilder("\n" + chunkBuffer); Chunk c = new Chunk(chunkBuffer.toString()); styleChunk(c, chunkStyle); if (chunkStyle != styles.remove) para.add(c); chunkBuffer = new StringBuilder(""); chunkStyle = null; chunkTag = ""; if (!wrappingStyles.empty()) { chunkStyle = wrappingStyles.pop(); chunkTag = wrappingTags.pop(); } tagTextBuffer = ""; } else { //this is the closing bracket of an 
opening tag String tagName = tagTextBuffer.split(" ")[0]; System.out.print("tag is " + tagName + "\n"); for (int i = 0; i < tags.length; i++) { if (tags[i].compareTo(tagName) == 0) { // this is a tag that is suposed to be styled in the pdf if (chunkStyle != null) { //this tag is nested in a tag that was already applying styling. Add this chunk to the pdf and put the tag/style //for the previous tag on the stack, so when this new tag ends, the previous styling will resume. if (chunkStyle == styles.paragraph) chunkBuffer = new StringBuilder("\n" + chunkBuffer); Chunk c = new Chunk(chunkBuffer.toString()); styleChunk(c, chunkStyle); if (chunkStyle != styles.remove) para.add(c); wrappingStyles.add(chunkStyle); wrappingTags.add(chunkTag); chunkTag = tagName; chunkStyle = tagStyles[i]; chunkBuffer = new StringBuilder(""); } else { Chunk c = new Chunk(chunkBuffer.toString()); para.add(c); chunkTag = tagName; chunkStyle = tagStyles[i]; chunkBuffer = new StringBuilder(""); } } } tagTextBuffer = ""; } } } if (inTag) { tagTextBuffer += text.charAt(charCounter); } if (text.charAt(charCounter) == '<') { if (inTag) { //if we hit another < before hitting a > this was not a tag, so add the tagTextBuffer to the chunk. It was simply conent. 
chunkBuffer.append(tagTextBuffer); tagTextBuffer = ""; } inTag = true; } if (!inTag && text.charAt(charCounter) != '>') { chunkBuffer.append(text.charAt(charCounter)); } } Chunk c = new Chunk(chunkBuffer.toString()); para.add(c); doc.newPage(); doc.add(para); doc.newPage(); doc.close(); } catch (IOException ex) { Logger.getLogger(TagFilter.class.getName()).log(Level.SEVERE, null, ex); } finally { } } /**Apply a style (font) to a chunk of pdf text*/ private void styleChunk(Chunk c, styles s) { try { BaseFont bf = BaseFont.createFont("/usr/Junicode.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); BaseFont ita = BaseFont.createFont("/usr/Junicode-Italic.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); BaseFont bol = BaseFont.createFont("/usr/Junicode-Italic.ttf", BaseFont.IDENTITY_H, BaseFont.EMBEDDED); if (bf.charExists(540)) { System.out.print("font cant do 540\n"); } Font italic = new Font(ita, 12, Font.ITALIC); Font bold = new Font(bol, 12, Font.BOLD); Font underlined = new Font(bf, 12, Font.UNDERLINE); Font superscript = new Font(bf, 9, Font.NORMAL); if (s == styles.bold) { c.setFont(bold); } if (s == styles.italic) { c.setFont(italic); } if (s == styles.underlined) { c.setFont(underlined); } if (s == styles.superscript) { c.setTextRise(7.0f); c.setFont(superscript); } //wipe out that content } catch (DocumentException ex) { Logger.getLogger(TagFilter.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(TagFilter.class.getName()).log(Level.SEVERE, null, ex); } } /**Style the tags that have been passed in as requested and write the rtf document to the writer w*/ public void replaceTagsWithRTFEncoding(String[] tags, styles[] tagStyles, Writer w) { for (int i = 0; i < tagStyles.length; i++) { if (tagStyles[i] == null) tagStyles[i] = styles.none; } Stack<RtfText> paragraphs = new Stack(); String content = text; StringBuilder chunkBuffer = new StringBuilder(""); //holds the next bit of content that will be added to the pdf as a chunk 
styles chunkStyle = null; //the style to be applied to chunkBuffer when it gets added to the document String chunkTag = ""; Stack<String> wrappingTags = new Stack(); Stack<styles> wrappingStyles = new Stack(); Boolean inTag = false; //is this inside a tag, meaning between the < and > String tagTextBuffer = ""; //the text of the tag, including name and any parameters Boolean beingTagged = false; //Is the parser currently reading character data that is surrounded by a tag that demands styling for (int charCounter = 0; charCounter < this.text.length(); charCounter++) { if (text.charAt(charCounter) == '>') { inTag = false; //if this was a self closing tag, dont do anything if (tagTextBuffer.contains("/>") || tagTextBuffer.contains("-->")) { System.out.print("Skipping auto closing or comment tag " + tagTextBuffer + "\n"); tagTextBuffer = ""; } else { //this is a closing tag, save the chunk and pop the tag and style off of the stack if (tagTextBuffer.startsWith("/")) { if (chunkStyle != null) System.out.print(" closing tag " + tagTextBuffer.replace("/", "") + " with style " + chunkStyle.name() + "\n"); else System.out.print(" closing tag " + tagTextBuffer + " with style null and content " + chunkBuffer + "\n"); if (chunkStyle != styles.remove) { paragraphs.add(applyRTFStyle(chunkStyle, chunkBuffer.toString())); } chunkBuffer = new StringBuilder(""); chunkTag = ""; if (!wrappingStyles.empty()) { chunkStyle = wrappingStyles.pop(); chunkTag = wrappingTags.pop(); } else { System.out.print("Forcing style italic because style is unknown\n"); chunkStyle = styles.none; } tagTextBuffer = ""; } else { //this is the closing bracket of an opening tag String tagName = tagTextBuffer.split(" ")[0]; System.out.print("tag is " + tagName + "\n"); for (int i = 0; i < tags.length; i++) { if (tags[i].compareTo(tagName) == 0) { // this is a tag that is suposed to be styled in the pdf if (chunkStyle != null) { //this tag is nested in a tag that was already applying styling. 
Add this chunk to the pdf and put the tag/style //for the previous tag on the stack, so when this new tag ends, the previous styling will resume. //if(chunkStyle != styles.remove) paragraphs.add(applyRTFStyle(chunkStyle, chunkBuffer.toString())); wrappingStyles.add(chunkStyle); wrappingTags.add(chunkTag); System.out.print("Stack add " + chunkTag + " with style " + chunkStyle + "\n"); chunkTag = tagName; chunkStyle = tagStyles[i]; chunkBuffer = new StringBuilder(""); } else { paragraphs.add(text(chunkBuffer)); chunkTag = tagName; chunkStyle = tagStyles[i]; chunkBuffer = new StringBuilder(""); } } } tagTextBuffer = ""; } } } if (inTag) { tagTextBuffer += text.charAt(charCounter); } if (text.charAt(charCounter) == '<') { if (inTag) { //if we hit another < before hitting a > this was not a tag, so add the tagTextBuffer to the chunk. It was simply conent. chunkBuffer.append(tagTextBuffer); tagTextBuffer = ""; } inTag = true; } if (!inTag && text.charAt(charCounter) != '>') { chunkBuffer.append(text.charAt(charCounter)); } } if (chunkBuffer.length() > 0) { paragraphs.add(applyRTFStyle(styles.none, chunkBuffer.toString())); } Stack<RtfPara> textParas = new Stack(); RtfText[] textarray = new RtfText[paragraphs.size()]; for (int i = 0; i < paragraphs.size(); i++) { textarray[i] = paragraphs.get(i); } RtfPara p = RtfPara.p(textarray, true); textParas.add(p); Rtf.rtf().section(textParas).out(w); return; } public RtfText applyRTFStyle(styles tagStyle, String text) { RtfText styledPortion = RtfText.text(); Boolean styled = false; if (tagStyle == null) return styledPortion; if (tagStyle == styles.italic) { styledPortion = RtfText.italic(text); styled = true; } if (tagStyle == styles.bold) { styledPortion = RtfText.bold(text); styled = true; } if (tagStyle == styles.underlined) { styledPortion = RtfText.underline(text); styled = true; } if (tagStyle == styles.remove) { styledPortion = RtfText.text(""); styled = true; } if (tagStyle == styles.none) { styledPortion = 
RtfText.text(text); styled = true; } if (tagStyle == styles.paragraph) { styledPortion = RtfText.text("\n" + text); styled = true; } if (tagStyle == styles.superscript) { styledPortion = RtfText.superscript(text); styled = true; } if (!styled) { System.out.print("Unknown style, using default non styled text!\n"); styledPortion = RtfText.text(text); } return styledPortion; } public static void main(String[] args) throws FileNotFoundException, IOException, DocumentException { BufferedReader f = new BufferedReader(new FileReader(new File("/usr/web/test.xml"))); FileWriter w = new FileWriter(new File("/usr/web/filtered2.txt")); String txt = ""; while (f.ready()) { txt += f.readLine(); } txt = txt.replaceAll(" +", " "); // System.out.print(txt); TagFilter filter = new TagFilter(txt); String[] tags = filter.getTags(); for (int i = 0; i < tags.length; i++) { System.out.print(tags[i] + "\n"); if (tags[i].compareTo("p") == 0) tags[i] = ""; } //String tmp=filter.stripTags(tags); //w.append(tmp); //filter=new TagFilter(tmp); String[] o = new String[] { "p" }; filter.replaceTagsWithPDFEncoding(new String[] { "p", "note" }, new styles[] { styles.italic, styles.bold }, new FileOutputStream(new File("/usr/test.pdf"))); } }