/*
License $Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $
Copyright (c) 2001-2005 tagtraum industries.
LGPL
====
jo! is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
jo! is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
For LGPL see <http://www.fsf.org/copyleft/lesser.txt>
Sun license
===========
This release contains software by Sun Microsystems. Therefore
the following conditions have to be met, too. They apply to the
files
- lib/mail.jar
- lib/activation.jar
- lib/jsse.jar
- lib/jcert.jar
- lib/jaxp.jar
- lib/crimson.jar
- lib/servlet.jar
- lib/jnet.jar
- lib/jaas.jar
- lib/jaasmod.jar
contained in this release.
a. Licensee may not modify the Java Platform
Interface (JPI, identified as classes contained within the javax
package or any subpackages of the javax package), by creating additional
classes within the JPI or otherwise causing the addition to or modification
of the classes in the JPI. In the event that Licensee creates any
Java-related API and distribute such API to others for applet or
application development, you must promptly publish broadly, an accurate
specification for such API for free use by all developers of Java-based
software.
b. Software is confidential copyrighted information of Sun and
title to all copies is retained by Sun and/or its licensors. Licensee
shall not modify, decompile, disassemble, decrypt, extract, or otherwise
reverse engineer Software. Software may not be leased, assigned, or
sublicensed, in whole or in part. Software is not designed or intended
for use in on-line control of aircraft, air traffic, aircraft navigation
or aircraft communications; or in the design, construction, operation or
maintenance of any nuclear facility. Licensee warrants that it will not
use or redistribute the Software for such purposes.
c. Software is provided "AS IS," without a warranty
of any kind. ALL EXPRESS OR IMPLIED REPRESENTATIONS AND WARRANTIES,
INCLUDING ANY IMPLIED WARRANTY OF MERCHANTABILITY, FITNESS FOR A
PARTICULAR PURPOSE OR NON-INFRINGEMENT, ARE HEREBY EXCLUDED.
d. This License is effective until terminated. Licensee may
terminate this License at any time by destroying all copies of Software.
This License will terminate immediately without notice from Sun if Licensee
fails to comply with any provision of this License. Upon such termination,
Licensee must destroy all copies of Software.
e. Software, including technical data, is subject to U.S.
export control laws, including the U.S. Export Administration Act and its
associated regulations, and may be subject to export or import regulations
in other countries. Licensee agrees to comply strictly with all such
regulations and acknowledges that it has the responsibility to obtain
licenses to export, re-export, or import Software. Software may not be
downloaded, or otherwise exported or re-exported (i) into, or to a national
or resident of, Cuba, Iraq, Iran, North Korea, Libya, Sudan, Syria or any
country to which the U.S. has embargoed goods; or (ii) to anyone on the
U.S. Treasury Department's list of Specially Designated Nations or the U.S.
Commerce Department's Table of Denial Orders.
Feedback
========
We encourage your feedback and suggestions and want to use your feedback to
improve the Software. Send all such feedback to:
<feedback@tagtraum.com>
For more information on tagtraum industries and jo!
please see <http://www.tagtraum.com/>.
*/
package com.tagtraum.framework.markup;
import com.tagtraum.framework.util.FileLocation;
import com.tagtraum.framework.util.StackedReader;
import java.io.*;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.net.URL;
import java.util.Arrays;
import java.util.HashMap;
/**
 * Reads textual data and splits it into {@link I_Element}s. A part of the
 * text that begins with the 'magic' character (by default '&lt;') is matched
 * against the registered tag starts; the element class registered for the
 * longest matching tag start is instantiated and asked to parse itself from
 * the stream. Everything else, including parts that begin with the magic
 * character but match no registered tag start, is returned as an instance of
 * the default element class (initially {@link RawDataElement}).<p>
 * Element classes are registered with {@link #addElementClass} and must offer
 * a public constructor that takes a single <code>String</code>.<p>
 * NOTE(review): apart from the lazy creation of the internal reader, no
 * method of this class synchronizes; presumably an instance is meant to be
 * used by a single thread - confirm before sharing instances.
 *
 * @author <a href="mailto:hs@tagtraum.com">Hendrik Schreiber</a>
 * @version 1.1beta1 $Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $
 */
public class Parser {

    /**
     * Source-Version
     */
    public static String vcid = "$Id: Parser.java,v 1.9 2005/07/06 18:31:00 hendriks73 Exp $";

    /**
     * Capacity of the pushback buffer that wraps the raw reader. It has to
     * hold the longest registered tag start plus the magic character.
     */
    private static final int PUSHBACK_BUFFER_SIZE = 256;

    /**
     * Internal Reader. Created lazily by {@link #nextElement()} and reset to
     * <code>null</code> by {@link #close()}.
     */
    protected PushbackReader myReader;

    /**
     * The reader handed to the constructor; wrapped by {@link #myReader}.
     */
    protected Reader myRawReader;

    // data organization. first shot. this should be a tree

    /**
     * Maps a tag start (String) to the Constructor of its element class.
     */
    protected HashMap myElementMap;

    /**
     * Element constructors, index-aligned with {@link #myTagStarts}.
     */
    protected Constructor[] myElements;

    /**
     * Registered tag starts in ascending natural order; <code>null</code>
     * until the first registration.
     */
    protected String[] myTagStarts;

    /**
     * Constructor used to create elements for unrecognized text.
     */
    protected Constructor myDefaultElementConstructor;

    /**
     * Length of the longest registered tag start.
     */
    protected int myLongestTagStartLength;

    /**
     * URL this parser is associated with; may be <code>null</code>.
     */
    protected URL myURL;

    /**
     * Character that introduces a tag.
     */
    protected int myMagicChar;

    /**
     * Create a new Parser.
     *
     * @param aReader the Reader to parse. If it is a
     * <code>LineNumberReader</code>, its line number is reset to 1.
     */
    public Parser(Reader aReader) {
        // make sure we are starting with line 1
        if (aReader instanceof LineNumberReader) {
            ((LineNumberReader)aReader).setLineNumber(1);
        }
        myReader = null;
        myRawReader = aReader;
        myElementMap = new HashMap();
        try {
            myDefaultElementConstructor = RawDataElement.class.getConstructor(new Class[]{
                String.class
            });
        }
        catch (NoSuchMethodException nsme) {
            // Kept as a report-only failure: callers may still install their
            // own constructor through setDefaultElementClass().
            nsme.printStackTrace();
        }
        myMagicChar = (int)'<';
    }

    /**
     * Creates a new Parser.
     *
     * @param aURL the URL is opened and the resulting InputStream is
     * wrapped with an InputStreamReader using the default encoding
     * @throws IOException if the URL cannot be opened
     */
    public Parser(URL aURL) throws IOException {
        this(openReader(aURL, null, false));
        setURL(aURL);
    }

    /**
     * Creates a new Parser.
     *
     * @param aURL the URL is opened and the resulting InputStream is
     * wrapped with an InputStreamReader using the specified encoding
     * @param anEncoding the way data is encoded
     * @throws IOException if the URL cannot be opened or the encoding is
     * not supported
     */
    public Parser(URL aURL, String anEncoding) throws IOException {
        this(openReader(aURL, anEncoding, true));
        setURL(aURL);
    }

    /**
     * Opens the URL and wraps its stream in a <code>LineNumberReader</code>.
     * If wrapping fails (e.g. because of an unsupported encoding) the already
     * opened stream is closed again instead of being leaked.
     *
     * @param aURL the URL to open
     * @param anEncoding the encoding to use; only read if
     * <code>useEncoding</code> is <code>true</code>
     * @param useEncoding <code>false</code> to use the default encoding
     * @return a reader for the content of the URL
     * @throws IOException if opening or wrapping the stream fails
     */
    private static Reader openReader(URL aURL, String anEncoding, boolean useEncoding) throws IOException {
        final InputStream theStream = aURL.openStream();
        boolean theSuccess = false;
        try {
            final Reader theStreamReader = useEncoding
                    ? new InputStreamReader(theStream, anEncoding)
                    : new InputStreamReader(theStream);
            final Reader theResult = new LineNumberReader(theStreamReader);
            theSuccess = true;
            return theResult;
        }
        finally {
            if (!theSuccess) {
                try {
                    theStream.close();
                }
                catch (IOException ignored) {
                    // the failure that brought us here is the one to report
                }
            }
        }
    }

    /**
     * Sets the default element Constructor. The default value for this is
     * the <code>String</code> constructor of {@link RawDataElement}. The
     * constructor is invoked with a single <code>null</code> argument.
     *
     * @param aDefaultElementConstructor the default element Constructor
     * @see #getDefaultElementConstructor()
     */
    public void setDefaultElementClass(Constructor aDefaultElementConstructor) {
        myDefaultElementConstructor = aDefaultElementConstructor;
    }

    /**
     * Returns the default element Constructor. The default value for this is
     * the <code>String</code> constructor of {@link RawDataElement}.
     *
     * @return the default element Constructor
     * @see #setDefaultElementClass
     */
    public Constructor getDefaultElementConstructor() {
        return myDefaultElementConstructor;
    }

    /**
     * Sets the 'magic' character.
     *
     * @param aMagicChar the stop character
     */
    public void setMagicChar(char aMagicChar) {
        myMagicChar = (int)aMagicChar;
    }

    /**
     * Returns the 'magic' character.
     *
     * @return the 'magic' character
     */
    public char getMagicChar() {
        return (char)myMagicChar;
    }

    /**
     * Associates this parser with an URL.
     *
     * @param aURL the URL this Parser is parsing.
     */
    public void setURL(URL aURL) {
        myURL = aURL;
    }

    /**
     * Returns the URL this parser is parsing. Note that this must
     * have been set explicitly using {@link #setURL} unless you
     * used the appropriate constructor.
     *
     * @return the URL this Parser is parsing.
     */
    public URL getURL() {
        return myURL;
    }

    /**
     * Returns the next {@link I_Element}.
     *
     * @return the next element or <code>null</code> if EOF is reached
     * @throws IOException if reading fails or an element cannot be
     * instantiated
     */
    public I_Element nextElement() throws IOException {
        if (myReader == null) {
            // NOTE(review): myReader is not volatile, so this check-lock-check
            // sequence gives no safe-publication guarantee - confirm whether
            // concurrent use is intended at all.
            synchronized (this) {
                if (myReader == null) {
                    myReader = new PushbackReader(myRawReader, PUSHBACK_BUFFER_SIZE);
                }
            }
        }
        final int c = myReader.read();
        if (c == -1) {
            return null; // EOF
        }
        I_Element theElement = null;
        if (c != myMagicChar) {
            // plain text: hand the character back and let the default element read it
            myReader.unread(c);
            theElement = getDefaultElement();
            if (myRawReader instanceof StackedReader) {
                int lineNumber = ((StackedReader)myRawReader).getLineNumber();
                // fix linenumber: presumably the newline we just consumed has
                // already advanced the reader's line count
                if (c == '\n') {
                    lineNumber--;
                }
                setFileLocation(lineNumber, theElement);
            }
            theElement.parse(myReader);
            return theElement;
        }
        try {
            // remember the line before looking ahead for the tag start
            final int lineNumber = getLineNumber();
            theElement = getElement();
            if (theElement != null) {
                setFileLocation(lineNumber, theElement);
            } else {
                // not a registered tag: treat the magic character as raw data
                myReader.unread(c);
                theElement = getDefaultElement();
                setFileLocation(theElement);
            }
            theElement.parse(myReader);
        }
        catch (EOFException ignored) {
            // Deliberately not propagated. If getElement() hit the end of the
            // input right after the magic character, theElement is still null
            // and null (EOF) is returned; if the exception came out of parse(),
            // the element created so far is returned.
        }
        return theElement;
    }

    /**
     * Returns the current line number of the raw reader.
     *
     * @return the line number, or -1 if the raw reader is not a
     * {@link StackedReader}
     */
    private int getLineNumber() {
        if (myRawReader instanceof StackedReader) {
            return ((StackedReader)myRawReader).getLineNumber();
        }
        return -1;
    }

    /**
     * Stamps the element with the raw reader's current line number. Does
     * nothing unless the raw reader is a {@link StackedReader}.
     *
     * @param theElement the element to stamp
     */
    private void setFileLocation(I_Element theElement) {
        if (myRawReader instanceof StackedReader) {
            final int lineNumber = ((StackedReader)myRawReader).getLineNumber();
            setFileLocation(lineNumber, theElement);
        }
    }

    /**
     * Stamps the element with the given line number and with a copy of the
     * raw reader's current context object, whose line number is set to the
     * same value. Does nothing unless the raw reader is a
     * {@link StackedReader}.
     *
     * @param lineNumber the line number to record
     * @param theElement the element to stamp
     */
    private void setFileLocation(final int lineNumber, I_Element theElement) {
        if (myRawReader instanceof StackedReader) {
            final StackedReader stackedReader = (StackedReader)myRawReader;
            theElement.setLine(lineNumber);
            try {
                // clone, so that the reader's own context object stays untouched
                final FileLocation location = (FileLocation)stackedReader.getCurrentContextObject().clone();
                location.setLineNumber(lineNumber);
                theElement.setFileLocation(location);
            }
            catch (CloneNotSupportedException e) {
                e.printStackTrace(); //should never happen
            }
        }
    }

    /**
     * Returns a new instance of the default element.
     * It usually represents raw data.
     *
     * @return an {@link I_Element}
     * @throws IOException a {@link ParserException} if the default element
     * cannot be instantiated
     * @see RawDataElement
     */
    protected I_Element getDefaultElement() throws IOException {
        try {
            return (I_Element)myDefaultElementConstructor.newInstance(new Object[]{
                null
            });
        }
        catch (Exception e) {
            throw createParserException("Failed to instantiate default element of type " + myDefaultElementConstructor, e);
        }
    }

    /**
     * Reads the first characters after the magic character and returns the
     * appropriate {@link I_Element}. All characters read are pushed back
     * before this method returns normally.
     *
     * @return a new element for the longest matching tag start, or
     * <code>null</code> if no registered tag start matches
     * @throws EOFException if the input ends right after the magic character
     * @throws IOException if reading fails or the element cannot be
     * instantiated
     */
    protected I_Element getElement() throws IOException {
        final int theMaxLength = myLongestTagStartLength;
        final char[] buf = new char[theMaxLength];
        int readChars = 0;
        boolean theEOF = false;
        while (!theEOF && readChars < theMaxLength) {
            final int justRead = myReader.read(buf, readChars, theMaxLength - readChars);
            if (justRead < 0) {
                theEOF = true;
            } else {
                readChars += justRead;
            }
        }
        if (theMaxLength == 0) {
            // Nothing had to be read (no tag starts registered, or only the
            // empty one), so "zero characters read" does not mean EOF. Probe
            // a single character to find out.
            final int theProbe = myReader.read();
            if (theProbe == -1) {
                theEOF = true;
            } else {
                myReader.unread(theProbe);
            }
        }
        if (readChars == 0 && theEOF) {
            // end of file
            throw new EOFException();
        }
        if (myTagStarts != null) {
            final String theTagStart = new String(buf, 0, readChars);
            // myTagStarts is sorted ascending, so scanning backwards meets the
            // longest matching tag start first (see reorderTagStarts()).
            // this is a linear search and could be optimized with a tree
            for (int i = myTagStarts.length - 1; i > -1; i--) {
                if (theTagStart.startsWith(myTagStarts[i])) {
                    myReader.unread(buf, 0, readChars);
                    try {
                        return (I_Element)myElements[i].newInstance(new Object[]{(Object)myTagStarts[i]});
                    }
                    catch (Exception e) {
                        throw createParserException("Failed to instantiate tag of type " + myElements[i], e);
                    }
                }
            }
        }
        // push back only what was actually read; the rest of buf is unfilled
        myReader.unread(buf, 0, readChars);
        return null;
    }

    /**
     * Creates a {@link ParserException} that carries the given cause.
     *
     * @param aMessage the detail message
     * @param aCause the exception that triggered the failure
     * @return the new exception
     */
    private static ParserException createParserException(String aMessage, Throwable aCause) {
        final ParserException theException = new ParserException(aMessage);
        try {
            theException.initCause(aCause);
        }
        catch (IllegalStateException ignored) {
            // the constructor already fixed the cause; keep the exception as it is
        }
        return theException;
    }

    /**
     * Registers an Element class for a tag start, replacing any class
     * previously registered for the same tag start.
     *
     * @param anElementClass an Element class with a public constructor that
     * takes a single <code>String</code>
     * @param aTagStart the characters that follow the magic character and
     * identify the element; must not be <code>null</code>
     * @throws NoSuchMethodException if the class has no public constructor
     * taking a single <code>String</code>
     * @throws NullPointerException if <code>aTagStart</code> is
     * <code>null</code>
     */
    public void addElementClass(Class anElementClass, String aTagStart) throws ParserException, NoSuchMethodException {
        if (aTagStart == null) {
            // reject before touching the map: a null key would break sorting
            // in reorderTagStarts() and leave the registry unusable
            throw new NullPointerException("aTagStart must not be null");
        }
        final Constructor theConstructor = anElementClass.getConstructor(new Class[]{
            String.class
        });
        myElementMap.put(aTagStart, theConstructor);
        reorderTagStarts();
    }

    /**
     * Un-Registers an Element class. Does nothing if no class is registered
     * for the given tag start.
     *
     * @param aTagStart String the class is registered under
     */
    public void removeElementClass(String aTagStart) throws ParserException {
        if (myElementMap.remove(aTagStart) != null) {
            reorderTagStarts();
        }
    }

    /**
     * Returns the Constructor of the Element class registered for a tag
     * start.
     *
     * @param aTagStart String the class is registered under
     * @return the registered Constructor or <code>null</code> if there is none
     * @see Tag#getTagStart()
     */
    public Constructor getElementConstructor(String aTagStart) {
        return (Constructor)myElementMap.get(aTagStart);
    }

    /**
     * Closes the underlying reader. If no element has been requested yet, the
     * reader passed to (or opened by) the constructor is closed directly.
     *
     * @throws IOException if closing fails
     */
    public void close() throws IOException {
        if (myReader != null) {
            myReader.close();
            myReader = null;
        } else if (myRawReader != null) {
            // nextElement() was never called (or close() was called before),
            // so there is no wrapper that would close the raw reader for us
            myRawReader.close();
        }
    }

    /**
     * Rebuilds the lookup arrays from the element map.
     * The tag starts are sorted in ascending natural order. Whenever two
     * registered tag starts both prefix the same input, the shorter one is a
     * prefix of the longer one and therefore sorts before it; scanning the
     * array from its end, as {@link #getElement()} does, thus finds the
     * longest match first.
     */
    protected void reorderTagStarts() {
        final String[] theTagStarts = (String[])myElementMap.keySet().toArray(new String[myElementMap.size()]);
        Arrays.sort(theTagStarts);
        final Constructor[] theElements = new Constructor[theTagStarts.length];
        int theLongest = 0;
        for (int i = 0; i < theTagStarts.length; i++) {
            theElements[i] = (Constructor)myElementMap.get(theTagStarts[i]);
            if (theTagStarts[i].length() > theLongest) {
                theLongest = theTagStarts[i].length();
            }
        }
        myTagStarts = theTagStarts;
        myElements = theElements;
        myLongestTagStartLength = theLongest;
    }

    /**
     * Returns the value of the method <code>getTagStart()</code> of an
     * element class. The class is instantiated through its no-argument
     * constructor for this purpose.
     *
     * @param aClass the element class
     * @return the result of <code>getTagStart()</code>
     * @throws IllegalArgumentException if the class cannot be instantiated
     * or the method cannot be invoked
     */
    protected String getTagStart(Class aClass) throws ParserException {
        try {
            final Object theInstance = aClass.newInstance();
            final Method theMethod = aClass.getMethod("getTagStart", new Class[0]);
            return (String)theMethod.invoke(theInstance, new Object[0]);
        }
        catch (Exception e) {
            final IllegalArgumentException theException =
                    new IllegalArgumentException("Failed to invoke getTagStart() of " + aClass);
            theException.initCause(e);
            throw theException;
        }
    }

    /**
     * Tests. Parses the file named by the first argument, prints every
     * element and compares the summed element lengths with the file length.
     *
     * @param args the name of the file to parse
     */
    public static void main(String[] args) throws Exception {
        if (args.length < 1) {
            System.err.println("Usage: java " + Parser.class.getName() + " <file>");
            return;
        }
        final Parser theParser = new Parser(new BufferedReader(new FileReader(args[0])));
        try {
            theParser.addElementClass(com.tagtraum.framework.markup.Tag.class, "");
            theParser.addElementClass(com.tagtraum.framework.markup.Comment.class, "!--");
            theParser.addElementClass(com.tagtraum.framework.markup.SSITag.class, "!--#");
            I_Element theElement = null;
            int length = 0;
            while ((theElement = theParser.nextElement()) != null) {
                System.out.println(theElement);
                length += theElement.getLength();
            }
            System.out.println();
            System.out.println("Parsed Length: " + length);
            File theFile = new File(args[0]);
            System.out.println("Real Length : " + theFile.length());
        }
        finally {
            theParser.close();
        }
    }
}
|