Example usage for org.xml.sax Attributes getValue

List of usage examples for org.xml.sax Attributes getValue

Introduction

In this page you can find the example usage for org.xml.sax Attributes getValue.

Prototype

public abstract String getValue(String qName);

Source Link

Document

Look up an attribute's value by XML qualified (prefixed) name.

Usage

From source file:Main.java

public static void main(String[] args) throws Exception {
    SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
    parser.parse(new File("test.xml"), new DefaultHandler() {

        @Override// w ww.  ja  v  a 2  s  . com
        public void startElement(String uri, String localName, String qName, Attributes atts)
                throws SAXException {
            if (qName.equals("passenger")) {
                System.out.println("id = " + atts.getValue(0));
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            String text = new String(ch, start, length);
            if (!text.trim().isEmpty()) {
                System.out.println("name " + text);
            }
        }
    });
}

From source file:SAXTest.java

public static void main(String[] args) throws Exception {
    String url;//www .j a va 2 s  . c  o m
    if (args.length == 0) {
        url = "http://www.w3c.org";
        System.out.println("Using " + url);
    } else
        url = args[0];

    DefaultHandler handler = new DefaultHandler() {
        public void startElement(String namespaceURI, String lname, String qname, Attributes attrs) {
            if (lname.equals("a") && attrs != null) {
                for (int i = 0; i < attrs.getLength(); i++) {
                    String aname = attrs.getLocalName(i);
                    if (aname.equals("href"))
                        System.out.println(attrs.getValue(i));
                }
            }
        }
    };

    SAXParserFactory factory = SAXParserFactory.newInstance();
    factory.setNamespaceAware(true);
    SAXParser saxParser = factory.newSAXParser();
    InputStream in = new URL(url).openStream();
    saxParser.parse(in, handler);
}

From source file:efen.parsewiki.WikipediaDocumentSequence.java

public static void main(final String arg[])
        throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException {
    SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(),
            "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.",
            new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"),
                    new Switch("iso", 'i', "iso",
                            "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE),
                            JSAP.NOT_REQUIRED, 'w', "width",
                            "The width, in bits, of the signatures used to sign the function from URIs to their rank."),
                    new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The file containing the Wikipedia dump."),
                    new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."),
                    new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The URIs of the documents in the collection (generated by ScanMetadata)."),
                    new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of a precomputed virtual document resolver for the collection."),
                    new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of the resulting virtual document resolver.") });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted())
        return;//from w w  w .java2  s. c  om

    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>();
    final String baseURL = jsapResult.getString("baseURL");
    final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
    progressLogger.itemsName = "redirects";
    progressLogger.start("Extracting redirects...");

    final SAXParser parser = saxParserFactory.newSAXParser();
    final DefaultHandler handler = new DefaultHandler() {
        private boolean inTitle;
        private MutableString title = new MutableString();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                inTitle = false;
                title.length(0);
            } else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first title element.
            else if ("redirect".equals(localName) && attributes.getValue("title") != null) {
                progressLogger.update();
                redirects.put(title.copy(), attributes.getValue("title"));
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equals(localName))
                inTitle = false;
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    InputStream in = new FileInputStream(jsapResult.getString("file"));
    if (jsapResult.userSpecified("bzip2"))
        in = new BZip2CompressorInputStream(in);
    parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
            handler);
    progressLogger.done();

    final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>();
    final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr"));

    progressLogger.expectedUpdates = redirects.size();
    progressLogger.start("Examining redirects...");

    for (Map.Entry<MutableString, String> e : redirects.entrySet()) {
        final MutableString start = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true));
        final MutableString end = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getValue(), true));
        final long s = vdr.resolve(start);
        if (s == -1) {
            final long t = vdr.resolve(end);
            if (t != -1)
                resolved.put(start.copy(), t);
            else
                LOGGER.warn("Failed redirect: " + start + " -> " + end);
        } else
            LOGGER.warn("URL " + start + " is already known to the virtual document resolver");

        progressLogger.lightUpdate();
    }

    progressLogger.done();

    //System.err.println(resolved);

    final Iterable<MutableString> allURIs = Iterables
            .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet());
    final long numberOfDocuments = vdr.numberOfDocuments();

    final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso")
            ? TransformationStrategies.iso()
            : TransformationStrategies.utf16();

    BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments,
            new ShiftAddXorSignedStringMap(allURIs.iterator(),
                    new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy)
                            .build(),
                    jsapResult.getInt("width")),
            resolved.values().toLongArray())), jsapResult.getString("redvdr"));
}

From source file:it.unimi.di.wikipedia.parsing.NamespacedWikipediaDocumentSequence.java

public static void main(final String arg[])
        throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException {
    SimpleJSAP jsap = new SimpleJSAP(NamespacedWikipediaDocumentSequence.class.getName(),
            "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.",
            new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"),
                    new Switch("iso", 'i', "iso",
                            "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE),
                            JSAP.NOT_REQUIRED, 'w', "width",
                            "The width, in bits, of the signatures used to sign the function from URIs to their rank."),
                    new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The file containing the Wikipedia dump."),
                    new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."),
                    new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The URIs of the documents in the collection (generated by ScanMetadata)."),
                    new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of a precomputed virtual document resolver for the collection."),
                    new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of the resulting virtual document resolver.") });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted())
        return;//  w  ww . ja va 2  s.co  m

    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>();
    final String baseURL = jsapResult.getString("baseURL");
    final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
    progressLogger.itemsName = "redirects";
    progressLogger.start("Extracting redirects...");

    final SAXParser parser = saxParserFactory.newSAXParser();
    final DefaultHandler handler = new DefaultHandler() {
        private boolean inTitle;
        private MutableString title = new MutableString();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                inTitle = false;
                title.length(0);
            } else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first title element.
            else if ("redirect".equals(localName) && attributes.getValue("title") != null) {
                progressLogger.update();
                redirects.put(title.copy(), attributes.getValue("title"));
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equals(localName))
                inTitle = false;
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    InputStream in = new FileInputStream(jsapResult.getString("file"));
    if (jsapResult.userSpecified("bzip2"))
        in = new BZip2CompressorInputStream(in);
    parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
            handler);
    progressLogger.done();

    final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>();
    final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr"));

    progressLogger.expectedUpdates = redirects.size();
    progressLogger.start("Examining redirects...");

    for (Map.Entry<MutableString, String> e : redirects.entrySet()) {
        final MutableString start = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true));
        final MutableString end = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getValue(), true));
        final long s = vdr.resolve(start);
        if (s == -1) {
            final long t = vdr.resolve(end);
            if (t != -1)
                resolved.put(start.copy(), t);
            else
                LOGGER.warn("Failed redirect: " + start + " -> " + end);
        } else
            LOGGER.warn("URL " + start + " is already known to the virtual document resolver");

        progressLogger.lightUpdate();
    }

    progressLogger.done();

    //System.err.println(resolved);

    final Iterable<MutableString> allURIs = Iterables
            .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet());
    final long numberOfDocuments = vdr.numberOfDocuments();

    final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso")
            ? TransformationStrategies.iso()
            : TransformationStrategies.utf16();

    BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments,
            new ShiftAddXorSignedStringMap(allURIs.iterator(),
                    new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy)
                            .build(),
                    jsapResult.getInt("width")),
            resolved.values().toLongArray())), jsapResult.getString("redvdr"));
}

From source file:Main.java

public static int getIntAttribute(Attributes attr, String name, int defaultVal) {
    String value = attr.getValue(name);
    if (value == null)
        return defaultVal;

    try {//  w w  w. j  a  v a2 s.c  o m
        return Integer.parseInt(value);
    } catch (Exception ex) {
        return defaultVal;
    }
}

From source file:Main.java

/**
 * Returns an attribute value as a Integer value.
 * @param attributes the Attributes object
 * @param name the name of the attribute
 * @return the attribute value as an Integer or null if the attribute is missing
 *//*  w w  w  .  j  a  va 2 s.  com*/
public static Integer getAttributeAsInteger(Attributes attributes, String name) {
    String s = attributes.getValue(name);
    if (s == null) {
        return null;
    } else {
        return new Integer(s);
    }
}

From source file:Main.java

/**
 * Returns an attribute value as a boolean value.
 * @param attributes the Attributes object
 * @param name the name of the attribute
 * @param defaultValue the default value if the attribute is not specified
 * @return the attribute value as a boolean
 *///from w  ww .j  ava  2s . c  o m
public static boolean getAttributeAsBoolean(Attributes attributes, String name, boolean defaultValue) {
    String s = attributes.getValue(name);
    if (s == null) {
        return defaultValue;
    } else {
        return Boolean.valueOf(s).booleanValue();
    }
}

From source file:Main.java

/**
 * Returns an attribute value as a glyph position adjustments array. The string value
 * is expected to be a non-empty sequence of either Z<repeat> or <number>, where the
 * former encodes a repeat count (of zeroes) and the latter encodes a integer number,
 * and where each item is separated by whitespace.
 * @param attributes the Attributes object
 * @param name the name of the attribute
 * @return the position adjustments array
 */// w  ww. j  ava2 s.com
public static int[][] getAttributeAsPositionAdjustments(Attributes attributes, String name) {
    String s = attributes.getValue(name);
    if (s == null) {
        return null;
    } else {
        return decodePositionAdjustments(s.trim());
    }
}

From source file:Main.java

/**
 * Returns an attribute value as a int value.
 * @param attributes the Attributes object
 * @param name the name of the attribute
 * @param defaultValue the default value if the attribute is not specified
 * @return the attribute value as an int
 *///www.  j a v a2 s .  c om
public static int getAttributeAsInt(Attributes attributes, String name, int defaultValue) {
    String s = attributes.getValue(name);
    if (s == null) {
        return defaultValue;
    } else {
        return Integer.parseInt(s);
    }
}

From source file:Main.java

/**
 * Returns an attribute value as a int value.
 * @param attributes the Attributes object
 * @param name the name of the attribute
 * @return the attribute value as an int
 * @throws SAXException if the attribute is missing
 *///from   w w w . ja v  a  2s  . c o  m
public static int getAttributeAsInt(Attributes attributes, String name) throws SAXException {
    String s = attributes.getValue(name);
    if (s == null) {
        throw new SAXException("Attribute '" + name + "' is missing");
    } else {
        return Integer.parseInt(s);
    }
}