List of usage examples for org.xml.sax Attributes getValue
public abstract String getValue(String qName);
From source file:Main.java
public static void main(String[] args) throws Exception { SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); parser.parse(new File("test.xml"), new DefaultHandler() { @Override// w ww. ja v a 2 s . com public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException { if (qName.equals("passenger")) { System.out.println("id = " + atts.getValue(0)); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { } @Override public void characters(char[] ch, int start, int length) throws SAXException { String text = new String(ch, start, length); if (!text.trim().isEmpty()) { System.out.println("name " + text); } } }); }
From source file:SAXTest.java
public static void main(String[] args) throws Exception { String url;//www .j a va 2 s . c o m if (args.length == 0) { url = "http://www.w3c.org"; System.out.println("Using " + url); } else url = args[0]; DefaultHandler handler = new DefaultHandler() { public void startElement(String namespaceURI, String lname, String qname, Attributes attrs) { if (lname.equals("a") && attrs != null) { for (int i = 0; i < attrs.getLength(); i++) { String aname = attrs.getLocalName(i); if (aname.equals("href")) System.out.println(attrs.getValue(i)); } } } }; SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); SAXParser saxParser = factory.newSAXParser(); InputStream in = new URL(url).openStream(); saxParser.parse(in, handler); }
From source file:efen.parsewiki.WikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;//from w w w .java2 s. c om final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }
From source file:it.unimi.di.wikipedia.parsing.NamespacedWikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(NamespacedWikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;// w ww . ja va 2 s.co m final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }
From source file:Main.java
public static int getIntAttribute(Attributes attr, String name, int defaultVal) { String value = attr.getValue(name); if (value == null) return defaultVal; try {// w w w. j a v a2 s.c o m return Integer.parseInt(value); } catch (Exception ex) { return defaultVal; } }
From source file:Main.java
/** * Returns an attribute value as a Integer value. * @param attributes the Attributes object * @param name the name of the attribute * @return the attribute value as an Integer or null if the attribute is missing *//* w w w . j a va 2 s. com*/ public static Integer getAttributeAsInteger(Attributes attributes, String name) { String s = attributes.getValue(name); if (s == null) { return null; } else { return new Integer(s); } }
From source file:Main.java
/** * Returns an attribute value as a boolean value. * @param attributes the Attributes object * @param name the name of the attribute * @param defaultValue the default value if the attribute is not specified * @return the attribute value as a boolean *///from w ww .j ava 2s . c o m public static boolean getAttributeAsBoolean(Attributes attributes, String name, boolean defaultValue) { String s = attributes.getValue(name); if (s == null) { return defaultValue; } else { return Boolean.valueOf(s).booleanValue(); } }
From source file:Main.java
/** * Returns an attribute value as a glyph position adjustments array. The string value * is expected to be a non-empty sequence of either Z<repeat> or <number>, where the * former encodes a repeat count (of zeroes) and the latter encodes a integer number, * and where each item is separated by whitespace. * @param attributes the Attributes object * @param name the name of the attribute * @return the position adjustments array */// w ww. j ava2 s.com public static int[][] getAttributeAsPositionAdjustments(Attributes attributes, String name) { String s = attributes.getValue(name); if (s == null) { return null; } else { return decodePositionAdjustments(s.trim()); } }
From source file:Main.java
/** * Returns an attribute value as a int value. * @param attributes the Attributes object * @param name the name of the attribute * @param defaultValue the default value if the attribute is not specified * @return the attribute value as an int *///www. j a v a2 s . c om public static int getAttributeAsInt(Attributes attributes, String name, int defaultValue) { String s = attributes.getValue(name); if (s == null) { return defaultValue; } else { return Integer.parseInt(s); } }
From source file:Main.java
/** * Returns an attribute value as a int value. * @param attributes the Attributes object * @param name the name of the attribute * @return the attribute value as an int * @throws SAXException if the attribute is missing *///from w w w . ja v a 2s . c o m public static int getAttributeAsInt(Attributes attributes, String name) throws SAXException { String s = attributes.getValue(name); if (s == null) { throw new SAXException("Attribute '" + name + "' is missing"); } else { return Integer.parseInt(s); } }