List of usage examples for javax.xml.stream.events.XMLEvent#isStartElement()
/**
 * Utility check declared on {@code javax.xml.stream.events.XMLEvent}: tells whether this
 * event is a start-element event. The snippets below call it before
 * {@code asStartElement()}.
 *
 * @return {@code true} if this event is a start element, {@code false} otherwise
 */
public boolean isStartElement();
From source file:com.aionengine.gameserver.dataholders.loadingutils.XmlMerger.java
/** * This method processes the source file, replacing all of * the 'import' tags by the data from the relevant files. * * @throws XMLStreamException on event writing error. * @throws IOException if the destination file exists but is a directory rather than * a regular file, does not exist but cannot be created, * or cannot be opened for any other reason *///from w w w . j av a 2 s . c om private void doUpdate() throws XMLStreamException, IOException { XMLEventReader reader = null; XMLEventWriter writer = null; Properties metadata = new Properties(); try { writer = outputFactory.createXMLEventWriter(new BufferedWriter(new FileWriter(destFile, false))); reader = inputFactory.createXMLEventReader(new FileReader(sourceFile)); while (reader.hasNext()) { final XMLEvent xmlEvent = reader.nextEvent(); if (xmlEvent.isStartElement() && isImportQName(xmlEvent.asStartElement().getName())) { processImportElement(xmlEvent.asStartElement(), writer, metadata); continue; } if (xmlEvent.isEndElement() && isImportQName(xmlEvent.asEndElement().getName())) continue; if (xmlEvent instanceof Comment)// skip comments. continue; if (xmlEvent.isCharacters())// skip whitespaces. if (xmlEvent.asCharacters().isWhiteSpace() || xmlEvent.asCharacters().isIgnorableWhiteSpace())// skip whitespaces. continue; writer.add(xmlEvent); if (xmlEvent.isStartDocument()) { writer.add(eventFactory.createComment("\nThis file is machine-generated. DO NOT MODIFY IT!\n")); } } storeFileModifications(metadata, metaDataFile); } finally { if (writer != null) try { writer.close(); } catch (Exception ignored) { } if (reader != null) try { reader.close(); } catch (Exception ignored) { } } }
From source file:com.aionemu.gameserver.dataholders.loadingutils.XmlMerger.java
/** * This method processes the source file, replacing all of the 'import' tags * by the data from the relevant files./*w ww .j av a 2 s. c om*/ * * @throws XMLStreamException on event writing error. * @throws IOException if the destination file exists but is a directory * rather than a regular file, does not exist but cannot be created, or * cannot be opened for any other reason */ private void doUpdate() throws XMLStreamException, IOException { XMLEventReader reader = null; XMLEventWriter writer = null; Properties metadata = new Properties(); try { writer = outputFactory.createXMLEventWriter(new BufferedWriter(new FileWriter(destFile, false))); reader = inputFactory.createXMLEventReader(new FileReader(sourceFile)); while (reader.hasNext()) { final XMLEvent xmlEvent = reader.nextEvent(); if (xmlEvent.isStartElement() && isImportQName(xmlEvent.asStartElement().getName())) { processImportElement(xmlEvent.asStartElement(), writer, metadata); continue; } if (xmlEvent.isEndElement() && isImportQName(xmlEvent.asEndElement().getName())) { continue; } if (xmlEvent instanceof Comment)// skip comments. { continue; } if (xmlEvent.isCharacters())// skip whitespaces. { if (xmlEvent.asCharacters().isWhiteSpace() || xmlEvent.asCharacters().isIgnorableWhiteSpace())// skip // whitespaces. { continue; } } writer.add(xmlEvent); if (xmlEvent.isStartDocument()) { writer.add(eventFactory.createComment("\nThis file is machine-generated. DO NOT MODIFY IT!\n")); } } storeFileModifications(metadata, metaDataFile); } finally { if (writer != null) { try { writer.close(); } catch (Exception ignored) { } } if (reader != null) { try { reader.close(); } catch (Exception ignored) { } } } }
From source file:de.dfki.km.leech.parser.wikipedia.WikipediaDumpParser.java
public MultiValueHashMap<String, String> getPageTitle2Redirects(InputStream sWikipediaDump) throws FileNotFoundException, XMLStreamException { // <text xml:space="preserve">#REDIRECT [[Autopoiesis]]</text> // <text xml:space="preserve">#REDIRECT:[[Hans Leo Haler]]</text> // <text xml:space="preserve">#redirect [[Weier Hai]]</text> // #weiterleitung // <page> // <title>Autopoiesis</title> Logger.getLogger(WikipediaDumpParser.class.getName()).info("will collect redirects from wikipedia dump..."); MultiValueHashMap<String, String> hsPageTitle2Redirects = new MultiValueBalancedTreeMap<String, String>(); String strCurrentTitle = ""; XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance(); XMLEventReader xmlEventReader = xmlInputFactory.createXMLEventReader(sWikipediaDump, "Utf-8"); int iTitlesRead = 0; while (xmlEventReader.hasNext()) { XMLEvent xmlEvent = xmlEventReader.nextEvent(); if (!xmlEvent.isStartElement()) continue; // wenn wir einen Title haben, dann merken wir uns den, falls wir ihn brauchen if (xmlEvent.asStartElement().getName().getLocalPart().equals("title")) { strCurrentTitle = readNextCharEventsText(xmlEventReader); iTitlesRead++;//from ww w . j av a 2 s .c o m if (iTitlesRead % 200000 == 0) Logger.getLogger(WikipediaDumpParser.class.getName()) .info("read doc #" + StringUtils.beautifyNumber(iTitlesRead)); continue; } if (!xmlEvent.asStartElement().getName().getLocalPart().equals("text")) continue; // jetzt haben wir ein text-tag. Wir schauen, ob jetzt ein redirect kommt // entweder kommt ein charEvent oder ein EndEvent. 
Leere Texte gibts wohl auch XMLEvent nextEvent = xmlEventReader.peek(); if (!nextEvent.isCharacters()) continue; String strCharEventData = readNextCharEventsText(xmlEventReader); if (strCharEventData == null) continue; strCharEventData = strCharEventData.trim(); boolean bRedirect = false; if (strCharEventData.length() >= 9 && strCharEventData.substring(0, 9).equalsIgnoreCase("#redirect")) bRedirect = true; if (!bRedirect && strCharEventData.length() >= 8 && strCharEventData.substring(0, 8).equalsIgnoreCase("redirect") && !strCharEventData.contains("\n")) bRedirect = true; if (!bRedirect && strCharEventData.length() >= 14 && strCharEventData.substring(0, 14).equalsIgnoreCase("#weiterleitung")) bRedirect = true; if (!bRedirect && strCharEventData.length() >= 13 && strCharEventData.substring(0, 13).equalsIgnoreCase("weiterleitung") && !strCharEventData.contains("\n")) bRedirect = true; if (!bRedirect) continue; // wir haben einen redirect - der wird in unsere Datenstruktur eingetragen int iStart = strCharEventData.indexOf("[["); int iEnd = strCharEventData.indexOf("]]"); if (iStart < 0 || iEnd < 0) continue; if (iEnd <= iStart) continue; if ((iStart + 2) > strCharEventData.length() || iEnd > strCharEventData.length()) continue; String strRedirectTarget = strCharEventData.substring(iStart + 2, iEnd).trim(); hsPageTitle2Redirects.add(strRedirectTarget, strCurrentTitle); // if("Venceslav Konstantinov".equalsIgnoreCase(strCurrentTitle) || "Venceslav Konstantinov".equalsIgnoreCase(strRedirectTarget)) // System.out.println("redirect found: (" + hsPageTitle2Redirects.keySize() + ") " + strCurrentTitle + " => '" + strRedirectTarget + "'"); } Logger.getLogger(WikipediaDumpParser.class.getName()) .info("Redirects found: " + StringUtils.beautifyNumber(hsPageTitle2Redirects.valueSize())); return hsPageTitle2Redirects; }
From source file:edu.monash.merc.system.parser.xml.HPAWSXmlParser.java
public List<HPAEntryBean> parseHPAXml(String fileName, XMLInputFactory2 factory2) { xmlif2 = factory2;//from www . ja va 2 s .c o m logger.info("Starting to parse " + fileName); List<HPAEntryBean> hpaEntryBeans = new ArrayList<HPAEntryBean>(); XMLEventReader2 xmlEventReader = null; try { xmlEventReader = (XMLEventReader2) xmlif2.createXMLEventReader(new FileInputStream(fileName)); QName entryQN = new QName(ELE_ENTRY); QName versionQN = new QName(ATTR_VERSION); QName urlQN = new QName(ATTR_URL); QName nameQN = new QName(ELE_NAME); QName identiferQN = new QName(ELE_IDENTIFIER); QName idQN = new QName(ATTR_ID); QName xrefQN = new QName(ELE_XREF); QName dbQN = new QName(ATTR_DB); QName tissueExpQN = new QName(ELE_TISSUE_EXPRESSION); QName typeQN = new QName(ATTR_TYPE); QName verificationQN = new QName(ELE_VERIFICATION); QName dataQN = new QName(ELE_DATA); QName tissueQN = new QName(ELE_TISSUE); QName statusQN = new QName(ATTR_STATUS); QName cellTypeQN = new QName(ELE_CELLTYPE); QName levelQN = new QName(ELE_LEVEL); QName antibodyQN = new QName(ELE_ANTIBODY); String version = null; String url = null; String geneName = null; String geneAccession = null; String dbNameForIdentifier = null; String xrefAc = null; String xrefDb = null; boolean tissueExpressionPresent = false; boolean antibodyPresent = false; String tissueStatus = null; String tissue = null; String cellType = null; String levelType = null; String level = null; String verificationType = null; String verification = null; HPAEntryBean hpaEntryBean = null; GeneBean geneBean = null; List<DbSourceAcEntryBean> dbSourceAcEntryBeans = new ArrayList<DbSourceAcEntryBean>(); List<PEEvidenceBean> peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); PEEvidenceBean antiIHCNormEvidenceBean = null; AccessionBean identifiedAcBean = null; while (xmlEventReader.hasNextEvent()) { //eventType = reader.next(); XMLEvent event = xmlEventReader.nextEvent(); if (event.isStartElement()) { StartElement element = 
event.asStartElement(); //hpa entry if (element.getName().equals(entryQN)) { //start to create a hpaEntryBean hpaEntryBean = new HPAEntryBean(); //create a GeneBean geneBean = new GeneBean(); //create a list of DbSourceAcEntryBean to store all DbSource and Ac dbSourceAcEntryBeans = new ArrayList<DbSourceAcEntryBean>(); //create a list of PEEvidenceBean to store all antibody evidencs for the current gene peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); //get the version attribute Attribute versionAttr = element.getAttributeByName(versionQN); if (versionAttr != null) { version = versionAttr.getValue(); } //get the url attribute Attribute urlAttr = element.getAttributeByName(urlQN); if (urlAttr != null) { url = urlAttr.getValue(); } } //parse the gene name in the name element if (element.getName().equals(nameQN)) { if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); Characters geneCharacters = event.asCharacters(); if (geneCharacters != null) { geneName = geneCharacters.getData(); } } } //parse the ensg accession and db in the identifier element if (element.getName().equals(identiferQN)) { Attribute idAttr = element.getAttributeByName(idQN); if (idAttr != null) { geneAccession = idAttr.getValue(); } Attribute dbAttr = element.getAttributeByName(dbQN); if (dbAttr != null) { dbNameForIdentifier = dbAttr.getValue(); } } //parse all db and accession pair in xref element if (element.getName().equals(xrefQN)) { Attribute idAttr = element.getAttributeByName(idQN); if (idAttr != null) { xrefAc = idAttr.getValue(); } Attribute dbAttr = element.getAttributeByName(dbQN); if (dbAttr != null) { xrefDb = dbAttr.getValue(); } } //parse tissueExpression if (element.getName().equals(tissueExpQN)) { //we only focus on the tissueExpression element in the path /entry/tissueExpression if (!antibodyPresent) { //set the tissueExpression present flag into true; tissueExpressionPresent = true; //create a list of PEEvidenceBean to store the PEEvidence 
for antibody peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); } } //parse the verification element to get reliability or validation value if (element.getName().equals(verificationQN)) { //we only focus on the verification element in the path /entry/tissueExpression/verification if (!antibodyPresent && tissueExpressionPresent) { Attribute verifAttr = element.getAttributeByName(typeQN); if (verifAttr != null) { verificationType = element.getAttributeByName(typeQN).getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); verification = event.asCharacters().getData(); } } } //start of the data element if (element.getName().equals(dataQN)) { //we only focus on the data element in the path /entry/tissueExpression/data if (!antibodyPresent && tissueExpressionPresent) { antiIHCNormEvidenceBean = new PEEvidenceBean(); TPBDataTypeBean dataTypeBean = createTPBDataTypeBeanForPEANTIIHCNORM(); antiIHCNormEvidenceBean.setTpbDataTypeBean(dataTypeBean); } } //start of tissue if (element.getName().equals(tissueQN)) { //we only focus on the tissue element in the path /entry/tissueExpression/data/tissue if (!antibodyPresent && tissueExpressionPresent) { Attribute tissueStatusAttr = element.getAttributeByName(statusQN); if (tissueStatusAttr != null) { tissueStatus = tissueStatusAttr.getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); tissue = event.asCharacters().getData(); } } } //start of cellType if (element.getName().equals(cellTypeQN)) { //we only focus on the cellType element in the path /entry/tissueExpression/data/cellType if (!antibodyPresent && tissueExpressionPresent) { if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); cellType = event.asCharacters().getData(); } } } //start of level if (element.getName().equals(levelQN)) { //we only focus on the level element in the path /entry/tissueExpression/data/level if (!antibodyPresent && tissueExpressionPresent) 
{ Attribute typeAttr = element.getAttributeByName(typeQN); if (typeAttr != null) { levelType = typeAttr.getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); level = event.asCharacters().getData(); } } } //start of antibody element if (element.getName().equals(antibodyQN)) { //we have to setup antibodyPresent flag as true antibodyPresent = true; } } //End of element if (event.isEndElement()) { EndElement endElement = event.asEndElement(); //hpa entry end if (endElement.getName().equals(entryQN)) { //set hpa version hpaEntryBean.setHpaVersion(version); //hpaEntryBean set gene bean hpaEntryBean.setGeneBean(geneBean); //create the primary dbsource bean DBSourceBean primaryDbSourceBean = createPrimaryDBSourceBeanForHPA(); //set the primary DBSourceBean hpaEntryBean.setPrimaryDbSourceBean(primaryDbSourceBean); //set the identified accesion bean hpaEntryBean.setIdentifiedAccessionBean(identifiedAcBean); //set DbSourceAcEntryBean list hpaEntryBean.setDbSourceAcEntryBeans(dbSourceAcEntryBeans); //set all the PeAntiIHCBody evidences if (peAntiIHCNormEvidenceBeans.size() == 0) { peAntiIHCNormEvidenceBeans.add(createNonePEEvidence(url)); } hpaEntryBean.setPeAntiIHCNormEvidencesBeans(peAntiIHCNormEvidenceBeans); //add the current hpa entry bean into list hpaEntryBeans.add(hpaEntryBean); //reset version and url version = null; url = null; identifiedAcBean = null; } //end of gene name, populate the gene name if (endElement.getName().equals(nameQN)) { //set gene name geneBean.setDisplayName(geneName); } //end of identifier, populating for gene accession, db and accessions if any if (endElement.getName().equals(identiferQN)) { //set the gene accession geneBean.setEnsgAccession(geneAccession); identifiedAcBean = createIdentifiedAcBean(geneAccession, dbNameForIdentifier); //create a DbSourceAcEntryBean based on the identifier element DbSourceAcEntryBean dbSourceAcEntryBean = createDbSourceAcEntry(dbNameForIdentifier, geneAccession); //add this 
DbSourceAcEntryBean into list dbSourceAcEntryBeans.add(dbSourceAcEntryBean); } //end of xref element. populate for db and accessions if any if (endElement.getName().equals(xrefQN)) { //create a DbSourceAcEntryBean based on the xref element DbSourceAcEntryBean dbSourceAcEntryBean = createDbSourceAcEntry(xrefDb, xrefAc); //add this DbSoureAcEntryBean into list dbSourceAcEntryBeans.add(dbSourceAcEntryBean); //set rest of db and accession values xrefDb = null; xrefAc = null; } //end of the tissueExpression if (endElement.getName().equals(tissueExpQN)) { //we only focus on the tissueExpression element in the path /entry/tissueExpression if (!antibodyPresent) { //the tissueExpression is end. we have to reset tissueExpressionPresent, //verificationType and verification values under the tissueExpression element level //reset tissueExpression present flag into false tissueExpressionPresent = false; //reset verification type verificationType = null; //reset verification value verification = null; } } //end of data element if (endElement.getName().equals(dataQN)) { //we only focus on the data element in the path /entry/tissueExpression/data if (!antibodyPresent && tissueExpressionPresent) { //we only consider the tissue status is normal one if (StringUtils.endsWithIgnoreCase(tissueStatus, TISSUE_STATUS_NORMAL)) { setAntiEvidence(antiIHCNormEvidenceBean, url, verification, tissue, cellType, level, levelType); //add anti evidence peAntiIHCNormEvidenceBeans.add(antiIHCNormEvidenceBean); } //the data element is end. 
we have to reset the tissueStatus, tissue, cellType and level values under the data element level tissueStatus = null; tissue = null; cellType = null; level = null; levelType = null; } } //end of antibody if (endElement.getName().equals(antibodyQN)) { //we have to reset antibodyPresent flag as false antibodyPresent = false; } } //End of XML document if (event.isEndDocument()) { // finished to parse the whole document; break; } } } catch (Exception ex) { logger.error(ex); throw new DMXMLParserException(ex); } finally { if (xmlEventReader != null) { try { xmlEventReader.close(); } catch (Exception e) { //ignore whatever caught. } } } return hpaEntryBeans; }
From source file:json_to_xml_1.java
/**
 * Converts one JSON file to XML as described by a job file.
 *
 * <p>{@code args[0]} is the job file, {@code args[1]} the result info file. The job
 * file is an XML document whose {@code json-input-file} and {@code xml-output-file}
 * elements each carry a {@code path} attribute; relative paths are resolved against
 * the directory of the job file. The JSON input is converted with {@code XML.toString}
 * and written to the output file as UTF-8, preceded by an XML declaration and a
 * license comment.
 *
 * @param args command line arguments: job file path, result info file path
 * @return {@code 0} on success
 * @throws ProgramTerminationException if arguments are missing, a file is missing or
 *         not accessible, the job file is incomplete or unreadable, or the
 *         conversion or writing fails
 */
public int execute(String args[]) throws ProgramTerminationException {
    this.getInfoMessages().clear();

    if (args.length < 2) {
        throw constructTermination("messageArgumentsMissing", null,
                getI10nString("messageArgumentsMissingUsage") + "\n\tjson_to_xml_1 "
                        + getI10nString("messageParameterList") + "\n");
    }

    File resultInfoFile = new File(args[1]);

    try {
        resultInfoFile = resultInfoFile.getCanonicalFile();
    } catch (SecurityException ex) {
        throw constructTermination("messageResultInfoFileCantGetCanonicalPath", ex, null,
                resultInfoFile.getAbsolutePath());
    } catch (IOException ex) {
        throw constructTermination("messageResultInfoFileCantGetCanonicalPath", ex, null,
                resultInfoFile.getAbsolutePath());
    }

    // An existing result info path must be a writable regular file.
    if (resultInfoFile.exists()) {
        if (!resultInfoFile.isFile()) {
            throw constructTermination("messageResultInfoPathIsntAFile", null, null,
                    resultInfoFile.getAbsolutePath());
        }
        if (!resultInfoFile.canWrite()) {
            throw constructTermination("messageResultInfoFileIsntWritable", null, null,
                    resultInfoFile.getAbsolutePath());
        }
    }

    json_to_xml_1.resultInfoFile = resultInfoFile;

    File jobFile = new File(args[0]);

    try {
        jobFile = jobFile.getCanonicalFile();
    } catch (SecurityException ex) {
        throw constructTermination("messageJobFileCantGetCanonicalPath", ex, null, jobFile.getAbsolutePath());
    } catch (IOException ex) {
        throw constructTermination("messageJobFileCantGetCanonicalPath", ex, null, jobFile.getAbsolutePath());
    }

    if (!jobFile.exists()) {
        throw constructTermination("messageJobFileDoesntExist", null, null, jobFile.getAbsolutePath());
    }
    if (!jobFile.isFile()) {
        throw constructTermination("messageJobPathIsntAFile", null, null, jobFile.getAbsolutePath());
    }
    if (!jobFile.canRead()) {
        throw constructTermination("messageJobFileIsntReadable", null, null, jobFile.getAbsolutePath());
    }

    System.out.println("json_to_xml_1: " + getI10nStringFormatted("messageCallDetails",
            jobFile.getAbsolutePath(), resultInfoFile.getAbsolutePath()));

    File inputFile = null;
    File outputFile = null;

    // Both the stream and the event reader are released in the finally block below;
    // closing the event reader alone would leave the file handle open.
    InputStream in = null;
    XMLEventReader eventReader = null;

    try {
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        in = new FileInputStream(jobFile);
        eventReader = inputFactory.createXMLEventReader(in);

        while (eventReader.hasNext()) {
            XMLEvent event = eventReader.nextEvent();

            if (!event.isStartElement()) {
                continue;
            }

            String tagName = event.asStartElement().getName().getLocalPart();

            if (tagName.equals("json-input-file")) {
                StartElement inputFileElement = event.asStartElement();
                Attribute pathAttribute = inputFileElement.getAttributeByName(new QName("path"));

                if (pathAttribute == null) {
                    throw constructTermination("messageJobFileEntryIsMissingAnAttribute", null, null,
                            jobFile.getAbsolutePath(), tagName, "path");
                }

                String inputFilePath = pathAttribute.getValue();

                if (inputFilePath.isEmpty()) {
                    throw constructTermination("messageJobFileAttributeValueIsEmpty", null, null,
                            jobFile.getAbsolutePath(), tagName, "path");
                }

                inputFile = new File(inputFilePath);

                // Relative paths are relative to the job file's directory.
                if (!inputFile.isAbsolute()) {
                    inputFile = new File(
                            jobFile.getAbsoluteFile().getParent() + File.separator + inputFilePath);
                }

                try {
                    inputFile = inputFile.getCanonicalFile();
                } catch (SecurityException ex) {
                    throw constructTermination("messageInputFileCantGetCanonicalPath", ex, null,
                            new File(inputFilePath).getAbsolutePath(), jobFile.getAbsolutePath());
                } catch (IOException ex) {
                    throw constructTermination("messageInputFileCantGetCanonicalPath", ex, null,
                            new File(inputFilePath).getAbsolutePath(), jobFile.getAbsolutePath());
                }

                if (!inputFile.exists()) {
                    throw constructTermination("messageInputFileDoesntExist", null, null,
                            inputFile.getAbsolutePath(), jobFile.getAbsolutePath());
                }
                if (!inputFile.isFile()) {
                    throw constructTermination("messageInputPathIsntAFile", null, null,
                            inputFile.getAbsolutePath(), jobFile.getAbsolutePath());
                }
                if (!inputFile.canRead()) {
                    throw constructTermination("messageInputFileIsntReadable", null, null,
                            inputFile.getAbsolutePath(), jobFile.getAbsolutePath());
                }
            } else if (tagName.equals("xml-output-file")) {
                StartElement outputFileElement = event.asStartElement();
                Attribute pathAttribute = outputFileElement.getAttributeByName(new QName("path"));

                if (pathAttribute == null) {
                    throw constructTermination("messageJobFileEntryIsMissingAnAttribute", null, null,
                            jobFile.getAbsolutePath(), tagName, "path");
                }

                String outputFilePath = pathAttribute.getValue();

                if (outputFilePath.isEmpty()) {
                    throw constructTermination("messageJobFileAttributeValueIsEmpty", null, null,
                            jobFile.getAbsolutePath(), tagName, "path");
                }

                outputFile = new File(outputFilePath);

                // Relative paths are relative to the job file's directory.
                if (!outputFile.isAbsolute()) {
                    outputFile = new File(
                            jobFile.getAbsoluteFile().getParent() + File.separator + outputFilePath);
                }

                try {
                    outputFile = outputFile.getCanonicalFile();
                } catch (SecurityException ex) {
                    throw constructTermination("messageOutputFileCantGetCanonicalPath", ex, null,
                            new File(outputFilePath).getAbsolutePath(), jobFile.getAbsolutePath());
                } catch (IOException ex) {
                    throw constructTermination("messageOutputFileCantGetCanonicalPath", ex, null,
                            new File(outputFilePath).getAbsolutePath(), jobFile.getAbsolutePath());
                }

                // An existing output path must be a writable regular file.
                if (outputFile.exists()) {
                    if (!outputFile.isFile()) {
                        throw constructTermination("messageOutputPathIsntAFile", null, null,
                                outputFile.getAbsolutePath());
                    }
                    if (!outputFile.canWrite()) {
                        throw constructTermination("messageOutputFileIsntWritable", null, null,
                                outputFile.getAbsolutePath());
                    }
                }
            }
        }
    } catch (XMLStreamException ex) {
        throw constructTermination("messageJobFileErrorWhileReading", ex, null, jobFile.getAbsolutePath());
    } catch (SecurityException ex) {
        throw constructTermination("messageJobFileErrorWhileReading", ex, null, jobFile.getAbsolutePath());
    } catch (IOException ex) {
        throw constructTermination("messageJobFileErrorWhileReading", ex, null, jobFile.getAbsolutePath());
    } finally {
        if (eventReader != null) {
            try {
                eventReader.close();
            } catch (XMLStreamException ignored) {
                // the job file has been read (or reading already failed); cleanup only
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException ignored) {
                // read-only stream; a failure to close cannot lose data
            }
        }
    }

    if (inputFile == null) {
        throw constructTermination("messageJobFileNoInputFile", null, null, jobFile.getAbsolutePath());
    }

    if (outputFile == null) {
        throw constructTermination("messageJobFileNoOutputFile", null, null, jobFile.getAbsolutePath());
    }

    String xml;
    BufferedReader jsonReader = null;

    try {
        // JSON is decoded explicitly as UTF-8 instead of the platform default charset,
        // matching the UTF-8 declaration of the XML that is written below.
        jsonReader = new BufferedReader(
                new java.io.InputStreamReader(new FileInputStream(inputFile), "UTF-8"));
        JSONObject json = new JSONObject(new JSONTokener(jsonReader));
        xml = XML.toString(json);
    } catch (Exception ex) {
        throw constructTermination("messageConversionError", ex, null, inputFile.getAbsolutePath());
    } finally {
        if (jsonReader != null) {
            try {
                jsonReader.close();
            } catch (IOException ignored) {
                // read-only stream; a failure to close cannot lose data
            }
        }
    }

    BufferedWriter writer = null;

    try {
        writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(outputFile), "UTF-8"));

        writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
        writer.write(
                "<!-- This file was created by json_to_xml_1, which is free software licensed under the GNU Affero General Public License 3 or any later version (see https://github.com/publishing-systems/digital_publishing_workflow_tools/ and http://www.publishing-systems.org). -->\n");
        writer.write(xml);

        writer.flush();
        // Closed here on the success path so that a failing close is still reported.
        writer.close();
        writer = null;
    } catch (FileNotFoundException ex) {
        throw constructTermination("messageOutputFileWritingError", ex, null, outputFile.getAbsolutePath());
    } catch (UnsupportedEncodingException ex) {
        throw constructTermination("messageOutputFileWritingError", ex, null, outputFile.getAbsolutePath());
    } catch (IOException ex) {
        throw constructTermination("messageOutputFileWritingError", ex, null, outputFile.getAbsolutePath());
    } finally {
        // Only reached with a non-null writer when writing failed: release the handle
        // without masking the error that is already being reported.
        if (writer != null) {
            try {
                writer.close();
            } catch (IOException ignored) {
                // the original write failure is the one worth reporting
            }
        }
    }

    return 0;
}
From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java
@Override public Communication fromCharacterBasedFile(final Path path) throws IngestException { if (!Files.exists(path)) throw new IngestException("No file at: " + path.toString()); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(); AnalyticUUIDGenerator g = f.create(); Communication c = new Communication(); c.setUuid(g.next());//from w ww .j a v a2 s . c om c.setType(this.getKind()); c.setMetadata(TooledMetadataConverter.convert(this)); try { ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path); c.setId(ef.getName().split("\\.")[0]); } catch (NoSuchFileException | NotFileException e) { // might throw if path is a directory. throw new IngestException(path.toString() + " is not a file, or is a directory."); } String content; try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) { content = IOUtils.toString(bin, StandardCharsets.UTF_8); c.setText(content); } catch (IOException e) { throw new IngestException(e); } try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8); BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) { XMLEventReader rdr = null; try { rdr = inF.createXMLEventReader(reader); // Below method moves the reader // to the headline end element. Section headline = this.handleBeginning(rdr, content, c); headline.setUuid(g.next()); c.addToSectionList(headline); TextSpan sts = headline.getTextSpan(); LOGGER.debug("headline text: {}", c.getText().substring(sts.getStart(), sts.getEnding())); int sectNumber = 1; int subSect = 0; int currOff = -1; // Big amounts of characters. while (rdr.hasNext()) { XMLEvent nextEvent = rdr.nextEvent(); currOff = nextEvent.getLocation().getCharacterOffset(); // First: see if document is going to end. // If yes: exit. if (nextEvent.isEndDocument()) break; // region // enables ingestion of quotes inside a usenet webpost. 
// by Tongfei Chen if (nextEvent.isStartElement() && nextEvent.asStartElement().getName().equals(QName.valueOf("QUOTE"))) { Attribute attrQuote = nextEvent.asStartElement() .getAttributeByName(QName.valueOf("PREVIOUSPOST")); String quote = StringEscapeUtils.escapeXml(attrQuote.getValue()); int location = attrQuote.getLocation().getCharacterOffset() + "<QUOTE PREVIOUSPOST=\"".length(); Section quoteSection = new Section(g.next(), "quote") .setTextSpan(new TextSpan(location, location + quote.length())); c.addToSectionList(quoteSection); } // endregion // Check if start element. if (nextEvent.isCharacters()) { Characters chars = nextEvent.asCharacters(); if (!chars.isWhiteSpace()) { String fpContent = chars.getData(); LOGGER.debug("Character offset: {}", currOff); LOGGER.debug("Character based data: {}", fpContent); SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent); final int tsb = currOff + pads.getKey(); final int tse = currOff + fpContent.replace("\"", """).replace("<", "<") .replace(">", ">").length() - (pads.getValue()); // MAINTAIN CORRECT TEXT SPAN // CANNOT USE StringEscapeUtils.escapeXml because it will escape "'", which // is not escaped in the data // @tongfei LOGGER.debug("Section text: {}", content.substring(tsb, tse)); TextSpan ts = new TextSpan(tsb, tse); String sk; if (subSect == 0) sk = "poster"; else if (subSect == 1) sk = "postdate"; else sk = "post"; Section s = new Section(); s.setKind(sk); s.setTextSpan(ts); s.setUuid(g.next()); List<Integer> intList = new ArrayList<>(); intList.add(sectNumber); intList.add(subSect); s.setNumberList(intList); c.addToSectionList(s); subSect++; } } else if (nextEvent.isEndElement()) { EndElement ee = nextEvent.asEndElement(); currOff = ee.getLocation().getCharacterOffset(); QName name = ee.getName(); String localName = name.getLocalPart(); LOGGER.debug("Hit end element: {}", localName); if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) { LOGGER.debug("Switching to new post."); sectNumber++; 
subSect = 0; } else if (localName.equalsIgnoreCase(TEXT_LOCAL_NAME)) { // done with document. break; } } } return c; } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException | ClassCastException x) { throw new IngestException(x); } finally { if (rdr != null) try { rdr.close(); } catch (XMLStreamException e) { // not likely. LOGGER.info("Error closing XMLReader.", e); } } } catch (IOException e) { throw new IngestException(e); } }
From source file:microsoft.exchange.webservices.data.core.EwsXmlReader.java
public boolean readToDescendant(String localName, String namespaceURI) throws XMLStreamException { if (!this.isStartElement()) { return false; }// ww w .j a va 2 s.co m XMLEvent startEvent = this.presentEvent; XMLEvent event = this.presentEvent; do { if (event.isStartElement()) { QName qEName = event.asStartElement().getName(); if (qEName.getLocalPart().equals(localName) && qEName.getNamespaceURI().equals(namespaceURI)) { return true; } } event = this.xmlReader.nextEvent(); } while (!checkEndElement(startEvent, event)); return false; }
From source file:com.streamsets.pipeline.stage.origin.salesforce.ForceSource.java
private String bulkProduce(String lastSourceOffset, int maxBatchSize, BatchMaker batchMaker) throws StageException { String nextSourceOffset = (null == lastSourceOffset) ? RECORD_ID_OFFSET_PREFIX + conf.initialOffset : lastSourceOffset;/*from w w w .j a v a2 s .c o m*/ if (job == null) { // No job in progress - start from scratch try { String id = (lastSourceOffset == null) ? null : lastSourceOffset.substring(lastSourceOffset.indexOf(':') + 1); final String preparedQuery = prepareQuery(conf.soqlQuery, id); LOG.info("SOQL Query is: {}", preparedQuery); if (destroyed.get()) { throw new StageException(getContext().isPreview() ? Errors.FORCE_25 : Errors.FORCE_26); } job = createJob(sobjectType, bulkConnection); LOG.info("Created Bulk API job {}", job.getId()); if (destroyed.get()) { throw new StageException(getContext().isPreview() ? Errors.FORCE_25 : Errors.FORCE_26); } BatchInfo b = bulkConnection.createBatchFromStream(job, new ByteArrayInputStream(preparedQuery.getBytes(StandardCharsets.UTF_8))); LOG.info("Created Bulk API batch {}", b.getId()); processedBatches = new HashSet<>(); } catch (AsyncApiException e) { throw new StageException(Errors.FORCE_01, e); } } // We started the job already, see if the results are ready // Loop here so that we can wait for results in preview mode and not return an empty batch // Preview will cut us off anyway if we wait too long while (queryResultList == null) { if (destroyed.get()) { throw new StageException(getContext().isPreview() ? 
Errors.FORCE_25 : Errors.FORCE_26); } try { // PK Chunking gives us multiple batches - process them in turn batchList = bulkConnection.getBatchInfoList(job.getId()); for (BatchInfo batchInfo : batchList.getBatchInfo()) { if (batchInfo.getState() == BatchStateEnum.Failed) { LOG.error("Batch {} failed: {}", batchInfo.getId(), batchInfo.getStateMessage()); throw new StageException(Errors.FORCE_03, batchInfo.getStateMessage()); } else if (!processedBatches.contains(batchInfo.getId())) { if (batchInfo.getState() == BatchStateEnum.NotProcessed) { // Skip this batch - it's the 'original batch' in PK chunking // See https://developer.salesforce.com/docs/atlas.en-us.api_asynch.meta/api_asynch/asynch_api_code_curl_walkthrough_pk_chunking.htm LOG.info("Batch {} not processed", batchInfo.getId()); processedBatches.add(batchInfo.getId()); } else if (batchInfo.getState() == BatchStateEnum.Completed) { LOG.info("Batch {} completed", batchInfo.getId()); batch = batchInfo; queryResultList = bulkConnection.getQueryResultList(job.getId(), batch.getId()); LOG.info("Query results: {}", queryResultList.getResult()); resultIndex = 0; break; } } } if (queryResultList == null) { // Bulk API is asynchronous, so wait a little while... try { LOG.info("Waiting {} milliseconds for job {}", conf.basicConfig.maxWaitTime, job.getId()); Thread.sleep(conf.basicConfig.maxWaitTime); } catch (InterruptedException e) { LOG.debug("Interrupted while sleeping"); Thread.currentThread().interrupt(); } if (!getContext().isPreview()) { // If we're in preview, then don't return an empty batch! LOG.info("Job {} in progress", job.getId()); return nextSourceOffset; } } } catch (AsyncApiException e) { throw new StageException(Errors.FORCE_02, e); } } if (xmlEventReader == null && queryResultList != null) { // We have results - retrieve the next one! 
String resultId = queryResultList.getResult()[resultIndex]; resultIndex++; try { xmlEventReader = xmlInputFactory.createXMLEventReader( bulkConnection.getQueryResultStream(job.getId(), batch.getId(), resultId)); } catch (AsyncApiException e) { throw new StageException(Errors.FORCE_05, e); } catch (XMLStreamException e) { throw new StageException(Errors.FORCE_36, e); } } if (xmlEventReader != null) { int numRecords = 0; while (xmlEventReader.hasNext() && numRecords < maxBatchSize) { try { XMLEvent event = xmlEventReader.nextEvent(); if (event.isStartElement() && event.asStartElement().getName().getLocalPart().equals(RECORDS)) { // SDC-9731 will refactor record creators so we don't need this downcast String offset = ((BulkRecordCreator) recordCreator).createRecord(xmlEventReader, batchMaker, numRecords); nextSourceOffset = RECORD_ID_OFFSET_PREFIX + offset; ++numRecords; } } catch (XMLStreamException e) { throw new StageException(Errors.FORCE_37, e); } } if (!xmlEventReader.hasNext()) { // Exhausted this result - come back in on the next batch xmlEventReader = null; if (resultIndex == queryResultList.getResult().length) { // We're out of results, too! 
processedBatches.add(batch.getId()); queryResultList = null; batch = null; if (processedBatches.size() == batchList.getBatchInfo().length) { // And we're done with the job try { bulkConnection.closeJob(job.getId()); lastQueryCompletedTime = System.currentTimeMillis(); LOG.info("Query completed at: {}", lastQueryCompletedTime); } catch (AsyncApiException e) { LOG.error("Error closing job: {}", e); } LOG.info("Partial batch of {} records", numRecords); job = null; shouldSendNoMoreDataEvent = true; if (conf.subscribeToStreaming) { // Switch to processing events nextSourceOffset = READ_EVENTS_FROM_NOW; } else if (conf.repeatQuery == ForceRepeatQuery.FULL) { nextSourceOffset = RECORD_ID_OFFSET_PREFIX + conf.initialOffset; } else if (conf.repeatQuery == ForceRepeatQuery.NO_REPEAT) { nextSourceOffset = null; } } } } LOG.info("Full batch of {} records", numRecords); } return nextSourceOffset; }
From source file:com.aionengine.gameserver.dataholders.loadingutils.XmlMerger.java
/** * Read all {@link javax.xml.stream.events.XMLEvent}'s from specified file and write them onto the {@link javax.xml.stream.XMLEventWriter} * * @param file File to import//from w w w. j a v a 2 s . c om * @param skipRoot Skip-root flag * @param writer Destenation writer * @throws XMLStreamException On event reading/writing error. * @throws FileNotFoundException if the reading file does not exist, * is a directory rather than a regular file, * or for some other reason cannot be opened for * reading. */ private void importFile(File file, boolean skipRoot, XMLEventWriter writer, Properties metadata) throws XMLStreamException, IOException { logger.debug("Appending file " + file); metadata.setProperty(file.getPath(), makeHash(file)); XMLEventReader reader = null; try { reader = inputFactory.createXMLEventReader(new FileReader(file)); QName firstTagQName = null; while (reader.hasNext()) { XMLEvent event = reader.nextEvent(); // skip start and end of document. if (event.isStartDocument() || event.isEndDocument()) continue; // skip all comments. if (event instanceof Comment) continue; // skip white-spaces and all ignoreable white-spaces. if (event.isCharacters()) { if (event.asCharacters().isWhiteSpace() || event.asCharacters().isIgnorableWhiteSpace()) continue; } // modify root-tag of imported file. if (firstTagQName == null && event.isStartElement()) { firstTagQName = event.asStartElement().getName(); if (skipRoot) { continue; } else { StartElement old = event.asStartElement(); event = eventFactory.createStartElement(old.getName(), old.getAttributes(), null); } } // if root was skipped - skip root end too. if (event.isEndElement() && skipRoot && event.asEndElement().getName().equals(firstTagQName)) continue; // finally - write tag writer.add(event); } } finally { if (reader != null) try { reader.close(); } catch (Exception ignored) { } } }
From source file:com.logiware.accounting.domain.EdiInvoice.java
private void createEcuLineInvoice(File file) throws Exception { InputStream inputStream = null; XMLEventReader eventReader = null; try {/*ww w. j av a 2s.co m*/ XMLInputFactory inputFactory = XMLInputFactory.newInstance(); inputStream = new FileInputStream(file); eventReader = inputFactory.createXMLEventReader(inputStream); while (eventReader.hasNext()) { XMLEvent event = eventReader.nextEvent(); if (event.isStartElement()) { StartElement startElement = event.asStartElement(); if ("Header".equalsIgnoreCase(startElement.getName().toString())) { isHeader = true; elements.add("Header"); } else if ("Body".equalsIgnoreCase(startElement.getName().toString())) { isBody = true; elements.add("Body"); } else if (isBody && "Information".equalsIgnoreCase(startElement.getName().toString())) { isInformation = true; elements.add("Information"); } else if (isBody && !isInformation && "Details".equalsIgnoreCase(startElement.getName().toString())) { isDetails = true; elements.add("Details"); } else if (isBody && !isInformation && !isDetails && "Summary".equalsIgnoreCase(startElement.getName().toString())) { isSummary = true; elements.add("Summary"); } else if (null == elementType) { setElementType(startElement); } else if (null != elementType && null == characterType) { setCharacterType(startElement); } } else if (event.isCharacters()) { setValue(event.asCharacters()); } else if (event.isEndElement()) { EndElement endElement = event.asEndElement(); if (null != characterType && null != elementType) { removeCharacterType(); } else if (null != elementType) { removeElementType(endElement); } else if (isSummary && "Summary".equalsIgnoreCase(endElement.getName().toString())) { isSummary = false; } else if (isDetails && "Details".equalsIgnoreCase(endElement.getName().toString())) { isDetails = false; } else if (isBody && "Information".equalsIgnoreCase(endElement.getName().toString())) { isInformation = false; } else if ("Body".equalsIgnoreCase(endElement.getName().toString())) { isBody = 
false; } else if ("Header".equalsIgnoreCase(endElement.getName().toString())) { isHeader = false; } } } this.company = Company.ECU_LINE; status = new EdiInvoiceDAO().getStatus(vendorNumber, invoiceNumber); if (!elements.contains("Header")) { throw new AccountingException("Bad File. <Header> element missing"); } else if (!elements.contains("Body")) { throw new AccountingException("Bad File. <Body> missing"); } else if (!elements.contains("Information")) { throw new AccountingException("Bad File. <Information> element under <Body> missing"); } else if (!elements.contains("Details")) { throw new AccountingException("Bad File. <Details> element under <Body> missing"); } else if (!elements.contains("Summary")) { throw new AccountingException("Bad File. <Summary> element under <Body> missing"); } else if (!elements.contains("Applicationreference")) { throw new AccountingException("Bad File. <Applicationreference> element under <Header> missing"); } else if (!elements.contains("Reference")) { throw new AccountingException("Bad File. <Reference> element under <Header> missing"); } else if (!elements.contains("Sender")) { throw new AccountingException("Bad File. <Sender> element under <Header> missing"); } else if (!elements.contains("Code")) { throw new AccountingException("Bad File. <Code> element under <Sender> of <Header> missing"); } else if (!elements.contains("Invoice")) { throw new AccountingException( "Bad File. <Invoice> element under <Information> element of <Body> missing"); } else if (!elements.contains("RelatedReferences")) { throw new AccountingException( "Bad File. <RelatedReferences> element under <Information> element of <Body> missing"); } else if (!elements.contains("BY")) { throw new AccountingException( "Bad File. <Parties Qualifier=\"BY\"> under <Information> element of <Body> missing"); } else if (!elements.contains("SU")) { throw new AccountingException( "Bad File. 
<Parties Qualifier=\"SU\"> under <Information> element of <Body> missing"); } else if (!elements.contains("PaymentTerms")) { throw new AccountingException( "Bad File. <PaymentTerms> element under <Information> element of <Body> missing"); } else if (!elements.contains("ShipmentInformation")) { throw new AccountingException( "Bad File. <ShipmentInformation> element under <Information> element of <Body> missing"); } else if (!elements.contains("Detail")) { throw new AccountingException( "Bad File. <Detail> element under <Details> element of <Body> missing"); } else if (!elements.contains("TotalMonetaryAmount")) { throw new AccountingException( "Bad File. <TotalMonetaryAmount> element under <Summary> element of <Body> missing"); } else if (!elements.contains("TotalMonetaryAmountGroupByVAT")) { throw new AccountingException( "Bad File. <TotalMonetaryAmountGroupByVAT> element under <Summary> element of <Body> missing"); } } catch (Exception e) { throw e; } finally { if (null != eventReader) { eventReader.close(); } if (null != inputStream) { inputStream.close(); } } }