Example usage for org.apache.commons.io FileUtils writeStringToFile

List of usage examples for org.apache.commons.io FileUtils writeStringToFile

Introduction

In this page you can find the example usage for org.apache.commons.io FileUtils writeStringToFile.

Prototype

public static void writeStringToFile(File file, String data, String encoding) throws IOException 

Source Link

Document

Writes a String to a file creating the file if it does not exist.

Usage

From source file:edu.lternet.pasta.common.HTMLUtility.java

public static void main(String[] args) {

    File badIn = new File("/Users/servilla/tmp/bad.txt");
    File goodOut = new File("/Users/servilla/tmp/good.txt");

    String bad = null;//from  w ww .j  a  v  a 2s .com

    try {
        bad = FileUtils.readFileToString(badIn, "UTF-8");
    } catch (IOException e) {
        System.err.println("HTMLUtility: " + e.getMessage());
        e.printStackTrace();
    }

    String good = stripNonValidHTMLCharacters(bad);

    try {
        FileUtils.writeStringToFile(goodOut, good, "UTF-8");
    } catch (IOException e) {
        System.err.println("HTMLUtility: " + e.getMessage());
        e.printStackTrace();
    }

}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step10RemoveEmptyDocuments.java

public static void main(String[] args) throws IOException {
    // input dir - list of xml query containers
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();/*  w ww.j  a v  a2 s.  c  om*/
    }

    boolean crop = args.length >= 3 && "crop".equals(args[2]);

    // first find the maximum of zero-sized documents
    int maxMissing = 7;

    /*
    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
    QueryResultContainer queryResultContainer = QueryResultContainer
            .fromXML(FileUtils.readFileToString(f, "utf-8"));
            
    // first find the maximum of zero-sized documents in a query
    int missingInQuery = 0;
            
    for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
        // boilerplate removal
        if (rankedResults.plainText == null || rankedResults.plainText.isEmpty()) {
            missingInQuery++;
        }
    }
            
    maxMissing = Math.max(missingInQuery, maxMissing);
    }
    */

    System.out.println("Max zeroLengthDocuments in query: " + maxMissing);
    // max is 7 = we're cut-off at 93

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        List<QueryResultContainer.SingleRankedResult> nonEmptyDocsList = new ArrayList<>();

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            // collect non-empty documents
            if (rankedResults.plainText != null && !rankedResults.plainText.isEmpty()) {
                nonEmptyDocsList.add(rankedResults);
            }
        }

        System.out.println("Non-empty docs coune: " + nonEmptyDocsList.size());

        if (crop) {
            // now cut at 93
            nonEmptyDocsList = nonEmptyDocsList.subList(0, (100 - maxMissing));
            System.out.println("After cropping: " + nonEmptyDocsList.size());
        }
        System.out.println("After cleaning: " + nonEmptyDocsList.size());

        queryResultContainer.rankedResults.clear();
        queryResultContainer.rankedResults.addAll(nonEmptyDocsList);

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step3AddRawDocumentsFromClueWeb.java

public static void main(String[] args) throws IOException {
    // input dir - list of xml query containers
    // step2a-retrieved-results
    File inputDir = new File(args[0]);

    // warc.bz file containing all required documents according to ClueWeb IDs
    // ltr-50queries-100docs-clueweb-export.warc.gz
    File warc = new File(args[1]);

    // output dir
    File outputDir = new File(args[2]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();// ww  w  .j  a v a2s  .co m
    }

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        // iterate over warc for each query
        WARCFileReader reader = new WARCFileReader(new Configuration(), new Path(warc.getAbsolutePath()));
        try {
            while (true) {
                WARCRecord read = reader.read();
                String trecId = read.getHeader().getField("WARC-TREC-ID");

                // now iterate over retrieved results for the query and find matching IDs
                for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
                    if (rankedResults.clueWebID.equals(trecId)) {
                        // add the raw html content
                        String fullHTTPResponse = new String(read.getContent(), "utf-8");
                        // TODO fix coding?

                        String html = removeHTTPHeaders(fullHTTPResponse);

                        rankedResults.originalHtml = sanitizeXmlChars(html.trim());
                    }
                }
            }
        } catch (EOFException e) {
            // end of file
        }

        // check if all results have filled html
        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            if (rankedResults.originalHtml == null) {
                System.err.println("Missing original html for\t" + rankedResults.clueWebID
                        + ", setting relevance to false");
                rankedResults.relevant = Boolean.FALSE.toString();
            }
        }

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:com.mvdb.etl.actions.ExtractDBChanges.java

public static void main(String[] args) throws JSONException {

    ActionUtils.setUpInitFileProperty();
    //        boolean success = ActionUtils.markActionChainBroken("Just Testing");        
    //        System.exit(success ? 0 : 1);
    ActionUtils.assertActionChainNotBroken();
    ActionUtils.assertEnvironmentSetupOk();
    ActionUtils.assertFileExists("~/.mvdb", "~/.mvdb missing. Existing.");
    ActionUtils.assertFileExists("~/.mvdb/status.InitCustomerData.complete",
            "300init-customer-data.sh not executed yet. Exiting");
    //This check is not required as data can be modified any number of times
    //ActionUtils.assertFileDoesNotExist("~/.mvdb/status.ModifyCustomerData.complete", "ModifyCustomerData already done. Start with 100init.sh if required. Exiting");

    ActionUtils.createMarkerFile("~/.mvdb/status.ExtractDBChanges.start", true);

    //String schemaDescription = "{ 'root' : [{'table' : 'orders', 'keyColumn' : 'order_id', 'updateTimeColumn' : 'update_time'}]}";

    String customerName = null;/*from  w w  w .j  a v a 2 s.c o  m*/
    final CommandLineParser cmdLinePosixParser = new PosixParser();
    final Options posixOptions = constructPosixOptions();
    CommandLine commandLine;
    try {
        commandLine = cmdLinePosixParser.parse(posixOptions, args);
        if (commandLine.hasOption("customer")) {
            customerName = commandLine.getOptionValue("customer");
        }
    } catch (ParseException parseException) // checked exception
    {
        System.err.println(
                "Encountered exception while parsing using PosixParser:\n" + parseException.getMessage());
    }

    if (customerName == null) {
        System.err.println("Could not find customerName. Aborting...");
        System.exit(1);
    }

    ApplicationContext context = Top.getContext();

    final OrderDAO orderDAO = (OrderDAO) context.getBean("orderDAO");
    final ConfigurationDAO configurationDAO = (ConfigurationDAO) context.getBean("configurationDAO");
    final GenericDAO genericDAO = (GenericDAO) context.getBean("genericDAO");
    File snapshotDirectory = getSnapshotDirectory(configurationDAO, customerName);
    try {
        FileUtils.writeStringToFile(new File("/tmp/etl.extractdbchanges.directory.txt"),
                snapshotDirectory.getName(), false);
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
        return;
    }
    long currentTime = new Date().getTime();
    Configuration lastRefreshTimeConf = configurationDAO.find(customerName, "last-refresh-time");
    Configuration schemaDescriptionConf = configurationDAO.find(customerName, "schema-description");
    long lastRefreshTime = Long.parseLong(lastRefreshTimeConf.getValue());
    OrderJsonFileConsumer orderJsonFileConsumer = new OrderJsonFileConsumer(snapshotDirectory);
    Map<String, ColumnMetadata> metadataMap = orderDAO.findMetadata();
    //write file schema-orders.dat in snapshotDirectory
    genericDAO.fetchMetadata("orders", snapshotDirectory);
    //writes files: header-orders.dat, data-orders.dat in snapshotDirectory
    JSONObject json = new JSONObject(schemaDescriptionConf.getValue());
    JSONArray rootArray = json.getJSONArray("root");
    int length = rootArray.length();
    for (int i = 0; i < length; i++) {
        JSONObject jsonObject = rootArray.getJSONObject(i);
        String table = jsonObject.getString("table");
        String keyColumnName = jsonObject.getString("keyColumn");
        String updateTimeColumnName = jsonObject.getString("updateTimeColumn");
        System.out.println("table:" + table + ", keyColumn: " + keyColumnName + ", updateTimeColumn: "
                + updateTimeColumnName);
        genericDAO.fetchAll2(snapshotDirectory, new Timestamp(lastRefreshTime), table, keyColumnName,
                updateTimeColumnName);
    }

    //Unlikely failure
    //But Need to factor this into a separate task so that extraction does not have to be repeated. 
    //Extraction is an expensive task. 
    try {
        String sourceDirectoryAbsolutePath = snapshotDirectory.getAbsolutePath();

        File sourceRelativeDirectoryPath = getRelativeSnapShotDirectory(configurationDAO,
                sourceDirectoryAbsolutePath);
        String hdfsRoot = ActionUtils.getConfigurationValue(ConfigurationKeys.GLOBAL_CUSTOMER,
                ConfigurationKeys.GLOBAL_HDFS_ROOT);
        String targetDirectoryFullPath = hdfsRoot + "/data" + sourceRelativeDirectoryPath;

        ActionUtils.copyLocalDirectoryToHdfsDirectory(sourceDirectoryAbsolutePath, targetDirectoryFullPath);
        String dirName = snapshotDirectory.getName();
        ActionUtils.setConfigurationValue(customerName, ConfigurationKeys.LAST_COPY_TO_HDFS_DIRNAME, dirName);
    } catch (Throwable e) {
        e.printStackTrace();
        logger.error("Objects Extracted from database. But copy of snapshot directory<"
                + snapshotDirectory.getAbsolutePath() + "> to hdfs <" + ""
                + ">failed. Fix the problem and redo extract.", e);
        System.exit(1);
    }

    //Unlikely failure
    //But Need to factor this into a separate task so that extraction does not have to be repeated. 
    //Extraction is an expensive task. 
    String targetZip = null;
    try {
        File targetZipDirectory = new File(snapshotDirectory.getParent(), "archives");
        if (!targetZipDirectory.exists()) {
            boolean success = targetZipDirectory.mkdirs();
            if (success == false) {
                logger.error("Objects copied to hdfs. But able to create archive directory <"
                        + targetZipDirectory.getAbsolutePath() + ">. Fix the problem and redo extract.");
                System.exit(1);
            }
        }
        targetZip = new File(targetZipDirectory, snapshotDirectory.getName() + ".zip").getAbsolutePath();
        ActionUtils.zipFullDirectory(snapshotDirectory.getAbsolutePath(), targetZip);
    } catch (Throwable e) {
        e.printStackTrace();
        logger.error("Objects copied to hdfs. But zipping of snapshot directory<"
                + snapshotDirectory.getAbsolutePath() + "> to  <" + targetZip
                + ">failed. Fix the problem and redo extract.", e);
        System.exit(1);
    }

    //orderDAO.findAll(new Timestamp(lastRefreshTime), orderJsonFileConsumer);
    Configuration updateRefreshTimeConf = new Configuration(customerName, "last-refresh-time",
            String.valueOf(currentTime));
    configurationDAO.update(updateRefreshTimeConf, String.valueOf(lastRefreshTimeConf.getValue()));
    ActionUtils.createMarkerFile("~/.mvdb/status.ExtractDBChanges.complete", true);

}

From source file:fr.ericlab.mabed.app.Main.java

public static void main(String[] args) throws IOException {
    Locale.setDefault(Locale.US);
    Configuration configuration = new Configuration();
    Corpus corpus = new Corpus(configuration);
    System.out.println("MABED: Mention-Anomaly-Based Event Detection");
    if (args.length == 0 || args[0].equals("-help")) {
        System.out.println("For more information on how to run MABED, see the README.txt file");
    } else {//from w w  w. j av  a 2s  .  com
        if (args[0].equals("-run")) {
            try {
                if (configuration.numberOfThreads > 1) {
                    System.out.println("Running the parallelized implementation with "
                            + configuration.numberOfThreads + " threads (this computer has "
                            + Runtime.getRuntime().availableProcessors() + " available threads)");
                } else {
                    System.out.println("Running the centralized implementation");
                }
                corpus.loadCorpus(configuration.numberOfThreads > 1);
                String output = "MABED: Mention-Anomaly-Based Event Detection\n" + corpus.output + "\n";
                System.out.println("-------------------------\n" + Util.getDate()
                        + " MABED is running\n-------------------------");
                output += "-------------------------\n" + Util.getDate()
                        + " MABED is running\n-------------------------\n";
                System.out.println(Util.getDate() + " Reading parameters:\n   - k = " + configuration.k
                        + ", p = " + configuration.p + ", theta = " + configuration.theta + ", sigma = "
                        + configuration.sigma);
                MABED mabed = new MABED();
                if (configuration.numberOfThreads > 1) {
                    output += mabed.applyParallelized(corpus, configuration);
                } else {
                    output += mabed.applyCentralized(corpus, configuration);
                }
                System.out.println(
                        "--------------------\n" + Util.getDate() + " MABED ended\n--------------------");
                output += "--------------------\n" + Util.getDate() + " MABED ended\n--------------------\n";
                File outputDir = new File("output");
                if (!outputDir.isDirectory()) {
                    outputDir.mkdir();
                }
                File textFile = new File("output/MABED.tex");
                FileUtils.writeStringToFile(textFile, mabed.events.toLatex(corpus), false);
                textFile = new File("output/MABED.log");
                FileUtils.writeStringToFile(textFile, output, false);
                mabed.events.printLatex(corpus);
            } catch (InterruptedException ex) {
                Logger.getLogger(Main.class.getName()).log(Level.SEVERE, null, ex);
            }
        } else {
            System.out.println("Unknown option '" + args[0]
                    + "'\nType 'java -jar MABED.jar -help' for more information on how to run MABED");
        }
    }
}

From source file:com.client.SoapClientApache.java

public static void main(String[] args) throws Exception {
    CloseableHttpClient httpclient = HttpClients.createDefault();
    try {//from   ww  w . j a v a2  s. com
        HttpGet httpGet = new HttpGet("http://google.com/");
        CloseableHttpResponse response1 = httpclient.execute(httpGet);
        // The underlying HTTP connection is still held by the response object
        // to allow the response content to be streamed directly from the network socket.
        // In order to ensure correct deallocation of system resources
        // the user MUST either fully consume the response content  or abort request
        // execution by calling CloseableHttpResponse#close().

        try {
            System.out.println("Response for http get");
            System.out.println(response1.getStatusLine());
            HttpEntity entity1 = response1.getEntity();
            // do something useful with the response body
            // and ensure it is fully consumed
            EntityUtils.consume(entity1);
        } finally {
            response1.close();
        }

        File file = new File("temConvertRq.xml");
        File fileOutXml = new File("temConvertRs.xml");
        fileOutXml.createNewFile();

        System.out.println("Before file entity");
        FileEntity entity = new FileEntity(file, ContentType.create("text/xml", "UTF-8"));

        System.out.println("Before http post");
        HttpPost httpPost = new HttpPost("http://www.w3schools.com/webservices/tempconvert.asmx");

        httpPost.setEntity(entity);
        System.out.println("Entity set");
        //Add headers
        httpPost.addHeader("SOAPAction", "http://www.w3schools.com/webservices/CelsiusToFahrenheit");
        httpPost.addHeader("Host", "www.w3schools.com");
        httpPost.addHeader("Content-Type", "text/xml; charset=utf-8");
        //httpPost.addHeader("Content-Length", "length");

        System.out.println("after headers added");
        CloseableHttpResponse response2 = httpclient.execute(httpPost);

        try {
            System.out.println("before getting the response status");
            System.out.println("Response Status= " + response2.getStatusLine());
            HttpEntity entity2 = response2.getEntity();
            String outXml = EntityUtils.toString(entity2);
            System.out.println("XMl Response!!!!!!!!! = " + outXml);

            //                entity2.writeTo(System.out);

            //System.out.println("R
            // do something useful with the response body
            // and ensure it is fully consumed
            EntityUtils.consume(entity2);

            FileUtils.writeStringToFile(fileOutXml, outXml, "UTF-8");

        } finally {
            response2.close();
        }
    } finally {
        httpclient.close();
    }
}

From source file:edu.indiana.d2i.sloan.internal.QueryVMSimulator.java

public static void main(String[] args) {
    QueryVMSimulator simulator = new QueryVMSimulator();

    CommandLineParser parser = new PosixParser();

    try {//w ww  .j a v  a  2 s .  c om
        CommandLine line = simulator.parseCommandLine(parser, args);
        String wdir = line.getOptionValue(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.WORKING_DIR));

        if (!HypervisorCmdSimulator.resourceExist(wdir)) {
            logger.error(String.format("Cannot find VM working dir: %s", wdir));
            System.exit(ERROR_CODE.get(ERROR_STATE.VM_NOT_EXIST));
        }

        Properties prop = new Properties();
        String filename = HypervisorCmdSimulator.cleanPath(wdir) + HypervisorCmdSimulator.VM_INFO_FILE_NAME;

        prop.load(new FileInputStream(new File(filename)));

        VMStatus status = new VMStatus(
                VMMode.valueOf(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.VM_MODE))),
                VMState.valueOf(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.VM_STATE))), ip,
                Integer.parseInt(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.VNC_PORT))),
                Integer.parseInt(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.SSH_PORT))),
                Integer.parseInt(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.VCPU))),
                Integer.parseInt(prop.getProperty(CMD_FLAG_VALUE.get(CMD_FLAG_KEY.MEM))));

        // write state query file so shell script can read the vm state info
        filename = HypervisorCmdSimulator.cleanPath(wdir) + QueryVMSimulator.QUERY_RES_FILE_NAME;
        FileUtils.writeStringToFile(new File(filename), status.toString(), Charset.forName("UTF-8"));

    } catch (ParseException e) {
        logger.error(String.format("Cannot parse input arguments: %s%n, expected:%n%s",
                StringUtils.join(args, " "), simulator.getUsage(100, "", 5, 5, "")));

        System.exit(ERROR_CODE.get(ERROR_STATE.INVALID_INPUT_ARGS));
    } catch (IOException e) {
        logger.error(e.getMessage(), e);
        System.exit(ERROR_CODE.get(ERROR_STATE.IO_ERR));
    }

}

From source file:eu.crydee.stanfordcorenlp.Tokenizer.java

/**
 * Wrapper around Stanford CoreNLP to tokenize text.
 *
 * Give it an input dir of text files with --input-dir and it'll ouput
 * tokenized versions, one sentence per line with space separated words to
 * --output-dir (defaults to out/).//from w  ww .ja va 2s .  co  m
 *
 * @param args CLI args. Example: --input-dir my-input --output-dir
 * my-output.
 */
public static void main(String[] args) {
    ArgumentParser parser = ArgumentParsers.newArgumentParser("stanford-corenlp-tokenizer-wrapper")
            .description("Converts Mediawiki dumps to text.");
    parser.addArgument("-i", "--input-dir").required(true).help("Path of the input text files directory.");
    parser.addArgument("-o", "--output-dir").help("Path of the output text files directory.").setDefault("out");
    Params params = new Params();
    try {
        parser.parseArgs(args, params);
    } catch (ArgumentParserException ex) {
        System.err.println("Could not parse arguments: " + ex.getMessage());
        System.exit(1);
    }
    Tokenizer tokenizer = new Tokenizer();

    try {
        Files.list(Paths.get(params.inDirPath)).filter(Files::isRegularFile).map(Path::toFile).map(f -> {
            try {
                return Pair.of(f.getName(), FileUtils.readFileToString(f, StandardCharsets.UTF_8));
            } catch (IOException ex) {
                System.err.println("Could not read input text file: " + ex.getLocalizedMessage());
                throw new UncheckedIOException(ex);
            }
        }).forEach(p -> {
            String text = tokenizer.tokenizeAndSentenceSplit(p.getRight());
            try {
                FileUtils.writeStringToFile(Paths.get(params.outDirpath, p.getLeft()).toFile(), text,
                        StandardCharsets.UTF_8);
            } catch (IOException ex) {
                System.err.println("Could not write output text file: " + ex.getLocalizedMessage());
            }
        });
    } catch (IOException ex) {
        System.err.println("Could not read from input directory: " + ex.getLocalizedMessage());
    }
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step5LinguisticPreprocessing.java

public static void main(String[] args) throws Exception {
    // input dir - list of xml query containers
    // step4-boiler-plate/
    File inputDir = new File(args[0]);

    // output dir
    File outputDir = new File(args[1]);
    if (!outputDir.exists()) {
        outputDir.mkdirs();//from   ww  w.jav a  2 s  .  c o m
    }

    // iterate over query containers
    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            //                System.out.println(rankedResults.plainText);

            if (rankedResults.plainText != null) {
                String[] lines = StringUtils.split(rankedResults.plainText, "\n");

                // collecting all cleaned lines
                List<String> cleanLines = new ArrayList<>(lines.length);
                // collecting line tags
                List<String> lineTags = new ArrayList<>(lines.length);

                for (String line : lines) {
                    // get the tag
                    String tag = null;
                    Matcher m = OPENING_TAG_PATTERN.matcher(line);

                    if (m.find()) {
                        tag = m.group(1);
                    }

                    if (tag == null) {
                        throw new IllegalArgumentException("No html tag found for line:\n" + line);
                    }

                    // replace the tag at the beginning and the end
                    String noTagText = line.replaceAll("^<\\S+>", "").replaceAll("</\\S+>$", "");

                    // do some html cleaning
                    noTagText = noTagText.replaceAll("&nbsp;", " ");

                    noTagText = noTagText.trim();

                    // add to the output
                    if (!noTagText.isEmpty()) {
                        cleanLines.add(noTagText);
                        lineTags.add(tag);
                    }
                }

                if (cleanLines.isEmpty()) {
                    // the document is empty
                    System.err.println("Document " + rankedResults.clueWebID + " in query "
                            + queryResultContainer.qID + " is empty");
                } else {
                    // now join them back to paragraphs
                    String text = StringUtils.join(cleanLines, "\n");

                    // create JCas
                    JCas jCas = JCasFactory.createJCas();
                    jCas.setDocumentText(text);
                    jCas.setDocumentLanguage("en");

                    // annotate WebParagraph
                    SimplePipeline.runPipeline(jCas,
                            AnalysisEngineFactory.createEngineDescription(WebParagraphAnnotator.class));

                    // fill the original tag information
                    List<WebParagraph> webParagraphs = new ArrayList<>(
                            JCasUtil.select(jCas, WebParagraph.class));

                    // they must be the same size as original ones
                    if (webParagraphs.size() != lineTags.size()) {
                        throw new IllegalStateException(
                                "Different size of annotated paragraphs and original lines");
                    }

                    for (int i = 0; i < webParagraphs.size(); i++) {
                        WebParagraph p = webParagraphs.get(i);
                        // get tag
                        String tag = lineTags.get(i);

                        p.setOriginalHtmlTag(tag);
                    }

                    SimplePipeline.runPipeline(jCas,
                            AnalysisEngineFactory.createEngineDescription(StanfordSegmenter.class,
                                    // only on existing WebParagraph annotations
                                    StanfordSegmenter.PARAM_ZONE_TYPES, WebParagraph.class.getCanonicalName()));

                    // now convert to XMI
                    ByteArrayOutputStream byteOutputStream = new ByteArrayOutputStream();
                    XmiCasSerializer.serialize(jCas.getCas(), byteOutputStream);

                    // encode to base64
                    String encoded = new BASE64Encoder().encode(byteOutputStream.toByteArray());

                    rankedResults.originalXmi = encoded;
                }
            }
        }

        // and save the query to output dir
        File outputFile = new File(outputDir, queryResultContainer.qID + ".xml");
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

From source file:Inmemantlr.java

public static void main(String[] args) {
    LOGGER.info("Inmemantlr tool");

    HelpFormatter hformatter = new HelpFormatter();

    Options options = new Options();

    // Binary arguments
    options.addOption("h", "print this message");

    Option grmr = Option.builder().longOpt("grmrfiles").hasArgs().desc("comma-separated list of ANTLR files")
            .required(true).argName("grmrfiles").type(String.class).valueSeparator(',').build();

    Option infiles = Option.builder().longOpt("infiles").hasArgs()
            .desc("comma-separated list of files to parse").required(true).argName("infiles").type(String.class)
            .valueSeparator(',').build();

    Option utilfiles = Option.builder().longOpt("utilfiles").hasArgs()
            .desc("comma-separated list of utility files to be added for " + "compilation").required(false)
            .argName("utilfiles").type(String.class).valueSeparator(',').build();

    Option odir = Option.builder().longOpt("outdir")
            .desc("output directory in which the dot files will be " + "created").required(false).hasArg(true)
            .argName("outdir").type(String.class).build();

    options.addOption(infiles);/*www .j a  v  a 2  s  . c  o m*/
    options.addOption(grmr);
    options.addOption(utilfiles);
    options.addOption(odir);

    CommandLineParser parser = new DefaultParser();

    CommandLine cmd = null;

    try {
        cmd = parser.parse(options, args);
        if (cmd.hasOption('h')) {
            hformatter.printHelp("java -jar inmemantlr.jar", options);
            System.exit(0);
        }
    } catch (ParseException e) {
        hformatter.printHelp("java -jar inmemantlr.jar", options);
        LOGGER.error(e.getMessage());
        System.exit(-1);
    }

    // input files
    Set<File> ins = getFilesForOption(cmd, "infiles");
    // grammar files
    Set<File> gs = getFilesForOption(cmd, "grmrfiles");
    // utility files
    Set<File> uf = getFilesForOption(cmd, "utilfiles");
    // output dir
    Set<File> od = getFilesForOption(cmd, "outdir");

    if (od.size() > 1) {
        LOGGER.error("output directories must be less than or equal to 1");
        System.exit(-1);
    }

    if (ins.size() <= 0) {
        LOGGER.error("no input files were specified");
        System.exit(-1);
    }

    if (gs.size() <= 0) {
        LOGGER.error("no grammar files were specified");
        System.exit(-1);
    }

    LOGGER.info("create generic parser");
    GenericParser gp = null;
    try {
        gp = new GenericParser(gs.toArray(new File[gs.size()]));
    } catch (FileNotFoundException e) {
        LOGGER.error(e.getMessage());
        System.exit(-1);
    }

    if (!uf.isEmpty()) {
        try {
            gp.addUtilityJavaFiles(uf.toArray(new String[uf.size()]));
        } catch (FileNotFoundException e) {
            LOGGER.error(e.getMessage());
            System.exit(-1);
        }
    }

    LOGGER.info("create and add parse tree listener");
    DefaultTreeListener dt = new DefaultTreeListener();
    gp.setListener(dt);

    LOGGER.info("compile generic parser");
    try {
        gp.compile();
    } catch (CompilationException e) {
        LOGGER.error("cannot compile generic parser: {}", e.getMessage());
        System.exit(-1);
    }

    String fpfx = "";
    for (File of : od) {
        if (!of.exists() || !of.isDirectory()) {
            LOGGER.error("output directory does not exist or is not a " + "directory");
            System.exit(-1);
        }
        fpfx = of.getAbsolutePath();
    }

    Ast ast;
    for (File f : ins) {
        try {
            gp.parse(f);
        } catch (IllegalWorkflowException | FileNotFoundException e) {
            LOGGER.error(e.getMessage());
            System.exit(-1);
        }
        ast = dt.getAst();

        if (!fpfx.isEmpty()) {
            String of = fpfx + "/" + FilenameUtils.removeExtension(f.getName()) + ".dot";

            LOGGER.info("write file {}", of);

            try {
                FileUtils.writeStringToFile(new File(of), ast.toDot(), "UTF-8");
            } catch (IOException e) {
                LOGGER.error(e.getMessage());
                System.exit(-1);
            }
        } else {
            LOGGER.info("Tree for {} \n {}", f.getName(), ast.toDot());
        }
    }

    System.exit(0);
}