Example usage for org.apache.commons.io FileUtils readFileToString

List of usage examples for org.apache.commons.io FileUtils readFileToString

Introduction

On this page you can find example usages of org.apache.commons.io.FileUtils.readFileToString.

Prototype

public static String readFileToString(File file, String encoding) throws IOException 

Source Link

Document

Reads the contents of a file into a String.

Usage

From source file:com.da.daum.DaumCafeLevelUpParser.java

/**
 * Ad-hoc driver: prints a sample file name with its {@code '*'} characters removed,
 * then reads a list file from a hard-coded local path and passes its contents to
 * {@code setDaumListVoList}.
 *
 * @param args unused
 * @throws IOException if the list file cannot be read
 */
public static void main(String[] args) throws IOException {
    DaumCafeLevelUpParser parser = new DaumCafeLevelUpParser();

    // A single literal replace() removes every '*'. The original applied the same
    // regex replaceAll twice, the second pass being a no-op.
    String sampleName = "(?)^*~.txt".replace("*", "");
    System.out.println(sampleName);

    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    String listBody = FileUtils.readFileToString(file, "utf-8");
    // Raw Map kept on purpose: the parameter type of setDaumListVoList is not visible here.
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    //parser.setDaumView(listBody);

}

From source file:eu.crydee.stanfordcorenlp.Tokenizer.java

/**
 * Wrapper around Stanford CoreNLP to tokenize text.
 *
 * Give it an input dir of text files with --input-dir and it'll ouput
 * tokenized versions, one sentence per line with space separated words to
 * --output-dir (defaults to out/)./*from  w  ww  .  j a  v a2 s .  co  m*/
 *
 * @param args CLI args. Example: --input-dir my-input --output-dir
 * my-output.
 */
public static void main(String[] args) {
    ArgumentParser parser = ArgumentParsers.newArgumentParser("stanford-corenlp-tokenizer-wrapper")
            .description("Converts Mediawiki dumps to text.");
    parser.addArgument("-i", "--input-dir").required(true).help("Path of the input text files directory.");
    parser.addArgument("-o", "--output-dir").help("Path of the output text files directory.").setDefault("out");
    Params params = new Params();
    try {
        parser.parseArgs(args, params);
    } catch (ArgumentParserException ex) {
        System.err.println("Could not parse arguments: " + ex.getMessage());
        System.exit(1);
    }
    Tokenizer tokenizer = new Tokenizer();

    try {
        Files.list(Paths.get(params.inDirPath)).filter(Files::isRegularFile).map(Path::toFile).map(f -> {
            try {
                return Pair.of(f.getName(), FileUtils.readFileToString(f, StandardCharsets.UTF_8));
            } catch (IOException ex) {
                System.err.println("Could not read input text file: " + ex.getLocalizedMessage());
                throw new UncheckedIOException(ex);
            }
        }).forEach(p -> {
            String text = tokenizer.tokenizeAndSentenceSplit(p.getRight());
            try {
                FileUtils.writeStringToFile(Paths.get(params.outDirpath, p.getLeft()).toFile(), text,
                        StandardCharsets.UTF_8);
            } catch (IOException ex) {
                System.err.println("Could not write output text file: " + ex.getLocalizedMessage());
            }
        });
    } catch (IOException ex) {
        System.err.println("Could not read from input directory: " + ex.getLocalizedMessage());
    }
}

From source file:com.da.daum.DaumCafeOneLineParser.java

/**
 * Ad-hoc driver: prints a sample file name with its {@code '*'} characters removed,
 * then reads a list file from a hard-coded local path and passes its contents to
 * {@code setDaumListVoList}.
 *
 * @param args unused
 * @throws IOException if the list file cannot be read
 */
public static void main(String[] args) throws IOException {
    DaumCafeOneLineParser parser = new DaumCafeOneLineParser();

    // A single literal replace() removes every '*'. The original applied the same
    // regex replaceAll twice, the second pass being a no-op.
    String sampleName = "(?)^*~.txt".replace("*", "");
    System.out.println(sampleName);

    // FileUtils.writeStringToFile(new File(file), listBody, "utf-8");
    // File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt");
    File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt");
    String listBody = FileUtils.readFileToString(file, "utf-8");
    // Raw Map kept on purpose: the parameter type of setDaumListVoList is not visible here.
    Map pageMap = new HashMap();
    parser.setDaumListVoList(listBody, pageMap);
    // parser.setDaumView(listBody);

}

From source file:com.akana.demo.freemarker.templatetester.App.java

/**
 * Command-line entry point: processes a FreeMarker template against a data file and
 * writes the result to standard output.
 * <p>
 * Two positional arguments are required: the template path and the data file path.
 * Usage help is shown when {@code help} is given, when fewer than two positional
 * arguments are supplied, or when the command line cannot be parsed.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {

    // Value-carrying options are created through the static OptionBuilder.
    @SuppressWarnings("static-access")
    Option contentTypeOpt = OptionBuilder.withArgName("content-type").hasArg()
            .withDescription("content type of model").create("content");
    @SuppressWarnings("static-access")
    Option urlPathOpt = OptionBuilder.withArgName("httpRequestLine").hasArg()
            .withDescription("url path and parameters in HTTP Request Line format").create("url");
    @SuppressWarnings("static-access")
    Option rootNameOpt = OptionBuilder.withArgName("messageName").hasArg()
            .withDescription("root data object name, defaults to 'message'").create("root");
    @SuppressWarnings("static-access")
    Option extraMessagesOpt = OptionBuilder.withArgName("dataModelPaths")
            .hasArgs(Option.UNLIMITED_VALUES).withDescription("additional message object data sources")
            .create("messages");
    @SuppressWarnings("static-access")
    Option debugOpt = OptionBuilder.hasArg(false)
            .withDescription("Shows debug information about template processing").create("debug");

    final Options options = new Options();
    options.addOption(new Option("help", "print this message"));
    options.addOption(contentTypeOpt);
    options.addOption(urlPathOpt);
    options.addOption(rootNameOpt);
    options.addOption(extraMessagesOpt);
    options.addOption(debugOpt);

    final CommandLineParser cliParser = new DefaultParser();

    try {
        final CommandLine cmd = cliParser.parse(options, args);
        final String[] positional = cmd.getArgs();

        // Help was requested, or the two mandatory positional arguments are missing.
        if (cmd.hasOption("help") || positional.length < 2) {
            showHelp(options);
            return;
        }

        final String ftlPath = positional[0];
        final String dataPath = positional[1];

        // Content type is guessed from the data file extension unless -content overrides it.
        final String extension = FilenameUtils.getExtension(dataPath);
        final String discoveredType = extension.equals("json") ? "json"
                : extension.equals("txt") ? "txt" : "text/xml";
        final String contentType = cmd.hasOption("content") ? cmd.getOptionValue("content") : discoveredType;

        // Name under which the primary data model is exposed to the template.
        final String rootMessageName = cmd.hasOption("root") ? cmd.getOptionValue("root") : "message";

        // Paths of additional data models; empty when -messages is absent.
        final String[] additionalModels = cmd.hasOption("messages") ? cmd.getOptionValues("messages")
                : new String[0];

        if (cmd.hasOption("debug")) {
            System.out.println(" Processing ftl   : " + ftlPath);
            System.out.println("   with data model: " + dataPath);
            System.out.println(" with content-type: " + contentType);
            System.out.println(" data model object: " + rootMessageName);
            if (cmd.hasOption("messages")) {
                System.out.println("additional models: " + additionalModels.length);
            }
        }

        // Templates are resolved relative to the current working directory.
        final Configuration cfg = new Configuration(Configuration.VERSION_2_3_23);
        cfg.setDirectoryForTemplateLoading(new File("."));
        cfg.setDefaultEncoding("UTF-8");
        cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER);

        // Primary data model: raw text for json/txt content, a parsed DOM otherwise.
        final Map<String, Object> message = new HashMap<>();
        final boolean textual = contentType.contains("json") || contentType.contains("txt");
        if (!textual) {
            message.put("contentAsXml", freemarker.ext.dom.NodeModel.parse(new File(dataPath)));
        } else {
            message.put("contentAsString",
                    FileUtils.readFileToString(new File(dataPath), StandardCharsets.UTF_8));
        }

        if (cmd.hasOption("url")) {
            message.put("getProperty", new AkanaGetProperty(cmd.getOptionValue("url")));
        }

        // Additional models are exposed as message0, message1, ... in argument order.
        final Map<String, Object> root = new HashMap<>();
        root.put(rootMessageName, message);
        for (int idx = 0; idx < additionalModels.length; idx++) {
            root.put("message" + idx, createMessageFromFile(additionalModels[idx], contentType));
        }

        // Load the template and merge it with the data model onto stdout.
        final Template template = cfg.getTemplate(ftlPath);
        final Writer stdout = new OutputStreamWriter(System.out);
        template.process(root, stdout);

    } catch (ParseException e) {
        showHelp(options);
        System.exit(1);
    } catch (IOException e) {
        System.out.println("Unable to parse ftl.");
        e.printStackTrace();
    } catch (SAXException e) {
        System.out.println("XML parsing issue.");
        e.printStackTrace();
    } catch (ParserConfigurationException e) {
        System.out.println("Unable to configure parser.");
        e.printStackTrace();
    } catch (TemplateException e) {
        System.out.println("Unable to parse template.");
        e.printStackTrace();
    }

}

From source file:edu.uthscsa.ric.papaya.builder.Builder.java

/**
 * Command-line entry point for the build. In order, it: reads the CLI flags, cleans
 * the output directory, updates the build properties, collects the loadable images
 * (sample, atlas, additional), concatenates and compresses JavaScript and CSS, and
 * writes the HTML page.
 * <p>
 * Each stage reports an {@link IOException} on {@code System.err} and the build then
 * carries on with the next stage.
 *
 * @param args command-line arguments, parsed by {@code createCLI}
 */
public static void main(final String[] args) {
    final Builder builder = new Builder();

    // process command line
    final CommandLine cli = builder.createCLI(args);
    builder.setUseSample(cli.hasOption(ARG_SAMPLE));
    builder.setUseAtlas(cli.hasOption(ARG_ATLAS));
    builder.setLocal(cli.hasOption(ARG_LOCAL));
    builder.setPrintHelp(cli.hasOption(ARG_HELP));
    builder.setUseImages(cli.hasOption(ARG_IMAGE));
    builder.setSingleFile(cli.hasOption(ARG_SINGLE));
    builder.setUseParamFile(cli.hasOption(ARG_PARAM_FILE));
    builder.setUseTitle(cli.hasOption(ARG_TITLE));

    // print help, if necessary
    if (builder.isPrintHelp()) {
        builder.printHelp();
        return;
    }

    // find project root directory
    if (cli.hasOption(ARG_ROOT)) {
        try {
            builder.projectDir = (new File(cli.getOptionValue(ARG_ROOT))).getCanonicalFile();
        } catch (final IOException ex) {
            System.err.println("Problem finding root directory.  Reason: " + ex.getMessage());
        }
    }

    // fall back to the working directory when no usable root was given
    if (builder.projectDir == null) {
        builder.projectDir = new File(System.getProperty("user.dir"));
    }

    // clean output dir
    final File outputDir = new File(builder.projectDir + "/" + OUTPUT_DIR);
    System.out.println("Cleaning output directory...");
    try {
        builder.cleanOutputDir(outputDir);
    } catch (final IOException ex) {
        System.err.println("Problem cleaning build directory.  Reason: " + ex.getMessage());
    }

    if (builder.isLocal()) {
        System.out.println("Building for local usage...");
    }

    // write JS
    final File compressedFileJs = new File(outputDir, OUTPUT_JS_FILENAME);

    // build properties
    try {
        final File buildFile = new File(builder.projectDir + "/" + BUILD_PROP_FILE);

        builder.readBuildProperties(buildFile);
        builder.buildNumber++; // increment build number
        builder.writeBuildProperties(compressedFileJs, true);
        builder.writeBuildProperties(buildFile, false);
    } catch (final IOException ex) {
        System.err.println("Problem handling build properties.  Reason: " + ex.getMessage());
    }

    String htmlParameters = null;

    if (builder.isUseParamFile()) {
        final String paramFileArg = cli.getOptionValue(ARG_PARAM_FILE);

        if (paramFileArg != null) {
            try {
                System.out.println("Including parameters...");

                final String parameters = FileUtils.readFileToString(new File(paramFileArg), "UTF-8");
                htmlParameters = "var params = " + parameters + ";";
            } catch (final IOException ex) {
                System.err.println("Problem reading parameters file! " + ex.getMessage());
            }
        }
    }

    String title = null;
    if (builder.isUseTitle()) {
        String str = cli.getOptionValue(ARG_TITLE);
        if (str != null) {
            // strip quotes from the title before it is passed on to writeHtml
            str = str.trim();
            str = str.replace("\"", "");
            str = str.replace("'", "");

            if (str.length() > 0) {
                title = str;
                System.out.println("Using title: " + title);
            }
        }
    }

    try {
        final JSONArray loadableImages = new JSONArray();

        // sample image
        if (builder.isUseSample()) {
            System.out.println("Including sample image...");

            final File sampleFile = new File(builder.projectDir + "/" + SAMPLE_IMAGE_NII_FILE);
            final String filename = Utilities
                    .replaceNonAlphanumericCharacters(Utilities.removeNiftiExtensions(sampleFile.getName()));

            if (builder.isLocal()) {
                // local build: the encoded image is appended to the JS output as a variable
                loadableImages.put(new JSONObject("{\"nicename\":\"Sample Image\",\"name\":\"" + filename
                        + "\",\"encode\":\"" + filename + "\"}"));
                final String sampleEncoded = Utilities.encodeImageFile(sampleFile);
                FileUtils.writeStringToFile(compressedFileJs,
                        "var " + filename + "= \"" + sampleEncoded + "\";\n", "UTF-8", true);
            } else {
                // non-local build: the image is copied to the output dir and referenced by URL
                loadableImages.put(new JSONObject("{\"nicename\":\"Sample Image\",\"name\":\"" + filename
                        + "\",\"url\":\"" + SAMPLE_IMAGE_NII_FILE + "\"}"));
                FileUtils.copyFile(sampleFile, new File(outputDir + "/" + SAMPLE_IMAGE_NII_FILE));
            }
        }

        // atlas
        if (builder.isUseAtlas()) {
            Atlas atlas = null;

            try {
                String atlasArg = cli.getOptionValue(ARG_ATLAS);

                if (atlasArg == null) {
                    atlasArg = (builder.projectDir + "/" + SAMPLE_DEFAULT_ATLAS_FILE);
                }

                final File atlasXmlFile = new File(atlasArg);

                System.out.println("Including atlas " + atlasXmlFile);

                atlas = new Atlas(atlasXmlFile);
                final File atlasJavaScriptFile = atlas.createAtlas(builder.isLocal());
                System.out.println("Using atlas image file " + atlas.getImageFile());

                if (builder.isLocal()) {
                    loadableImages.put(
                            new JSONObject("{\"nicename\":\"Atlas\",\"name\":\"" + atlas.getImageFileNewName()
                                    + "\",\"encode\":\"" + atlas.getImageFileNewName() + "\",\"hide\":true}"));
                } else {
                    final File atlasImageFile = atlas.getImageFile();
                    final String atlasPath = "data/" + atlasImageFile.getName();

                    loadableImages.put(new JSONObject("{\"nicename\":\"Atlas\",\"name\":\""
                            + atlas.getImageFileNewName() + "\",\"url\":\"" + atlasPath + "\",\"hide\":true}"));
                    FileUtils.copyFile(atlasImageFile, new File(outputDir + "/" + atlasPath));
                }

                builder.writeFile(atlasJavaScriptFile, compressedFileJs);
            } catch (final IOException ex) {
                System.err.println("Problem finding atlas file.  Reason: " + ex.getMessage());
            }
        }

        // additional images
        if (builder.isUseImages()) {
            final String[] imageArgs = cli.getOptionValues(ARG_IMAGE);

            if (imageArgs != null) {
                for (final String imageArg : imageArgs) {
                    final File file = new File(imageArg);
                    System.out.println("Including image " + file);

                    final String filename = Utilities
                            .replaceNonAlphanumericCharacters(Utilities.removeNiftiExtensions(file.getName()));

                    if (builder.isLocal()) {
                        loadableImages.put(new JSONObject(
                                "{\"nicename\":\"" + Utilities.removeNiftiExtensions(file.getName())
                                        + "\",\"name\":\"" + filename + "\",\"encode\":\"" + filename + "\"}"));
                        final String sampleEncoded = Utilities.encodeImageFile(file);
                        FileUtils.writeStringToFile(compressedFileJs,
                                "var " + filename + "= \"" + sampleEncoded + "\";\n", "UTF-8", true);
                    } else {
                        final String filePath = "data/" + file.getName();
                        loadableImages.put(new JSONObject(
                                "{\"nicename\":\"" + Utilities.removeNiftiExtensions(file.getName())
                                        + "\",\"name\":\"" + filename + "\",\"url\":\"" + filePath + "\"}"));
                        FileUtils.copyFile(file, new File(outputDir + "/" + filePath));
                    }
                }
            }
        }

        File tempFileJs = null;

        try {
            tempFileJs = builder.createTempFile();
        } catch (final IOException ex) {
            System.err.println("Problem creating temp write file.  Reason: " + ex.getMessage());
        }

        // If the temp file could not be created it is still null here. Passing null to
        // FileUtils.writeStringToFile would throw an uncaught NullPointerException and
        // abort the whole build, so the JS stage is skipped instead; the failure has
        // already been reported above.
        if (tempFileJs != null) {
            // write image refs
            FileUtils.writeStringToFile(tempFileJs,
                    "var " + PAPAYA_LOADABLE_IMAGES + " = " + loadableImages.toString() + ";\n", "UTF-8", true);

            // compress JS
            tempFileJs = builder.concatenateFiles(JS_FILES, "js", tempFileJs);

            System.out.println("Compressing JavaScript... ");
            FileUtils.writeStringToFile(compressedFileJs, "\n", "UTF-8", true);
            builder.compressJavaScript(tempFileJs, compressedFileJs, new YuiCompressorOptions());
            //tempFileJs.deleteOnExit();
        }
    } catch (final IOException ex) {
        System.err.println("Problem concatenating JavaScript.  Reason: " + ex.getMessage());
    }

    // compress CSS
    final File compressedFileCss = new File(outputDir, OUTPUT_CSS_FILENAME);

    try {
        final File concatFile = builder.concatenateFiles(CSS_FILES, "css", null);
        System.out.println("Compressing CSS... ");
        builder.compressCSS(concatFile, compressedFileCss, new YuiCompressorOptions());
        concatFile.deleteOnExit();
    } catch (final IOException ex) {
        System.err.println("Problem concatenating CSS.  Reason: " + ex.getMessage());
    }

    // write HTML
    try {
        System.out.println("Writing HTML... ");
        if (builder.singleFile) {
            builder.writeHtml(outputDir, compressedFileJs, compressedFileCss, htmlParameters, title);
        } else {
            builder.writeHtml(outputDir, htmlParameters, title);
        }
    } catch (final IOException ex) {
        System.err.println("Problem writing HTML.  Reason: " + ex.getMessage());
    }

    System.out.println("Done!  Output files located at " + outputDir);
}

From source file:com.puzzle.module.send.sign.SignXml.java

/**
 * Reads {@code sign.xml} from a hard-coded local path and submits one
 * {@code signTest} task for it to a three-thread pool.
 *
 * @param args unused
 * @throws Exception if the XML file cannot be read, or from obtaining the client API
 */
public static void main(String[] args) throws Exception {
    // The return value is not used; the call is kept in case obtaining the singleton
    // performs required initialisation — NOTE(review): confirm.
    ClientApi.getSingletonClientApi();
    String xml = FileUtils.readFileToString(new File("D:\\work\\myeclipse2014\\workspace\\Ukey\\sign.xml"),
            "UTF-8");

    ExecutorService s = Executors.newFixedThreadPool(3);
    try {
        for (int i = 0; i < 1; i++) {
            s.execute(new signTest(xml));
        }
    } finally {
        // Without shutdown() the pool's non-daemon worker threads keep the JVM alive
        // after main returns. Tasks already submitted still run to completion.
        s.shutdown();
    }

    //        String signData = Base64.encodeBase64String(signedData);
    //        FileUtils.writeStringToFile(new File("D:\\sign.txt"), signData, "UTF-8");
    //        System.out.println(signData);
}

From source file:massbank.admin.Validator2.java

/**
 * Validates records from the command line.
 * <p>
 * With no arguments, the text currently held in {@code recordstringExample} is
 * validated and the resulting record is printed. Otherwise each argument is read as a
 * UTF-8 file and validated; the process exits with status 1 if any file fails.
 *
 * @param arguments paths of the record files to validate; may be empty
 * @throws Exception if a file cannot be read or validation throws
 */
public static void main(String[] arguments) throws Exception {
    if (arguments.length == 0) {
        // No files given: validate the example text and echo the parsed record.
        final Record example = validate(recordstringExample, "");
        if (example != null) {
            System.out.println(example.toString());
        } else {
            System.err.println("Error.");
        }
        return;
    }

    boolean anyInvalid = false;
    for (final String path : arguments) {
        // The file content is stored in the shared field before validation, as before.
        recordstringExample = FileUtils.readFileToString(new File(path), StandardCharsets.UTF_8);
        if (validate(recordstringExample, "") == null) {
            System.err.println("Error in " + path);
            anyInvalid = true;
        }
    }

    if (anyInvalid) {
        System.exit(1);
    }
}

From source file:de.micromata.tpsb.doc.StaticTestDocGenerator.java

/**
 * Command-line entry point: builds two parser configurations from the arguments,
 * then parses test builders and test cases.
 * <p>
 * Recognised options: {@code --project-root/-pr}, {@code --test-input/-ti},
 * {@code --output-path/-op} (the first sets the base dir, later ones are collected as
 * include repos), {@code --index-vmtemplate/-ivt}, {@code --test-vmtemplate/-tvt},
 * {@code --singlexml} and {@code --ignore-local-settings}.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {

    final ParserConfig.Builder builderCfg = new ParserConfig.Builder();
    final ParserConfig.Builder testCfg = new ParserConfig.Builder();
    testCfg.generateIndividualFiles(true);
    builderCfg.generateIndividualFiles(true);

    final Iterator<String> argIt = Arrays.asList(args).iterator();
    boolean outputDirChosen = false;
    boolean skipLocalSettings = false;
    final List<String> extraRepos = new ArrayList<String>();
    StringResourceLoader.setRepository(StringResourceLoader.REPOSITORY_NAME_DEFAULT,
            new StringResourceRepositoryImpl());
    try {
        while (argIt.hasNext()) {
            final String current = argIt.next();

            // Each lookup is given the iterator, so the checks stay in their original order.
            String optValue = getArgumentOption(argIt, current, "--project-root", "-pr");
            if (optValue != null) {
                final File projectRoot = new File(optValue);
                if (projectRoot.exists()) {
                    TpsbEnvironment.get().addProjectRoots(projectRoot);
                    final File testSources = new File(projectRoot, "src/test");
                    if (testSources.exists()) {
                        testCfg.addSourceFileRespository(
                                new FileSystemSourceFileRepository(testSources.getAbsolutePath()));
                        builderCfg.addSourceFileRespository(
                                new FileSystemSourceFileRepository(testSources.getAbsolutePath()));
                    }
                } else {
                    System.err.print("project root doesn't exists: " + projectRoot.getAbsolutePath());
                }
                continue;
            }

            optValue = getArgumentOption(argIt, current, "--test-input", "-ti");
            if (optValue != null) {
                final File testInput = new File(optValue);
                // A missing directory is only reported; the repository is registered anyway.
                if (!testInput.exists()) {
                    System.err.print("test-input doesn't exists: " + testInput.getAbsolutePath());
                }
                testCfg.addSourceFileRespository(new FileSystemSourceFileRepository(optValue));
                builderCfg.addSourceFileRespository(new FileSystemSourceFileRepository(optValue));
                continue;
            }

            optValue = getArgumentOption(argIt, current, "--output-path", "-op");
            if (optValue != null) {
                if (outputDirChosen) {
                    // Every output path after the first is collected as an include repo.
                    extraRepos.add(optValue);
                } else {
                    testCfg.outputDir(optValue);
                    builderCfg.outputDir(optValue);
                    TpsbEnvironment.setBaseDir(optValue);
                    outputDirChosen = true;
                }
                continue;
            }

            optValue = getArgumentOption(argIt, current, "--index-vmtemplate", "-ivt");
            if (optValue != null) {
                try {
                    final String templateText = FileUtils.readFileToString(new File(optValue),
                            CharEncoding.UTF_8);
                    final StringResourceRepository resources = StringResourceLoader.getRepository();
                    resources.putStringResource("customIndexTemplate", templateText, CharEncoding.UTF_8);
                    testCfg.indexTemplate("customIndexTemplate");
                } catch (IOException ex) {
                    throw new RuntimeException(
                            "Cannot load file " + new File(optValue).getAbsolutePath() + ": " + ex.getMessage(),
                            ex);
                }
                continue;
            }

            optValue = getArgumentOption(argIt, current, "--test-vmtemplate", "-tvt");
            if (optValue != null) {
                try {
                    final String templateText = FileUtils.readFileToString(new File(optValue),
                            CharEncoding.UTF_8);
                    final StringResourceRepository resources = StringResourceLoader.getRepository();
                    resources.putStringResource("customTestTemplate", templateText, CharEncoding.UTF_8);
                    testCfg.testTemplate("customTestTemplate");
                } catch (IOException ex) {
                    throw new RuntimeException(
                            "Cannot load file " + new File(optValue).getAbsolutePath() + ": " + ex.getMessage(),
                            ex);
                }
                continue;
            }

            // Plain flags without a value.
            if (current.equals("--singlexml")) {
                testCfg.generateIndividualFiles(false);
                builderCfg.generateIndividualFiles(false);
            } else if (current.equals("--ignore-local-settings")) {
                skipLocalSettings = true;
            }
        }
    } catch (RuntimeException ex) {
        // Any runtime failure while handling arguments ends the run with just the message.
        System.err.print(ex.getMessage());
        return;
    }

    if (!skipLocalSettings) {
        readLocalSettings(builderCfg, testCfg);
    }

    // Filters are chained exactly as before, in case the builder methods do not return 'this'.
    builderCfg
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbBuilder.class))
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbApplication.class));
    testCfg
            .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbTestSuite.class));

    final StaticTestDocGenerator generator = new StaticTestDocGenerator(builderCfg.build(), testCfg.build());
    final TpsbEnvironment environment = TpsbEnvironment.get();
    if (!extraRepos.isEmpty()) {
        environment.setIncludeRepos(extraRepos);
    }
    generator.parseTestBuilders();
    generator.parseTestCases();
}

From source file:marytts.tools.analysis.CopySynthesis.java

/**
 * Command-line entry point. Takes file arguments distinguished by extension
 * ({@code .txt}, {@code .wav}, {@code .ptc}, {@code .lab}) and requires the
 * {@code locale} system property.
 * <p>
 * The text is converted to an ALLOPHONES document through {@code MaryHttpClient} and
 * aligned with the label file. Each phone is then annotated with its duration, end
 * time and f0 targets, each boundary with its duration, and the document is printed
 * to standard output.
 *
 * @param args the input files; a label file and a text file are required, plus either
 *             a pitch file or a wav file
 * @throws Exception on any I/O, audio, XML or processing failure
 */
public static void main(String[] args) throws Exception {
    final String locale = System.getProperty("locale");
    if (locale == null) {
        throw new IllegalArgumentException("No locale given (-Dlocale=...)");
    }

    // Sort the arguments into their roles by file extension.
    String textFilename = null;
    String wavFilename = null;
    String pitchFilename = null;
    String labFilename = null;
    for (final String argument : args) {
        if (argument.endsWith(".txt")) {
            textFilename = argument;
        } else if (argument.endsWith(".wav")) {
            wavFilename = argument;
        } else if (argument.endsWith(".ptc")) {
            pitchFilename = argument;
        } else if (argument.endsWith(".lab")) {
            labFilename = argument;
        } else {
            throw new IllegalArgumentException("Don't know how to treat argument: " + argument);
        }
    }

    // The intonation contour
    final double[] contour;
    final double frameShiftTime;
    if (pitchFilename != null) { // have a pitch file -- ignore any wav file
        final PitchReaderWriter pitchData = new PitchReaderWriter(pitchFilename);
        if (pitchData.contour == null) {
            throw new NullPointerException("Cannot read f0 contour from " + pitchFilename);
        }
        contour = pitchData.contour;
        frameShiftTime = pitchData.header.skipSizeInSeconds;
    } else { // need to create pitch contour from wav file
        if (wavFilename == null) {
            throw new IllegalArgumentException("Need either a pitch file or a wav file");
        }
        final AudioInputStream audioStream = AudioSystem.getAudioInputStream(new File(wavFilename));
        final AudioDoubleDataSource samples = new AudioDoubleDataSource(audioStream);
        final PitchFileHeader pitchParams = new PitchFileHeader();
        pitchParams.fs = (int) audioStream.getFormat().getSampleRate();
        final F0TrackerAutocorrelationHeuristic f0Tracker = new F0TrackerAutocorrelationHeuristic(pitchParams);
        f0Tracker.pitchAnalyze(samples);
        frameShiftTime = f0Tracker.getSkipSizeInSeconds();
        contour = f0Tracker.getF0Contour();
    }
    assert contour != null;
    assert frameShiftTime > 0;

    // The ALLOPHONES data and labels
    if (labFilename == null) {
        throw new IllegalArgumentException("No label file given");
    }
    if (textFilename == null) {
        throw new IllegalArgumentException("No text file given");
    }
    final MaryTranscriptionAligner aligner = new MaryTranscriptionAligner();
    aligner.SetEnsureInitialBoundary(false);
    final String labels = MaryTranscriptionAligner.readLabelFile(aligner.getEntrySeparator(),
            aligner.getEnsureInitialBoundary(), labFilename);
    final MaryHttpClient mary = new MaryHttpClient();
    final String text = FileUtils.readFileToString(new File(textFilename), "ASCII");
    final ByteArrayOutputStream allophonesOut = new ByteArrayOutputStream();
    mary.process(text, "TEXT", "ALLOPHONES", locale, null, null, allophonesOut);
    final ByteArrayInputStream allophonesIn = new ByteArrayInputStream(allophonesOut.toByteArray());
    final DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
    docFactory.setNamespaceAware(true);
    final Document doc = docFactory.newDocumentBuilder().parse(allophonesIn);
    aligner.alignXmlTranscriptions(doc, labels);
    assert doc != null;

    // durations
    final double[] endTimes = new LabelfileDoubleDataSource(new File(labFilename)).getAllData();
    assert endTimes.length == labels.split(Pattern.quote(aligner.getEntrySeparator())).length;

    // Now add durations and f0 targets to document
    final int[] targetPercents = { 0, 50, 100 };
    double prevEnd = 0;
    final NodeIterator nodes = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY);
    for (final double endTime : endTimes) {
        final Element element = (Element) nodes.nextNode();
        if (element == null) {
            throw new IllegalStateException("More durations than elements -- this should not happen!");
        }
        final double durInSeconds = endTime - prevEnd;
        final int durInMillis = (int) (1000 * durInSeconds);
        if (!element.getTagName().equals(MaryXML.PHONE)) { // boundary
            element.setAttribute("duration", String.valueOf(durInMillis));
        } else {
            element.setAttribute("d", String.valueOf(durInMillis));
            element.setAttribute("end", String.format(Locale.US, "%.3f", endTime));
            // f0 targets at beginning, mid, and end of phone; zero and NaN values are left out
            final double[] targetTimes = { prevEnd, prevEnd + 0.5 * durInSeconds, endTime };
            final StringBuilder targets = new StringBuilder();
            for (int k = 0; k < targetPercents.length; k++) {
                final double f0 = getF0(contour, frameShiftTime, targetTimes[k]);
                if (f0 != 0 && !Double.isNaN(f0)) {
                    targets.append('(').append(targetPercents[k]).append(',').append((int) f0).append(')');
                }
            }
            if (targets.length() > 0) {
                element.setAttribute("f0", targets.toString());
            }
        }
        prevEnd = endTime;
    }
    if (nodes.nextNode() != null) {
        throw new IllegalStateException("More elements than durations -- this should not happen!");
    }

    // TODO: add pitch values

    final String acoustparams = DomUtils.document2String(doc);
    System.out.println("ACOUSTPARAMS:");
    System.out.println(acoustparams);
}

From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step7CollectMTurkResults.java

/**
 * Merges crowdsourced MTurk sentence-relevance annotations into query result containers.
 *
 * <p>Reads every {@code *.result} file from the MTurk results folder, groups the non-rejected
 * assignments by the XML file name encoded in the {@code annotation} field
 * ({@code <xmlFileName>_<clueWebId>}), attaches them as relevance votes to the matching ranked
 * results of each {@code *.xml} container in the input folder, and writes the updated
 * containers (under the same file names) to the output folder.
 *
 * @param args {@code args[0]} input folder with XML query containers;
 *             {@code args[1]} folder with MTurk {@code .result} files;
 *             {@code args[2]} output folder (created if it does not exist)
 * @throws IllegalArgumentException if fewer than three arguments are given, the MTurk folder
 *                                  contains no {@code .result} files, or a record is malformed
 * @throws IllegalStateException    if the output folder cannot be created or an input
 *                                  container has no annotations at all
 * @throws Exception                on any I/O or parsing failure
 */
public static void main(String[] args) throws Exception {
    if (args.length < 3) {
        throw new IllegalArgumentException(
                "Expected 3 arguments: <inputDir> <mturkResultsDir> <outputDir>, but got " + args.length);
    }

    // input dir - list of xml query containers, e.g.
    // /home/user-ukp/research/data/dip/wp1-documents/step4-boiler-plate/
    File inputDir = new File(args[0] + "/");

    // output dir; fail early instead of discovering an unwritable target after all the parsing
    File outputDir = new File(args[2]);
    if (!outputDir.exists() && !outputDir.mkdirs() && !outputDir.isDirectory()) {
        throw new IllegalStateException("Cannot create output folder " + outputDir);
    }

    // Folder with MTurk result (success) files
    File mturkSuccessDir = new File(args[1]);

    Collection<File> files = FileUtils.listFiles(mturkSuccessDir, new String[] { "result" }, false);
    if (files.isEmpty()) {
        throw new IllegalArgumentException("Input folder is empty. " + mturkSuccessDir);
    }

    // XML container file name -> all accepted annotations that belong to it
    Map<String, List<MTurkAnnotation>> mturkAnnotations = new HashMap<>();

    // parsing all CSV files
    for (File mturkCSVResultFile : files) {
        System.out.println("Parsing " + mturkCSVResultFile.getName());

        MTurkOutputReader outputReader = new MTurkOutputReader(
                new HashSet<>(Arrays.asList("annotation", "workerid")), mturkCSVResultFile);

        // for fixing broken data input: for each hit, collect all sentence IDs
        Map<String, SortedSet<String>> hitSentences = new HashMap<>();

        // first iteration: collect the sentences
        for (Map<String, String> record : outputReader) {
            SortedSet<String> sentencesOfHit = hitSentences.computeIfAbsent(record.get("hitid"),
                    key -> new TreeSet<String>());

            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");

            if (relevantSentences != null) {
                sentencesOfHit.addAll(Arrays.asList(relevantSentences.split(",")));
            }

            if (irrelevantSentences != null) {
                sentencesOfHit.addAll(Arrays.asList(irrelevantSentences.split(",")));
            }
        }

        // and now second iteration
        // NOTE(review): this relies on outputReader handing out a fresh iterator each time it is
        // iterated -- confirm against MTurkOutputReader. Its former close() call was commented
        // out in the original; verify whether the reader holds a resource that must be released.
        for (Map<String, String> record : outputReader) {
            String hitID = record.get("hitid");
            String annotatorID = record.get("workerid");
            String acceptTime = record.get("assignmentaccepttime");
            String submitTime = record.get("assignmentsubmittime");
            String relevantSentences = record.get("Answer.relevant_sentences");
            String irrelevantSentences = record.get("Answer.irrelevant_sentences");
            String reject = record.get("reject");

            // the annotation field encodes "<xmlFileName>_<clueWebId>"
            String annotation = record.get("annotation");
            String[] annotationParts = annotation == null ? new String[0] : annotation.split("_");
            if (annotationParts.length < 2) {
                throw new IllegalArgumentException(
                        "Malformed 'annotation' value (expected <file>_<clueWebId>). Record: " + record);
            }
            String fileXml = annotationParts[0];
            String clueWeb = annotationParts[1].trim();
            String comment = record.get("Answer.comment");

            String[] relevant = relevantSentences != null ? relevantSentences.split(",") : new String[0];
            String[] irrelevant = irrelevantSentences != null ? irrelevantSentences.split(",") : new String[0];

            // sanitizing data: if both relevant and irrelevant are empty, that's a bug
            // we're gonna look up all sentences from this HIT and treat this assignment
            // as if there were only irrelevant ones
            if (relevant.length == 0 && irrelevant.length == 0) {
                irrelevant = hitSentences.get(hitID).toArray(new String[0]);
            }

            if (reject != null) {
                System.out.println(" HIT " + hitID + " annotated by " + annotatorID + " was rejected ");
                continue;
            }

            MTurkAnnotation mturkAnnotation;
            try {
                mturkAnnotation = new MTurkAnnotation(hitID, annotatorID, acceptTime, submitTime, comment,
                        clueWeb, relevant, irrelevant);
            } catch (IllegalArgumentException ex) {
                // re-throw with the offending record so the broken input line can be located
                throw new IllegalArgumentException("Record: " + record, ex);
            }

            mturkAnnotations.computeIfAbsent(fileXml, key -> new ArrayList<MTurkAnnotation>())
                    .add(mturkAnnotation);
        }
    }

    // Debugging: output number of HITs of a query
    System.out.println("Accepted HITs for a query:");
    for (Map.Entry<String, List<MTurkAnnotation>> entry : mturkAnnotations.entrySet()) {
        System.out.println(entry.getKey() + " " + entry.getValue().size());
    }

    for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) {
        QueryResultContainer queryResultContainer = QueryResultContainer
                .fromXML(FileUtils.readFileToString(f, "utf-8"));
        String fileName = f.getName();
        List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileName);

        if (listOfAnnotations == null || listOfAnnotations.isEmpty()) {
            throw new IllegalStateException("No annotations for " + fileName);
        }

        for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) {
            for (MTurkAnnotation mtAnnotation : listOfAnnotations) {
                if (!rankedResults.clueWebID.equals(mtAnnotation.clueWeb)) {
                    continue;
                }

                QueryResultContainer.MTurkRelevanceVote relevanceVote = new QueryResultContainer.MTurkRelevanceVote();
                relevanceVote.turkID = mtAnnotation.annotatorID.trim();
                relevanceVote.hitID = mtAnnotation.hitID.trim();
                relevanceVote.acceptTime = mtAnnotation.acceptTime.trim();
                relevanceVote.submitTime = mtAnnotation.submitTime.trim();
                relevanceVote.comment = mtAnnotation.comment != null ? mtAnnotation.comment.trim() : null;

                String[] relevant = mtAnnotation.relevant;
                String[] irrelevant = mtAnnotation.irrelevant;
                if (relevant.length == 0 && irrelevant.length == 0) {
                    // best-effort diagnostic only: report (with stack trace) and keep going
                    new IllegalStateException("the length of the annotations is 0 for document "
                            + rankedResults.clueWebID + " for HIT " + relevanceVote.hitID).printStackTrace();
                }

                addSentenceRelevanceVotes(relevanceVote, relevant, "true");
                addSentenceRelevanceVotes(relevanceVote, irrelevant, "false");
                rankedResults.mTurkRelevanceVotes.add(relevanceVote);
            }
        }

        File outputFile = new File(outputDir, fileName);
        FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8");
        System.out.println("Finished " + outputFile);
    }

}

/**
 * Appends one single-sentence vote to {@code relevanceVote} for every entry of
 * {@code sentenceIds} that, after trimming, consists of digits only; other entries are skipped.
 *
 * @param relevanceVote vote whose {@code singleSentenceRelevanceVotes} receives the new entries
 * @param sentenceIds   raw sentence IDs as split from the comma-delimited answer field
 * @param relevantFlag  value stored in each created vote's {@code relevant} field
 *                      ({@code "true"} or {@code "false"})
 */
private static void addSentenceRelevanceVotes(QueryResultContainer.MTurkRelevanceVote relevanceVote,
        String[] sentenceIds, String relevantFlag) {
    for (String rawId : sentenceIds) {
        String sentenceId = rawId.trim();
        // "\\d+" never matches the empty string, so no separate emptiness check is needed
        if (sentenceId.matches("\\d+")) {
            QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote();
            singleSentenceVote.sentenceID = sentenceId;
            singleSentenceVote.relevant = relevantFlag;
            relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote);
        }
    }
}