List of usage examples for org.apache.commons.io FileUtils readFileToString
public static String readFileToString(File file, String encoding) throws IOException
From source file:com.da.daum.DaumCafeLevelUpParser.java
public static void main(String[] args) throws IOException { DaumCafeLevelUpParser parser = new DaumCafeLevelUpParser(); String listBody = "(?)^*~.txt"; listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", ""); System.out.println(listBody); // FileUtils.writeStringToFile(new File(file), listBody, "utf-8"); //File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt"); File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt"); listBody = FileUtils.readFileToString(file, "utf-8"); Map pageMap = new HashMap(); parser.setDaumListVoList(listBody, pageMap); //parser.setDaumView(listBody); }
From source file:eu.crydee.stanfordcorenlp.Tokenizer.java
/** * Wrapper around Stanford CoreNLP to tokenize text. * * Give it an input dir of text files with --input-dir and it'll ouput * tokenized versions, one sentence per line with space separated words to * --output-dir (defaults to out/)./*from w ww . j a v a2 s . co m*/ * * @param args CLI args. Example: --input-dir my-input --output-dir * my-output. */ public static void main(String[] args) { ArgumentParser parser = ArgumentParsers.newArgumentParser("stanford-corenlp-tokenizer-wrapper") .description("Converts Mediawiki dumps to text."); parser.addArgument("-i", "--input-dir").required(true).help("Path of the input text files directory."); parser.addArgument("-o", "--output-dir").help("Path of the output text files directory.").setDefault("out"); Params params = new Params(); try { parser.parseArgs(args, params); } catch (ArgumentParserException ex) { System.err.println("Could not parse arguments: " + ex.getMessage()); System.exit(1); } Tokenizer tokenizer = new Tokenizer(); try { Files.list(Paths.get(params.inDirPath)).filter(Files::isRegularFile).map(Path::toFile).map(f -> { try { return Pair.of(f.getName(), FileUtils.readFileToString(f, StandardCharsets.UTF_8)); } catch (IOException ex) { System.err.println("Could not read input text file: " + ex.getLocalizedMessage()); throw new UncheckedIOException(ex); } }).forEach(p -> { String text = tokenizer.tokenizeAndSentenceSplit(p.getRight()); try { FileUtils.writeStringToFile(Paths.get(params.outDirpath, p.getLeft()).toFile(), text, StandardCharsets.UTF_8); } catch (IOException ex) { System.err.println("Could not write output text file: " + ex.getLocalizedMessage()); } }); } catch (IOException ex) { System.err.println("Could not read from input directory: " + ex.getLocalizedMessage()); } }
From source file:com.da.daum.DaumCafeOneLineParser.java
public static void main(String[] args) throws IOException { DaumCafeOneLineParser parser = new DaumCafeOneLineParser(); String listBody = "(?)^*~.txt"; listBody = listBody.replaceAll("\\*", "").replaceAll("\\*", ""); System.out.println(listBody); // FileUtils.writeStringToFile(new File(file), listBody, "utf-8"); // File file = new File("C:\\TEMP\\daum\\user\\Lak_view_.txt"); File file = new File("C:\\TEMP\\daum\\user\\Lak_list_1.txt"); listBody = FileUtils.readFileToString(file, "utf-8"); Map pageMap = new HashMap(); parser.setDaumListVoList(listBody, pageMap); // parser.setDaumView(listBody); }
From source file:com.akana.demo.freemarker.templatetester.App.java
public static void main(String[] args) { final Options options = new Options(); @SuppressWarnings("static-access") Option optionContentType = OptionBuilder.withArgName("content-type").hasArg() .withDescription("content type of model").create("content"); @SuppressWarnings("static-access") Option optionUrlPath = OptionBuilder.withArgName("httpRequestLine").hasArg() .withDescription("url path and parameters in HTTP Request Line format").create("url"); @SuppressWarnings("static-access") Option optionRootMessageName = OptionBuilder.withArgName("messageName").hasArg() .withDescription("root data object name, defaults to 'message'").create("root"); @SuppressWarnings("static-access") Option optionAdditionalMessages = OptionBuilder.withArgName("dataModelPaths") .hasArgs(Option.UNLIMITED_VALUES).withDescription("additional message object data sources") .create("messages"); @SuppressWarnings("static-access") Option optionDebugMessages = OptionBuilder.hasArg(false) .withDescription("Shows debug information about template processing").create("debug"); Option optionHelp = new Option("help", "print this message"); options.addOption(optionHelp);//from ww w .ja v a 2 s . co m options.addOption(optionContentType); options.addOption(optionUrlPath); options.addOption(optionRootMessageName); options.addOption(optionAdditionalMessages); options.addOption(optionDebugMessages); CommandLineParser parser = new DefaultParser(); CommandLine cmd; try { cmd = parser.parse(options, args); // Check for help flag if (cmd.hasOption("help")) { showHelp(options); return; } String[] remainingArguments = cmd.getArgs(); if (remainingArguments.length < 2) { showHelp(options); return; } String ftlPath, dataPath = "none"; ftlPath = remainingArguments[0]; dataPath = remainingArguments[1]; String contentType = "text/xml"; // Discover content type from file extension String ext = FilenameUtils.getExtension(dataPath); if (ext.equals("json")) { contentType = "json"; } else if (ext.equals("txt")) { contentType = "txt"; } // Override discovered content type if (cmd.hasOption("content")) { contentType = cmd.getOptionValue("content"); } // Root data model name String rootMessageName = "message"; if (cmd.hasOption("root")) { rootMessageName = cmd.getOptionValue("root"); } // Additional data models String[] additionalModels = new String[0]; if (cmd.hasOption("messages")) { additionalModels = cmd.getOptionValues("messages"); } // Debug Info if (cmd.hasOption("debug")) { System.out.println(" Processing ftl : " + ftlPath); System.out.println(" with data model: " + dataPath); System.out.println(" with content-type: " + contentType); System.out.println(" data model object: " + rootMessageName); if (cmd.hasOption("messages")) { System.out.println("additional models: " + additionalModels.length); } } Configuration cfg = new Configuration(Configuration.VERSION_2_3_23); cfg.setDirectoryForTemplateLoading(new File(".")); cfg.setDefaultEncoding("UTF-8"); cfg.setTemplateExceptionHandler(TemplateExceptionHandler.RETHROW_HANDLER); /* Create the primary data-model */ Map<String, Object> message = new HashMap<String, Object>(); if (contentType.contains("json") || contentType.contains("txt")) { message.put("contentAsString", FileUtils.readFileToString(new File(dataPath), StandardCharsets.UTF_8)); } else { message.put("contentAsXml", freemarker.ext.dom.NodeModel.parse(new File(dataPath))); } if (cmd.hasOption("url")) { message.put("getProperty", new AkanaGetProperty(cmd.getOptionValue("url"))); } Map<String, Object> root = new HashMap<String, Object>(); root.put(rootMessageName, message); if (additionalModels.length > 0) { for (int i = 0; i < additionalModels.length; i++) { Map<String, Object> m = createMessageFromFile(additionalModels[i], contentType); root.put("message" + i, m); } } /* Get the template (uses cache internally) */ Template temp = cfg.getTemplate(ftlPath); /* Merge data-model with template */ Writer out = new OutputStreamWriter(System.out); temp.process(root, out); } catch (ParseException e) { showHelp(options); System.exit(1); } catch (IOException e) { System.out.println("Unable to parse ftl."); e.printStackTrace(); } catch (SAXException e) { System.out.println("XML parsing issue."); e.printStackTrace(); } catch (ParserConfigurationException e) { System.out.println("Unable to configure parser."); e.printStackTrace(); } catch (TemplateException e) { System.out.println("Unable to parse template."); e.printStackTrace(); } }
From source file:edu.uthscsa.ric.papaya.builder.Builder.java
public static void main(final String[] args) { final Builder builder = new Builder(); // process command line final CommandLine cli = builder.createCLI(args); builder.setUseSample(cli.hasOption(ARG_SAMPLE)); builder.setUseAtlas(cli.hasOption(ARG_ATLAS)); builder.setLocal(cli.hasOption(ARG_LOCAL)); builder.setPrintHelp(cli.hasOption(ARG_HELP)); builder.setUseImages(cli.hasOption(ARG_IMAGE)); builder.setSingleFile(cli.hasOption(ARG_SINGLE)); builder.setUseParamFile(cli.hasOption(ARG_PARAM_FILE)); builder.setUseTitle(cli.hasOption(ARG_TITLE)); // print help, if necessary if (builder.isPrintHelp()) { builder.printHelp();//from ww w . jav a2 s .co m return; } // find project root directory if (cli.hasOption(ARG_ROOT)) { try { builder.projectDir = (new File(cli.getOptionValue(ARG_ROOT))).getCanonicalFile(); } catch (final IOException ex) { System.err.println("Problem finding root directory. Reason: " + ex.getMessage()); } } if (builder.projectDir == null) { builder.projectDir = new File(System.getProperty("user.dir")); } // clean output dir final File outputDir = new File(builder.projectDir + "/" + OUTPUT_DIR); System.out.println("Cleaning output directory..."); try { builder.cleanOutputDir(outputDir); } catch (final IOException ex) { System.err.println("Problem cleaning build directory. Reason: " + ex.getMessage()); } if (builder.isLocal()) { System.out.println("Building for local usage..."); } // write JS final File compressedFileJs = new File(outputDir, OUTPUT_JS_FILENAME); // build properties try { final File buildFile = new File(builder.projectDir + "/" + BUILD_PROP_FILE); builder.readBuildProperties(buildFile); builder.buildNumber++; // increment build number builder.writeBuildProperties(compressedFileJs, true); builder.writeBuildProperties(buildFile, false); } catch (final IOException ex) { System.err.println("Problem handling build properties. Reason: " + ex.getMessage()); } String htmlParameters = null; if (builder.isUseParamFile()) { final String paramFileArg = cli.getOptionValue(ARG_PARAM_FILE); if (paramFileArg != null) { try { System.out.println("Including parameters..."); final String parameters = FileUtils.readFileToString(new File(paramFileArg), "UTF-8"); htmlParameters = "var params = " + parameters + ";"; } catch (final IOException ex) { System.err.println("Problem reading parameters file! " + ex.getMessage()); } } } String title = null; if (builder.isUseTitle()) { String str = cli.getOptionValue(ARG_TITLE); if (str != null) { str = str.trim(); str = str.replace("\"", ""); str = str.replace("'", ""); if (str.length() > 0) { title = str; System.out.println("Using title: " + title); } } } try { final JSONArray loadableImages = new JSONArray(); // sample image if (builder.isUseSample()) { System.out.println("Including sample image..."); final File sampleFile = new File(builder.projectDir + "/" + SAMPLE_IMAGE_NII_FILE); final String filename = Utilities .replaceNonAlphanumericCharacters(Utilities.removeNiftiExtensions(sampleFile.getName())); if (builder.isLocal()) { loadableImages.put(new JSONObject("{\"nicename\":\"Sample Image\",\"name\":\"" + filename + "\",\"encode\":\"" + filename + "\"}")); final String sampleEncoded = Utilities.encodeImageFile(sampleFile); FileUtils.writeStringToFile(compressedFileJs, "var " + filename + "= \"" + sampleEncoded + "\";\n", "UTF-8", true); } else { loadableImages.put(new JSONObject("{\"nicename\":\"Sample Image\",\"name\":\"" + filename + "\",\"url\":\"" + SAMPLE_IMAGE_NII_FILE + "\"}")); FileUtils.copyFile(sampleFile, new File(outputDir + "/" + SAMPLE_IMAGE_NII_FILE)); } } // atlas if (builder.isUseAtlas()) { Atlas atlas = null; try { String atlasArg = cli.getOptionValue(ARG_ATLAS); if (atlasArg == null) { atlasArg = (builder.projectDir + "/" + SAMPLE_DEFAULT_ATLAS_FILE); } final File atlasXmlFile = new File(atlasArg); System.out.println("Including atlas " + atlasXmlFile); atlas = new Atlas(atlasXmlFile); final File atlasJavaScriptFile = atlas.createAtlas(builder.isLocal()); System.out.println("Using atlas image file " + atlas.getImageFile()); if (builder.isLocal()) { loadableImages.put( new JSONObject("{\"nicename\":\"Atlas\",\"name\":\"" + atlas.getImageFileNewName() + "\",\"encode\":\"" + atlas.getImageFileNewName() + "\",\"hide\":true}")); } else { final File atlasImageFile = atlas.getImageFile(); final String atlasPath = "data/" + atlasImageFile.getName(); loadableImages.put(new JSONObject("{\"nicename\":\"Atlas\",\"name\":\"" + atlas.getImageFileNewName() + "\",\"url\":\"" + atlasPath + "\",\"hide\":true}")); FileUtils.copyFile(atlasImageFile, new File(outputDir + "/" + atlasPath)); } builder.writeFile(atlasJavaScriptFile, compressedFileJs); } catch (final IOException ex) { System.err.println("Problem finding atlas file. Reason: " + ex.getMessage()); } } // additional images if (builder.isUseImages()) { final String[] imageArgs = cli.getOptionValues(ARG_IMAGE); if (imageArgs != null) { for (final String imageArg : imageArgs) { final File file = new File(imageArg); System.out.println("Including image " + file); final String filename = Utilities .replaceNonAlphanumericCharacters(Utilities.removeNiftiExtensions(file.getName())); if (builder.isLocal()) { loadableImages.put(new JSONObject( "{\"nicename\":\"" + Utilities.removeNiftiExtensions(file.getName()) + "\",\"name\":\"" + filename + "\",\"encode\":\"" + filename + "\"}")); final String sampleEncoded = Utilities.encodeImageFile(file); FileUtils.writeStringToFile(compressedFileJs, "var " + filename + "= \"" + sampleEncoded + "\";\n", "UTF-8", true); } else { final String filePath = "data/" + file.getName(); loadableImages.put(new JSONObject( "{\"nicename\":\"" + Utilities.removeNiftiExtensions(file.getName()) + "\",\"name\":\"" + filename + "\",\"url\":\"" + filePath + "\"}")); FileUtils.copyFile(file, new File(outputDir + "/" + filePath)); } } } } File tempFileJs = null; try { tempFileJs = builder.createTempFile(); } catch (final IOException ex) { System.err.println("Problem creating temp write file. Reason: " + ex.getMessage()); } // write image refs FileUtils.writeStringToFile(tempFileJs, "var " + PAPAYA_LOADABLE_IMAGES + " = " + loadableImages.toString() + ";\n", "UTF-8", true); // compress JS tempFileJs = builder.concatenateFiles(JS_FILES, "js", tempFileJs); System.out.println("Compressing JavaScript... "); FileUtils.writeStringToFile(compressedFileJs, "\n", "UTF-8", true); builder.compressJavaScript(tempFileJs, compressedFileJs, new YuiCompressorOptions()); //tempFileJs.deleteOnExit(); } catch (final IOException ex) { System.err.println("Problem concatenating JavaScript. Reason: " + ex.getMessage()); } // compress CSS final File compressedFileCss = new File(outputDir, OUTPUT_CSS_FILENAME); try { final File concatFile = builder.concatenateFiles(CSS_FILES, "css", null); System.out.println("Compressing CSS... "); builder.compressCSS(concatFile, compressedFileCss, new YuiCompressorOptions()); concatFile.deleteOnExit(); } catch (final IOException ex) { System.err.println("Problem concatenating CSS. Reason: " + ex.getMessage()); } // write HTML try { System.out.println("Writing HTML... "); if (builder.singleFile) { builder.writeHtml(outputDir, compressedFileJs, compressedFileCss, htmlParameters, title); } else { builder.writeHtml(outputDir, htmlParameters, title); } } catch (final IOException ex) { System.err.println("Problem writing HTML. Reason: " + ex.getMessage()); } System.out.println("Done! Output files located at " + outputDir); }
From source file:com.puzzle.module.send.sign.SignXml.java
public static void main(String[] args) throws Exception { ClientApi api = ClientApi.getSingletonClientApi(); String xml = FileUtils.readFileToString(new File("D:\\work\\myeclipse2014\\workspace\\Ukey\\sign.xml"), "UTF-8"); // ExecutorService s = Executors.newFixedThreadPool(3); for (int i = 0; i < 1; i++) { s.execute(new signTest(xml)); }/*from w ww .jav a 2s . c o m*/ // String signData = Base64.encodeBase64String(signedData); // FileUtils.writeStringToFile(new File("D:\\sign.txt"), signData, "UTF-8"); // System.out.println(signData); }
From source file:massbank.admin.Validator2.java
public static void main(String[] arguments) throws Exception { boolean haserror = false; if (arguments.length == 0) { Record record = validate(recordstringExample, ""); if (record == null) System.err.println("Error."); else/* w ww. j a v a2s.c om*/ System.out.println(record.toString()); } else { for (String filename : arguments) { recordstringExample = FileUtils.readFileToString(new File(filename), StandardCharsets.UTF_8); Record record = validate(recordstringExample, ""); if (record == null) { System.err.println("Error in " + filename); haserror = true; } else { //System.out.println("ok"); //System.out.println(record.toString()); } } } if (haserror) System.exit(1); }
From source file:de.micromata.tpsb.doc.StaticTestDocGenerator.java
public static void main(String[] args) { ParserConfig.Builder bCfg = new ParserConfig.Builder(); ParserConfig.Builder tCfg = new ParserConfig.Builder(); tCfg.generateIndividualFiles(true);/*from w w w. j a v a 2 s . c om*/ bCfg.generateIndividualFiles(true); List<String> la = Arrays.asList(args); Iterator<String> it = la.iterator(); boolean baseDirSet = false; boolean ignoreLocalSettings = false; List<String> addRepos = new ArrayList<String>(); StringResourceLoader.setRepository(StringResourceLoader.REPOSITORY_NAME_DEFAULT, new StringResourceRepositoryImpl()); try { while (it.hasNext()) { String arg = it.next(); String value = null; if ((value = getArgumentOption(it, arg, "--project-root", "-pr")) != null) { File f = new File(value); if (f.exists() == false) { System.err.print("project root doesn't exists: " + f.getAbsolutePath()); continue; } TpsbEnvironment.get().addProjectRoots(f); File ts = new File(f, "src/test"); if (ts.exists() == true) { tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath())); bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(ts.getAbsolutePath())); } continue; } if ((value = getArgumentOption(it, arg, "--test-input", "-ti")) != null) { File f = new File(value); if (f.exists() == false) { System.err.print("test-input doesn't exists: " + f.getAbsolutePath()); } tCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value)); bCfg.addSourceFileRespository(new FileSystemSourceFileRepository(value)); continue; } if ((value = getArgumentOption(it, arg, "--output-path", "-op")) != null) { if (baseDirSet == false) { tCfg.outputDir(value); bCfg.outputDir(value); TpsbEnvironment.setBaseDir(value); baseDirSet = true; } else { addRepos.add(value); } continue; } if ((value = getArgumentOption(it, arg, "--index-vmtemplate", "-ivt")) != null) { try { String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8); StringResourceRepository repo = StringResourceLoader.getRepository(); repo.putStringResource("customIndexTemplate", content, CharEncoding.UTF_8); tCfg.indexTemplate("customIndexTemplate"); } catch (IOException ex) { throw new RuntimeException( "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(), ex); } continue; } if ((value = getArgumentOption(it, arg, "--test-vmtemplate", "-tvt")) != null) { try { String content = FileUtils.readFileToString(new File(value), CharEncoding.UTF_8); StringResourceRepository repo = StringResourceLoader.getRepository(); repo.putStringResource("customTestTemplate", content, CharEncoding.UTF_8); tCfg.testTemplate("customTestTemplate"); } catch (IOException ex) { throw new RuntimeException( "Cannot load file " + new File(value).getAbsolutePath() + ": " + ex.getMessage(), ex); } continue; } if (arg.equals("--singlexml") == true) { tCfg.generateIndividualFiles(false); bCfg.generateIndividualFiles(false); } else if (arg.equals("--ignore-local-settings") == true) { ignoreLocalSettings = true; continue; } } } catch (RuntimeException ex) { System.err.print(ex.getMessage()); return; } if (ignoreLocalSettings == false) { readLocalSettings(bCfg, tCfg); } bCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Builder,*App,*builder")) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbBuilder.class)) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbApplication.class)) // ; tCfg// .addSourceFileFilter(new MatcherSourceFileFilter("*Test,*TestCase")) // .addSourceFileFilter(new AnnotationSourceFileFilter(TpsbTestSuite.class)) // ; StaticTestDocGenerator docGenerator = new StaticTestDocGenerator(bCfg.build(), tCfg.build()); TpsbEnvironment env = TpsbEnvironment.get(); if (addRepos.isEmpty() == false) { env.setIncludeRepos(addRepos); } docGenerator.parseTestBuilders(); docGenerator.parseTestCases(); }
From source file:marytts.tools.analysis.CopySynthesis.java
/** * @param args//ww w . j av a 2 s .co m */ public static void main(String[] args) throws Exception { String wavFilename = null; String labFilename = null; String pitchFilename = null; String textFilename = null; String locale = System.getProperty("locale"); if (locale == null) { throw new IllegalArgumentException("No locale given (-Dlocale=...)"); } for (String arg : args) { if (arg.endsWith(".txt")) textFilename = arg; else if (arg.endsWith(".wav")) wavFilename = arg; else if (arg.endsWith(".ptc")) pitchFilename = arg; else if (arg.endsWith(".lab")) labFilename = arg; else throw new IllegalArgumentException("Don't know how to treat argument: " + arg); } // The intonation contour double[] contour = null; double frameShiftTime = -1; if (pitchFilename == null) { // need to create pitch contour from wav file if (wavFilename == null) { throw new IllegalArgumentException("Need either a pitch file or a wav file"); } AudioInputStream ais = AudioSystem.getAudioInputStream(new File(wavFilename)); AudioDoubleDataSource audio = new AudioDoubleDataSource(ais); PitchFileHeader params = new PitchFileHeader(); params.fs = (int) ais.getFormat().getSampleRate(); F0TrackerAutocorrelationHeuristic tracker = new F0TrackerAutocorrelationHeuristic(params); tracker.pitchAnalyze(audio); frameShiftTime = tracker.getSkipSizeInSeconds(); contour = tracker.getF0Contour(); } else { // have a pitch file -- ignore any wav file PitchReaderWriter f0rw = new PitchReaderWriter(pitchFilename); if (f0rw.contour == null) { throw new NullPointerException("Cannot read f0 contour from " + pitchFilename); } contour = f0rw.contour; frameShiftTime = f0rw.header.skipSizeInSeconds; } assert contour != null; assert frameShiftTime > 0; // The ALLOPHONES data and labels if (labFilename == null) { throw new IllegalArgumentException("No label file given"); } if (textFilename == null) { throw new IllegalArgumentException("No text file given"); } MaryTranscriptionAligner aligner = new MaryTranscriptionAligner(); aligner.SetEnsureInitialBoundary(false); String labels = MaryTranscriptionAligner.readLabelFile(aligner.getEntrySeparator(), aligner.getEnsureInitialBoundary(), labFilename); MaryHttpClient mary = new MaryHttpClient(); String text = FileUtils.readFileToString(new File(textFilename), "ASCII"); ByteArrayOutputStream baos = new ByteArrayOutputStream(); mary.process(text, "TEXT", "ALLOPHONES", locale, null, null, baos); ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray()); DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance(); docFactory.setNamespaceAware(true); DocumentBuilder builder = docFactory.newDocumentBuilder(); Document doc = builder.parse(bais); aligner.alignXmlTranscriptions(doc, labels); assert doc != null; // durations double[] endTimes = new LabelfileDoubleDataSource(new File(labFilename)).getAllData(); assert endTimes.length == labels.split(Pattern.quote(aligner.getEntrySeparator())).length; // Now add durations and f0 targets to document double prevEnd = 0; NodeIterator ni = MaryDomUtils.createNodeIterator(doc, MaryXML.PHONE, MaryXML.BOUNDARY); for (int i = 0; i < endTimes.length; i++) { Element e = (Element) ni.nextNode(); if (e == null) throw new IllegalStateException("More durations than elements -- this should not happen!"); double durInSeconds = endTimes[i] - prevEnd; int durInMillis = (int) (1000 * durInSeconds); if (e.getTagName().equals(MaryXML.PHONE)) { e.setAttribute("d", String.valueOf(durInMillis)); e.setAttribute("end", new Formatter(Locale.US).format("%.3f", endTimes[i]).toString()); // f0 targets at beginning, mid, and end of phone StringBuilder f0String = new StringBuilder(); double startF0 = getF0(contour, frameShiftTime, prevEnd); if (startF0 != 0 && !Double.isNaN(startF0)) { f0String.append("(0,").append((int) startF0).append(")"); } double midF0 = getF0(contour, frameShiftTime, prevEnd + 0.5 * durInSeconds); if (midF0 != 0 && !Double.isNaN(midF0)) { f0String.append("(50,").append((int) midF0).append(")"); } double endF0 = getF0(contour, frameShiftTime, endTimes[i]); if (endF0 != 0 && !Double.isNaN(endF0)) { f0String.append("(100,").append((int) endF0).append(")"); } if (f0String.length() > 0) { e.setAttribute("f0", f0String.toString()); } } else { // boundary e.setAttribute("duration", String.valueOf(durInMillis)); } prevEnd = endTimes[i]; } if (ni.nextNode() != null) { throw new IllegalStateException("More elements than durations -- this should not happen!"); } // TODO: add pitch values String acoustparams = DomUtils.document2String(doc); System.out.println("ACOUSTPARAMS:"); System.out.println(acoustparams); }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step7CollectMTurkResults.java
public static void main(String[] args) throws Exception { // input dir - list of xml query containers // /home/user-ukp/research/data/dip/wp1-documents/step4-boiler-plate/ File inputDir = new File(args[0] + "/"); // MTurk result file // output dir File outputDir = new File(args[2]); if (!outputDir.exists()) { outputDir.mkdirs();//from ww w. j a v a 2 s . co m } // Folder with success files File mturkSuccessDir = new File(args[1]); Collection<File> files = FileUtils.listFiles(mturkSuccessDir, new String[] { "result" }, false); if (files.isEmpty()) { throw new IllegalArgumentException("Input folder is empty. " + mturkSuccessDir); } HashMap<String, List<MTurkAnnotation>> mturkAnnotations = new HashMap<>(); // parsing all CSV files for (File mturkCSVResultFile : files) { System.out.println("Parsing " + mturkCSVResultFile.getName()); MTurkOutputReader outputReader = new MTurkOutputReader( new HashSet<>(Arrays.asList("annotation", "workerid")), mturkCSVResultFile); // for fixing broken data input: for each hit, collect all sentence IDs Map<String, SortedSet<String>> hitSentences = new HashMap<>(); // first iteration: collect the sentences for (Map<String, String> record : outputReader) { String hitID = record.get("hitid"); if (!hitSentences.containsKey(hitID)) { hitSentences.put(hitID, new TreeSet<>()); } String relevantSentences = record.get("Answer.relevant_sentences"); String irrelevantSentences = record.get("Answer.irrelevant_sentences"); if (relevantSentences != null) { hitSentences.get(hitID).addAll(Arrays.asList(relevantSentences.split(","))); } if (irrelevantSentences != null) { hitSentences.get(hitID).addAll(Arrays.asList(irrelevantSentences.split(","))); } } // and now second iteration for (Map<String, String> record : outputReader) { String hitID = record.get("hitid"); String annotatorID = record.get("workerid"); String acceptTime = record.get("assignmentaccepttime"); String submitTime = record.get("assignmentsubmittime"); String relevantSentences = record.get("Answer.relevant_sentences"); String irrelevantSentences = record.get("Answer.irrelevant_sentences"); String reject = record.get("reject"); String filename[]; String comment; String clueWeb; String[] relevant = {}; String[] irrelevant = {}; filename = record.get("annotation").split("_"); String fileXml = filename[0]; clueWeb = filename[1].trim(); comment = record.get("Answer.comment"); if (relevantSentences != null) { relevant = relevantSentences.split(","); } if (irrelevantSentences != null) { irrelevant = irrelevantSentences.split(","); } // sanitizing data: if both relevant and irrelevant are empty, that's a bug // we're gonna look up all sentences from this HIT and treat this assignment // as if there were only irrelevant ones if (relevant.length == 0 && irrelevant.length == 0) { SortedSet<String> strings = hitSentences.get(hitID); irrelevant = new String[strings.size()]; strings.toArray(irrelevant); } if (reject != null) { System.out.println(" HIT " + hitID + " annotated by " + annotatorID + " was rejected "); } else { /* // relevant sentences is a comma-delimited string, // this regular expression is rather strange // it must contain digits, it might be that there is only one space or a comma or some other char // digits are the sentence ids. if relevant sentences do not contain digits then it is wrong if (relevantSentences.matches("^\\D*$") && irrelevantSentences.matches("^\\D*$")) { try { throw new IllegalStateException( "No annotations found for HIT " + hitID + " in " + fileXml + " for document " + clueWeb); } catch (IllegalStateException ex) { ex.printStackTrace(); } } */ MTurkAnnotation mturkAnnotation; try { mturkAnnotation = new MTurkAnnotation(hitID, annotatorID, acceptTime, submitTime, comment, clueWeb, relevant, irrelevant); } catch (IllegalArgumentException ex) { throw new IllegalArgumentException("Record: " + record, ex); } List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileXml); if (listOfAnnotations == null) { listOfAnnotations = new ArrayList<>(); } listOfAnnotations.add(mturkAnnotation); mturkAnnotations.put(fileXml, listOfAnnotations); } } // parser.close(); } // Debugging: output number of HITs of a query System.out.println("Accepted HITs for a query:"); for (Map.Entry e : mturkAnnotations.entrySet()) { ArrayList<MTurkAnnotation> a = (ArrayList<MTurkAnnotation>) e.getValue(); System.out.println(e.getKey() + " " + a.size()); } for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); String fileName = f.getName(); List<MTurkAnnotation> listOfAnnotations = mturkAnnotations.get(fileName); if (listOfAnnotations == null || listOfAnnotations.isEmpty()) { throw new IllegalStateException("No annotations for " + f.getName()); } for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { for (MTurkAnnotation mtAnnotation : listOfAnnotations) { String clueWeb = mtAnnotation.clueWeb; if (rankedResults.clueWebID.equals(clueWeb)) { List<QueryResultContainer.MTurkRelevanceVote> mTurkRelevanceVotes = rankedResults.mTurkRelevanceVotes; QueryResultContainer.MTurkRelevanceVote relevanceVote = new QueryResultContainer.MTurkRelevanceVote(); String annotatorID = mtAnnotation.annotatorID; String hitID = mtAnnotation.hitID; String acceptTime = mtAnnotation.acceptTime; String submitTime = mtAnnotation.submitTime; String comment = mtAnnotation.comment; String[] relevant = mtAnnotation.relevant; String[] irrelevant = mtAnnotation.irrelevant; relevanceVote.turkID = annotatorID.trim(); relevanceVote.hitID = hitID.trim(); relevanceVote.acceptTime = acceptTime.trim(); relevanceVote.submitTime = submitTime.trim(); relevanceVote.comment = comment != null ? comment.trim() : null; if (relevant.length == 0 && irrelevant.length == 0) { try { throw new IllegalStateException("the length of the annotations is 0" + rankedResults.clueWebID + " for HIT " + relevanceVote.hitID); } catch (IllegalStateException e) { e.printStackTrace(); } } for (String r : relevant) { String sentenceId = r.trim(); if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) { QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote(); singleSentenceVote.sentenceID = sentenceId; singleSentenceVote.relevant = "true"; relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote); } } for (String r : irrelevant) { String sentenceId = r.trim(); if (!sentenceId.isEmpty() && sentenceId.matches("\\d+")) { QueryResultContainer.SingleSentenceRelevanceVote singleSentenceVote = new QueryResultContainer.SingleSentenceRelevanceVote(); singleSentenceVote.sentenceID = sentenceId; singleSentenceVote.relevant = "false"; relevanceVote.singleSentenceRelevanceVotes.add(singleSentenceVote); } } mTurkRelevanceVotes.add(relevanceVote); } } } File outputFile = new File(outputDir, f.getName()); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } }