List of usage examples for org.apache.commons.io FileUtils readLines
public static List readLines(File file, String encoding) throws IOException
From source file:de.unidue.ltl.evalita.feat.IsName.java
private void init() throws TextClassificationException { if (namelist != null) { return;//from ww w .ja v a 2 s .c o m } namelist = new HashSet<String>(); for (File file : folder.listFiles()) { if (file.isHidden()) { continue; } if (file.isDirectory()) { throw new TextClassificationException("Did not expect that namelists are stored in subfolders"); } List<String> readLines = null; try { readLines = FileUtils.readLines(file, "utf-8"); } catch (IOException e) { throw new TextClassificationException(e); } for (String l : readLines) { if (l.startsWith("#")) { continue; } if (lowerCase) { l = l.toLowerCase(); } namelist.add(l); } } }
From source file:com.github.cereda.arara.langchecker.LanguageUtils.java
/** * Reads a list of files and returns a list of reports of each language. * @param files A list of files.//from w w w .ja v a 2s. co m * @return A list of reports of each language. */ public static List<LanguageReport> readLanguages(List<File> files) { // check if the provided list is empty if (files.isEmpty()) { // print error message System.err.println(WordUtils.wrap("Fatal exception: I could not find any languages " + "in the provided directory. I am afraid I won't be " + "be able to continue. Please make sure the " + "provided directory contains at least one language " + "to be analyzed. The application will halt now.", 60)); } // the resulting list List<LanguageReport> reports = new ArrayList<>(); // read each file of the list and extract // each task found for (File file : files) { try { // read each file into a list // of strings List<String> lines = FileUtils.readLines(file, "UTF-8"); // get the line analysis LanguageReport report = analyze(lines); // set the file reference report.setReference(file); // add to the list reports.add(report); } catch (IOException exception) { // print error message System.err.println(WordUtils.wrap("Fatal exception: an error was raised while " + "trying to read one of the languages. Please " + "make sure all languages in the provided " + "directory have read permission. I won't be " + "able to continue. The application will halt " + "now.", 60)); System.exit(1); } } // return the list of // analyzed languages return reports; }
From source file:edu.ehu.galan.lite.algorithms.ranked.supervised.tfidf.corpus.wikipedia.WikiCorpusStatistics.java
private void extractDirectory2(File file) { File[] listFiles = file.listFiles(); if (listFiles != null) { for (int i = 0; i < file.listFiles().length; i++) { File fil = listFiles[i]; if (fil != null) { boolean start = true; String sb = new String(); String line;/*w w w.j av a 2 s. c o m*/ List<String> readLines = null; try { readLines = FileUtils.readLines(fil, StandardCharsets.UTF_8.name()); } catch (IOException ex) { Logger.getLogger(WikiCorpusStatistics.class.getName()).log(Level.SEVERE, null, ex); } boolean star = false; for (int o = 0; o < readLines.size(); o++) { line = readLines.get(o); if (line.matches("<doc .*?>.*?")) { String[] split = line.split("<.*>"); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: if (split.length == 2) { sb = sb + split[1]; } } else if (line.matches("</doc>")) { numDocs++; int l = sb.toString().split("\\s+").length; numWords = l; sb = new String(); System.out.println(l); } else { sb = sb + (line); } } } } } }
From source file:com.github.stagirs.lingvo.build.MorphStateMachineBuilder.java
private static Map<String, List<WordForm[]>> getRaw2WordForms() throws IOException { Map<String, List<WordForm[]>> result = new HashMap<String, List<WordForm[]>>(); for (String line : FileUtils.readLines(new File("dict.opcorpora.plain"), "utf-8")) { if (line.isEmpty()) { continue; }// w ww. j a v a 2s . c o m Lemma lemma = Lemma.parse(line); WordForm norm = lemma.getNorm(); for (WordForm raw : lemma.getItems()) { add(result, raw, norm); } } return result; }
From source file:ee.ria.xroad.common.util.SystemMetrics.java
/** * @return a snapshot of current network statistics *///from ww w. j av a 2 s . co m public static NetStats getNetStats() { long bytesReceived = 0; long bytesTransmitted = 0; try { List<String> lines = FileUtils.readLines(new File(SystemProperties.getNetStatsFile()), StandardCharsets.UTF_8); for (String eachLine : lines) { String trimmedLine = eachLine.trim(); Pattern pattern = Pattern.compile("^eth[01]:[\\s\\d]*$"); Matcher matcher = pattern.matcher(trimmedLine); if (matcher.find()) { String[] parts = trimmedLine.split("\\s+"); // Indices according to format of /proc/net/dev bytesReceived += Long.parseLong(parts[BYTES_RECEIVED_IDX]); bytesTransmitted += Long.parseLong(parts[BYTES_TRANSMITTED_IDX]); } } return new NetStats(bytesReceived, bytesTransmitted); } catch (IOException e) { log.error("Did not manage to collect network statistics", e); return null; } }
From source file:jmupen.MyListSelectionListener.java
/**
 * Removes the line at the given index from the file and rewrites it.
 *
 * @param removeLine zero-based index of the line to delete
 * @param text       the file to modify in place
 * @throws IOException if the file cannot be read or written
 */
private void removeLines(int removeLine, File text) throws IOException {
    List<String> textLines = FileUtils.readLines(text, StandardCharsets.UTF_8);
    textLines.remove(removeLine);
    StringBuilder builder = new StringBuilder();
    for (String line : textLines) {
        builder.append(line).append(System.lineSeparator());
    }
    // Write back with the same charset we read with; the deprecated
    // two-argument overload used the platform default charset, corrupting
    // non-ASCII content on non-UTF-8 platforms.
    FileUtils.writeStringToFile(text, builder.toString(), StandardCharsets.UTF_8);
}
From source file:eu.crydee.alignment.aligner.cr.MetricsCR.java
/**
 * Validates and loads the whitelist file, then prepares an iterator over
 * the cartesian product of its lines for the collection reader.
 *
 * @param context the UIMA context
 * @throws ResourceInitializationException if the whitelist file is missing,
 *         unreadable, or cannot be parsed
 */
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
    super.initialize(context);
    whitelistFile = new File(whitelistFilePath);
    List<String> problems = new ArrayList<>();
    if (!whitelistFile.isFile()) {
        problems.add("The run file doesn't resolve to a file.");
    } else if (!whitelistFile.canRead()) {
        problems.add("The run file can't be read.");
    }
    if (!problems.isEmpty()) {
        logger.error(String.join("\n", problems));
        throw new ResourceInitializationException();
    }
    try {
        // Deduplicate the lines, then pair every line with every line.
        Set<String> whitelist = new HashSet<>(FileUtils.readLines(whitelistFile, StandardCharsets.UTF_8));
        it = Sets.cartesianProduct(whitelist, whitelist).iterator();
    } catch (FileNotFoundException e) {
        logger.error("Couldn't find the run file.", e);
        throw new ResourceInitializationException(e);
    } catch (IOException e) {
        logger.error("Couldn't read the whitelist file.", e);
        throw new ResourceInitializationException(e);
    }
    currentIndex = 0;
}
From source file:com.legstar.cob2xsd.Cob2XsdIOTest.java
/**
 * Verifies that the XML Schema file generated from a small COBOL fragment
 * carries the configured UTF-8 encoding: the non-ASCII VALUE literal must
 * survive the round trip into the cobolName="B" element.
 */
@Test
public void testFileOutputEncoding() {
    try {
        configProps.put(Cob2XsdConfig.XSD_ENCODING, "UTF-8");
        configProps.put(Cob2XsdConfig.ADD_LEGSTAR_ANNOTATIONS, Boolean.toString(true));
        Cob2XsdIO cob2xsd = new Cob2XsdIO(new Cob2XsdConfig(configProps));

        // Write a minimal COBOL copybook to a throwaway temp file.
        File cobolSource = File.createTempFile("test", ".cob");
        cobolSource.deleteOnExit();
        FileUtils.write(cobolSource, " 01 A.\n 02 B PIC G(4) VALUE '?'.", "UTF8");

        File xmlSchema = cob2xsd.translate(cobolSource, "UTF8", tempDir, "http://www.mycompany.com/test",
                null);
        for (String schemaLine : FileUtils.readLines(xmlSchema, "UTF8")) {
            if (schemaLine.contains("cobolName=\"B\"")) {
                assertTrue(schemaLine.contains("value=\"?\""));
            }
        }
        xmlSchema.deleteOnExit();
    } catch (Exception e) {
        e.printStackTrace();
        fail();
    }
}
From source file:com.karumi.marvelapiclient.ApiClientTest.java
/**
 * Loads a classpath resource and returns its content with line breaks
 * stripped (lines are concatenated without separators).
 *
 * @param fileName resource name, resolved against the classpath root
 * @return the concatenated file content
 * @throws IOException if the resource cannot be read
 */
private String getContentFromFile(String fileName) throws IOException {
    String resourcePath = getClass().getResource("/" + fileName).getFile();
    List<String> lines = FileUtils.readLines(new File(resourcePath), FILE_ENCODING);
    StringBuilder content = new StringBuilder();
    for (String line : lines) {
        content.append(line);
    }
    return content.toString();
}
From source file:de.unidue.ltl.pos.trainmodel.tc.PostPosUpdateTask.java
/**
 * Post-processes the id2outcome report of a POS-tagging experiment: the
 * predicted labels are written back onto the CASes as POS annotations, a
 * name-list based post-processing tagger is run over each CAS, and an
 * updated id2outcome report (with possibly extended label mapping) plus an
 * accuracy file are written to the task context.
 *
 * NOTE(review): the id2outcome line format is assumed to be
 * "key_seq_tok=predictionId;goldId" with two header lines — inferred from
 * the regex and head/body handling below; confirm against the writer side.
 */
@Override
public void execute(TaskContext aContext) throws Exception {
    // Load the raw id2outcome report produced by the classifier.
    File file = aContext.getFile(Constants.ID_OUTCOME_KEY, AccessMode.READONLY);
    List<String> readLines = FileUtils.readLines(file, "UTF-8");
    // Bidirectional mapping between numeric label ids and label strings.
    Map<String, String> id2label = getId2LabelMapping(readLines);
    Map<String, String> label2id = getLabel2IdMapping(readLines);
    // Group outcome lines per CAS and normalize sequence/token ids.
    Map<String, List<String>> groupByCas = sortByCas(readLines);
    Map<String, List<String>> normSeqGroupByCas = normalizeSequenceId(groupByCas);
    Map<String, List<String>> normTokGroupByCas = normalizeTokenId(normSeqGroupByCas);
    // The first two report lines form the header; they are kept and may be
    // extended with new label ids later on.
    StringBuilder head = new StringBuilder();
    head.append(readLines.get(0));
    head.append("\n");
    head.append(readLines.get(1));
    StringBuilder body = new StringBuilder();
    AnalysisEngine postProcessingEngine = AnalysisEngineFactory.createEngine(PostprocessTagger.class,
            PostprocessTagger.PARAM_NAMELIST_FOLDER, namedEntitiyFolder,
            PostprocessTagger.PARAM_NAMELIST_LOWER_CASE, true);
    for (String key : normTokGroupByCas.keySet()) {
        // Re-load the binary CAS this group of outcomes belongs to.
        File testBinCasFolder = aContext.getFolder(InitTask.OUTPUT_KEY_TEST, AccessMode.READONLY);
        CollectionReader binCasReader = CollectionReaderFactory.createReader(BinaryCasReader.class,
                BinaryCasReader.PARAM_SOURCE_LOCATION, testBinCasFolder, BinaryCasReader.PARAM_PATTERNS,
                key + "_0.bin");
        JCas jcas = JCasFactory.createJCas();
        binCasReader.getNext(jcas.getCas());
        p.configure(jcas.getCas());
        List<TextClassificationSequence> sequences = JCasUtil.selectCovering(jcas,
                TextClassificationSequence.class, 0, jcas.getDocumentText().length());
        List<String> list = normTokGroupByCas.get(key);
        // First pass: materialize the classifier's prediction for every
        // token as a POS annotation in the CAS.
        for (String e : list) {
            int[] seqTokId = getSeqTokId(e);
            int seqid = seqTokId[0];
            int tokid = seqTokId[1];
            // group(1) = predicted label id, group(2) = gold label id.
            Pattern pattern = Pattern.compile("=([0-9]+);([0-9]+)");
            Matcher m = pattern.matcher(e);
            m.find();
            String prediction = id2label.get(m.group(1));
            TextClassificationSequence sequence = sequences.get(seqid);
            List<Token> tokens = JCasUtil.selectCovered(jcas, Token.class, sequence.getBegin(),
                    sequence.getEnd());
            Token token = tokens.get(tokid);
            Type posTag = p.getTagType(prediction);
            POS pos = (POS) jcas.getCas().createAnnotation(posTag, token.getBegin(), token.getEnd());
            pos.setPosValue(prediction);
            pos.addToIndexes();
            token.setPos(pos);
            // prediction = updatePrediction(prediction, pos.getPosValue());
            // evaluate(prediction, gold);
            //
            // sb.append(key + "_" + seq + "_" + tok + "=" +
            // label2id.get(prediction) + ";"
            // + label2id.get(gold) + "\n");
        }
        // Let the name-list tagger overwrite predictions where applicable.
        postProcessingEngine.process(jcas);
        // Second pass: read the (possibly changed) POS values back, score
        // them against gold, and rebuild the report body.
        for (String e : list) {
            int[] seqTokId = getSeqTokId(e);
            int seqid = seqTokId[0];
            int tokid = seqTokId[1];
            Pattern pattern = Pattern.compile("=([0-9]+);([0-9]+)");
            Matcher m = pattern.matcher(e);
            m.find();
            String gold = id2label.get(m.group(2));
            TextClassificationSequence sequence = sequences.get(seqid);
            List<Token> tokens = JCasUtil.selectCovered(jcas, Token.class, sequence.getBegin(),
                    sequence.getEnd());
            POS pos = tokens.get(tokid).getPos();
            evaluate(pos.getPosValue(), gold);
            String predictedLabel = label2id.get(pos.getPosValue());
            String goldLabel = label2id.get(gold);
            if (predictedLabel == null) {
                // The post-processor introduced a label unknown to the
                // original mapping: assign it the next free numeric id and
                // append the new pair to the header line.
                List<String> keySet = new ArrayList<String>(id2label.keySet());
                int max = -1;
                for (String id : keySet) {
                    int c = Integer.valueOf(id);
                    if (c > max) {
                        max = c;
                    }
                }
                max++;
                label2id.put(pos.getPosValue(), "" + max);
                id2label.put("" + max, pos.getPosValue());
                head.append(" " + max + "=" + pos.getPosValue());
                predictedLabel = label2id.get(pos.getPosValue());
            }
            body.append(key + "_" + seqid + "_" + tokid + "=" + predictedLabel + ";" + goldLabel + "\n");
        }
        // writeUpdatedJcas(aContext, jcas);
    }
    writeAccuracyFile(aContext);
    String out = head.toString() + "\n" + body.toString();
    writeUpdatedId2OutcomeReport(aContext, out);
}