List of usage examples for org.apache.commons.io FileUtils readLines
public static List readLines(File file, String encoding) throws IOException
From source file:com.bright.utils.rmDuplicateLines.java
/**
 * Removes duplicate lines from a {@code .txt} file in place.
 *
 * <p>The first occurrence of every line is kept and the original line order is
 * preserved. Nothing happens when the path is not a regular file or its name
 * does not end with {@code .txt}. I/O failures are printed to standard error.
 *
 * @param args path of the text file to de-duplicate
 */
public static void main(String args) {
    File monfile = new File(args);
    if (!monfile.isFile() || !monfile.getName().endsWith(".txt")) {
        return;
    }
    try {
        // A LinkedHashSet drops repeated lines while keeping first-seen order.
        Set<String> userIdSet = new LinkedHashSet<String>(
                FileUtils.readLines(monfile, Charset.forName("UTF-8")));
        // Write with the same charset that was used for reading; the previous
        // FileWriter used the platform default and could corrupt non-ASCII text.
        // writeLines terminates every line with the system line separator and
        // closes the underlying stream even when writing fails.
        FileUtils.writeLines(monfile, "UTF-8", userIdSet);
    } catch (IOException e) {
        e.printStackTrace();
    }
}
From source file:edu.cuhk.hccl.TripRealRatingsApp.java
public static void main(String[] args) throws IOException { File dir = new File(args[0]); File outFile = new File(args[1]); outFile.delete();//from w ww. jav a 2 s . c o m StringBuilder buffer = new StringBuilder(); for (File file : dir.listFiles()) { List<String> lines = FileUtils.readLines(file, "UTF-8"); String hotelID = file.getName().split("_")[1]; String author = null; boolean noContent = false; for (String line : lines) { if (line.startsWith("<Author>")) { try { author = line.split(">")[1].trim(); } catch (ArrayIndexOutOfBoundsException e) { System.out.println("[ERROR] An error occured on this line:"); System.out.println(line); continue; } } else if (line.startsWith("<Content>")) { // ignore records if they have no content String content = line.split(">")[1].trim(); if (content == null || content.equals("")) noContent = true; } else if (line.startsWith("<Rating>")) { String[] rates = line.split(">")[1].trim().split("\t"); if (noContent || rates.length != 8) continue; // Change missing rating from -1 to 0 for (int i = 0; i < rates.length; i++) { if (rates[i].equals("-1")) rates[i] = "0"; } buffer.append(author + "\t"); buffer.append(hotelID + "\t"); // overall buffer.append(rates[0] + "\t"); // location buffer.append(rates[3] + "\t"); // room buffer.append(rates[2] + "\t"); // service buffer.append(rates[6] + "\t"); // value buffer.append(rates[1] + "\t"); // cleanliness buffer.append(rates[4] + "\t"); buffer.append("\n"); } } // Write once for each file FileUtils.writeStringToFile(outFile, buffer.toString(), true); // Clear buffer buffer.setLength(0); System.out.printf("[INFO] Finished processing %s\n", file.getName()); } System.out.println("[INFO] All processinig are finished!"); }
From source file:com.github.stagirs.lingvo.build.SyntaxStatisticsBuilder.java
/**
 * Builds the syntax statistics resource from the annotated corpus file.
 *
 * <p>Each line of {@code annot.opcorpora.no_ambig.plain} is parsed into a list of
 * types. Every single type is counted, and so is every pair of adjacent types
 * (joined by a space). The sequence start and end are counted as pairs with an
 * empty neighbour, i.e. {@code " first"} and {@code "last "}. The counters are
 * written as {@code key<TAB>count} lines to {@code src/main/resources/SyntaxStatistics}.
 *
 * @param args not used
 * @throws Exception if reading, parsing or writing fails
 */
public static void main(String[] args) throws Exception {
    TObjectIntHashMap<String> counters = new TObjectIntHashMap<String>();
    List<String> corpus = FileUtils.readLines(new File("annot.opcorpora.no_ambig.plain"), "utf-8");
    for (String sentence : corpus) {
        List<String> types = types(Annotation.parse(sentence));
        if (types.isEmpty()) {
            continue;
        }
        // An empty "previous" makes the first pair come out as " <first type>".
        String previous = "";
        for (String current : types) {
            counters.adjustOrPutValue(current, 1, 1);
            counters.adjustOrPutValue(previous + " " + current, 1, 1);
            previous = current;
        }
        // Closing pair: "<last type> ".
        counters.adjustOrPutValue(previous + " ", 1, 1);
    }

    final List<String> rows = new ArrayList<String>();
    TObjectIntProcedure<String> rowCollector = new TObjectIntProcedure<String>() {
        @Override
        public boolean execute(String key, int count) {
            rows.add(key + "\t" + count);
            // true keeps the iteration going over the remaining entries
            return true;
        }
    };
    counters.forEachEntry(rowCollector);
    FileUtils.writeLines(new File("src/main/resources/SyntaxStatistics"), "utf-8", rows);
}
From source file:com.gargoylesoftware.htmlunit.source.TestCaseCreator.java
/**
 * The entry point: prints the skeleton of a test method whose {@code html}
 * string literal reproduces the content of the given HTML file line by line.
 *
 * <p>Prints a message and returns when no location is given or the file does
 * not exist.
 *
 * @param args the arguments; {@code args[0]} is the location of the HTML file
 * @throws IOException if an error occurs
 */
public static void main(final String[] args) throws IOException {
    if (args.length == 0) {
        System.out.println("HTML file location is not provided");
        return;
    }

    final File file = new File(args[0]);
    if (!file.exists()) {
        System.out.println("File does not exist " + file.getAbsolutePath());
        // Stop here: reading a missing file below would only end in an exception.
        return;
    }

    System.out.println(" /**");
    System.out.println(" * @throws Exception if an error occurs");
    System.out.println(" */");
    System.out.println(" @Test");
    System.out.println(" @Alerts()");
    System.out.println(" public void test() throws Exception {");

    final List<String> lines = FileUtils.readLines(file, TextUtil.DEFAULT_CHARSET);
    if (lines.isEmpty()) {
        // Still declare the variable, otherwise the generated method would not compile.
        System.out.println(" final String html = \"\";");
    }
    final int last = lines.size() - 1;
    for (int i = 0; i <= last; i++) {
        // Escape backslashes before quotes so the generated literal is valid Java
        // and reproduces the file content exactly.
        final String escaped = lines.get(i).replace("\\", "\\\\").replace("\"", "\\\"");
        final String prefix = i == 0 ? " final String html = \"" : " + \"";
        // The statement terminator belongs to the last line, even if it is also the first.
        final String terminator = i == last ? ";" : "";
        System.out.println(prefix + escaped + "\\n\"" + terminator);
    }

    System.out.println(" loadPageWithAlerts2(html);");
    System.out.println(" }");
}
From source file:com.sankalp.characterreader.CharacterReader.java
/**
 * Trains the digit-recognition network and then classifies images on request.
 *
 * <p>The network (28*28 inputs, one hidden layer of 50 nodes, 10 outputs) is
 * trained for 30 epochs; after each epoch the accuracy measured on the test data
 * is printed. Afterwards image numbers are read from standard input, one per
 * line, and the network output for
 * {@code /home/sankalpkulshrestha/mnist/test-images/<number>.jpg} is printed.
 * Entering {@code -1} or closing the input ends the program.
 *
 * @param args not used
 * @throws IOException if the training data or standard input cannot be read
 */
public static void main(String[] args) throws IOException {
    double learningRate = 0.55;
    List<NeuralNetwork.Layer> hiddenLayerList = new ArrayList<NeuralNetwork.Layer>();
    hiddenLayerList.add(new NeuralNetwork.Layer(50));
    NeuralNetwork neuralNetwork = new NeuralNetwork(new NeuralNetwork.Layer(28 * 28),
            new NeuralNetwork.Layer(10), hiddenLayerList, learningRate);

    int trainingSampleCount = 60000;
    TrainingData trainingData = new TrainingData(new File("/home/sankalpkulshrestha/mnist/train-images/"),
            trainingSampleCount, new File("/home/sankalpkulshrestha/mnist/train-labels.csv"));

    int totalEpochs = 30;
    for (int index = 0; index < totalEpochs; index++) {
        System.out.println("---------- EPOCH " + index + " ----------");
        neuralNetwork.train(trainingData);
        TrainingData testData = new TrainingData(new File("/home/sankalpkulshrestha/mnist/test-images/"),
                10000, new File("/home/sankalpkulshrestha/mnist/test-labels.csv"));
        System.out.println("Training over");
        double accuracy = neuralNetwork.measureAccuracy(testData);
        System.out.println(accuracy);
        // No need to prepare another pass after the final epoch.
        if (index < totalEpochs - 1) {
            trainingData.shuffle();
            trainingData.reset();
        }
    }

    System.out.println("Model trained. Enter image file names:");
    BufferedReader br = new BufferedReader(new InputStreamReader(System.in));
    String input;
    // readLine() returns null at end of input; treat that like the -1 sentinel
    // instead of letting Integer.parseInt(null) throw.
    while ((input = br.readLine()) != null) {
        int number;
        try {
            number = Integer.parseInt(input.trim());
        } catch (NumberFormatException e) {
            System.out.println("Not a valid image number: " + input);
            continue;
        }
        if (number == -1) {
            break;
        }
        int output = neuralNetwork.test(new TrainingData.Sample(
                new File("/home/sankalpkulshrestha/mnist/test-images/" + number + ".jpg"), 0));
        System.out.println(output);
    }
}
From source file:de.tudarmstadt.ukp.experiments.argumentation.convincingness.sampling.Step5GoldLabelEstimator.java
@SuppressWarnings("unchecked") public static void main(String[] args) throws Exception { String inputDir = args[0];//from w w w .j a va 2s .c o m File outputDir = new File(args[1]); if (!outputDir.exists()) { outputDir.mkdirs(); } // we will process only a subset first List<AnnotatedArgumentPair> allArgumentPairs = new ArrayList<>(); Collection<File> files = IOHelper.listXmlFiles(new File(inputDir)); for (File file : files) { allArgumentPairs.addAll((List<AnnotatedArgumentPair>) XStreamTools.getXStream().fromXML(file)); } // collect turkers and csv List<String> turkerIDs = extractAndSortTurkerIDs(allArgumentPairs); String preparedCSV = prepareCSV(allArgumentPairs, turkerIDs); // save CSV and run MACE Path tmpDir = Files.createTempDirectory("mace"); File maceInputFile = new File(tmpDir.toFile(), "input.csv"); FileUtils.writeStringToFile(maceInputFile, preparedCSV, "utf-8"); File outputPredictions = new File(tmpDir.toFile(), "predictions.txt"); File outputCompetence = new File(tmpDir.toFile(), "competence.txt"); // run MACE MACE.main(new String[] { "--iterations", "500", "--threshold", String.valueOf(MACE_THRESHOLD), "--restarts", "50", "--outputPredictions", outputPredictions.getAbsolutePath(), "--outputCompetence", outputCompetence.getAbsolutePath(), maceInputFile.getAbsolutePath() }); // read back the predictions and competence List<String> predictions = FileUtils.readLines(outputPredictions, "utf-8"); // check the output if (predictions.size() != allArgumentPairs.size()) { throw new IllegalStateException("Wrong size of the predicted file; expected " + allArgumentPairs.size() + " lines but was " + predictions.size()); } String competenceRaw = FileUtils.readFileToString(outputCompetence, "utf-8"); String[] competence = competenceRaw.split("\t"); if (competence.length != turkerIDs.size()) { throw new IllegalStateException( "Expected " + turkerIDs.size() + " competence number, got " + competence.length); } // rank turkers by competence Map<String, Double> 
turkerIDCompetenceMap = new TreeMap<>(); for (int i = 0; i < turkerIDs.size(); i++) { turkerIDCompetenceMap.put(turkerIDs.get(i), Double.valueOf(competence[i])); } // sort by value descending Map<String, Double> sortedCompetences = IOHelper.sortByValue(turkerIDCompetenceMap, false); System.out.println("Sorted turker competences: " + sortedCompetences); // assign the gold label and competence for (int i = 0; i < allArgumentPairs.size(); i++) { AnnotatedArgumentPair annotatedArgumentPair = allArgumentPairs.get(i); String goldLabel = predictions.get(i).trim(); // might be empty if (!goldLabel.isEmpty()) { // so far the gold label has format aXXX_aYYY_a1, aXXX_aYYY_a2, or aXXX_aYYY_equal // strip now only the gold label annotatedArgumentPair.setGoldLabel(goldLabel); } // update turker competence for (AnnotatedArgumentPair.MTurkAssignment assignment : annotatedArgumentPair.mTurkAssignments) { String turkID = assignment.getTurkID(); int turkRank = getTurkerRank(turkID, sortedCompetences); assignment.setTurkRank(turkRank); double turkCompetence = turkerIDCompetenceMap.get(turkID); assignment.setTurkCompetence(turkCompetence); } } // now sort the data back according to their original file name Map<String, List<AnnotatedArgumentPair>> fileNameAnnotatedPairsMap = new HashMap<>(); for (AnnotatedArgumentPair argumentPair : allArgumentPairs) { String fileName = IOHelper.createFileName(argumentPair.getDebateMetaData(), argumentPair.getArg1().getStance()); if (!fileNameAnnotatedPairsMap.containsKey(fileName)) { fileNameAnnotatedPairsMap.put(fileName, new ArrayList<AnnotatedArgumentPair>()); } fileNameAnnotatedPairsMap.get(fileName).add(argumentPair); } // and save them to the output file for (Map.Entry<String, List<AnnotatedArgumentPair>> entry : fileNameAnnotatedPairsMap.entrySet()) { String fileName = entry.getKey(); List<AnnotatedArgumentPair> argumentPairs = entry.getValue(); File outputFile = new File(outputDir, fileName); // and save all sampled pairs into a XML file 
XStreamTools.toXML(argumentPairs, outputFile); System.out.println("Saved " + argumentPairs.size() + " pairs to " + outputFile); } }
From source file:com.acapulcoapp.alloggiatiweb.FileReader.java
public static void main(String[] args) throws UnknownHostException, IOException { // TODO code application logic here SpringApplication app = new SpringApplication(AcapulcoappApp.class); SimpleCommandLinePropertySource source = new SimpleCommandLinePropertySource(args); addDefaultProfile(app, source);/* w ww .j a v a 2s. c o m*/ ConfigurableApplicationContext context = app.run(args); initBeans(context); Map<LocalDate, List<List<String>>> map = new TreeMap<>(); List<File> files = new ArrayList<>(FileUtils.listFiles(new File("/Users/chiccomask/Downloads/ALLOGGIATI"), new String[] { "txt" }, true)); Collections.reverse(files); int count = 0; for (File file : files) { // List<String> allLines = FileUtils.readLines(file, "windows-1252"); List<String> allLines = FileUtils.readLines(file, "UTF-8"); for (int i = 0; i < allLines.size();) { count++; List<String> record = new ArrayList<>(); String line = allLines.get(i); String type = TIPO_ALLOGGIO.parse(line); switch (type) { case "16": record.add(line); i++; break; case "17": { record.add(line); boolean out = false; while (!out) { i++; if (i < allLines.size()) { String subline = allLines.get(i); String subtype = TIPO_ALLOGGIO.parse(subline); if (!subtype.equals("19")) { out = true; } else { record.add(subline); } } else { out = true; } } break; } case "18": { record.add(line); boolean out = false; while (!out) { i++; if (i < allLines.size()) { String subline = allLines.get(i); String subtype = TIPO_ALLOGGIO.parse(subline); if (!subtype.equals("20")) { out = true; } else { record.add(subline); } } else { out = true; } } break; } default: break; } LocalDate arrived = LocalDate.parse(DATA_ARRIVO.parse(line), DateTimeFormatter.ofPattern(DATE_PATTERN)); if (!map.containsKey(arrived)) { map.put(arrived, new ArrayList<>()); } map.get(arrived).add(record); } } for (LocalDate date : map.keySet()) { System.out.println(); System.out.println("process day " + date); for (List<String> record : map.get(date)) { System.out.println(); 
System.out.println("process record "); for (String line : record) { System.out.println(line); } CheckinRecord checkinRecord = new CheckinRecord(); //non lo setto per adesso String firstLine = record.get(0); String typeStr = TIPO_ALLOGGIO.parse(firstLine); CheckinType cht = checkinTypeRepository.find(typeStr); checkinRecord.setCheckinType(cht); int days = Integer.parseInt(PERMANENZA.parse(firstLine)); checkinRecord.setDays(days); checkinRecord.setArrived(date); boolean isMain = true; List<Person> others = new ArrayList<>(); for (String line : record) { Person p = extractPerson(line); if (p.getDistrictOfBirth() == null) { System.out.println("district of birth not found " + p); } List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(), p.getDateOfBirth()); if (duplicates.isEmpty()) { System.out.println("add new person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } else if (duplicates.size() == 1) { Person found = duplicates.get(0); if (p.getIdentityDocument() != null) { //we sorted by date so we suppose //the file version is newer so we update the entity p.setId(found.getId()); System.out.println("update person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } else if (found.getIdentityDocument() != null) { //on db there are more data so I use them. 
p = found; System.out.println("use already saved person " + p.getId() + " " + p); } else { p.setId(found.getId()); System.out.println("update person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } } else { throw new RuntimeException("More duplicated for " + p.getName()); } if (isMain) { checkinRecord.setMainPerson(p); isMain = false; } else { others.add(p); } } checkinRecord.setOtherPeople(new HashSet<>(others)); if (checkinRecordRepository.alreadyExists(checkinRecord.getMainPerson(), date) != null) { System.out.println("already exists " + date + " p " + checkinRecord.getMainPerson()); } else { System.out.println("save record "); checkinRecordRepository.saveAndFlush(checkinRecord); } } } // // if (type.equals("16")) { // List<String> record = new ArrayList<>(); // record.add(line); // keepOpen = false; // } // // map.get(arrived).add(record); // map.values().forEach((list) -> { // // for (String line : list) { // // Person p = null; // // try { // // p = extractPerson(line); // // List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(), p.getDateOfBirth()); // // if (duplicates.isEmpty()) { // personRepository.saveAndFlush(p); // // } else if (duplicates.size() > 1) { // System.out.println(); // System.out.println("MULIPLE DUPLICATED"); // // for (Person dd : duplicates) { // System.out.println(dd); // } // System.out.println("* " + p); // throw new RuntimeException(); // } else { // //// if (!duplicates.get(0).getDistrictOfBirth().equals(p.getDistrictOfBirth())) { //// int index = 0; //// //// System.out.println(); //// System.out.println("DUPLICATED"); //// //// for (Person dd : duplicates) { //// System.out.println(dd); //// index++; //// } //// System.out.println("* " + p); //// System.out.println(file.getAbsolutePath() + " " + p); //// //// System.out.println(); //// System.out.println(); //// } //// duplicates.remove(0); //// personRepository.deleteInBatch(duplicates); //// System.out.println(); //// 
System.out.println("Seleziona scelta"); //// Scanner s = new Scanner(System.in); //// int selected; //// try { //// selected = s.nextInt(); //// } catch (InputMismatchException e) { //// selected = 0; //// } //// //// if (duplicates.size() <= selected) { //// personRepository.deleteInBatch(duplicates); //// personRepository.saveAndFlush(p); //// } else { //// duplicates.remove(selected); //// personRepository.deleteInBatch(duplicates); //// } // } // // } catch (Exception e) { // // System.out.println(); //// System.out.println("ERROR READING lineCount=" + allLines.indexOf(line) + " line=" + line); //// System.out.println(file.getAbsolutePath()); // System.out.println(p); // e.printStackTrace(); // System.out.println(); // } // } // }); context.registerShutdownHook(); System.exit(0); }
From source file:fakedatamaker.util.RandomFileUtil.java
/** * // www.j ava2 s. c o m * @param filePath * @param encoding * @return */ public static String getRandomFileLine(String filePath, String encoding) { String result = null; try { File file = new File(filePath); List<String> cityLines = FileUtils.readLines(file, encoding); int numberOfCities = cityLines.size(); Random r = new Random(); int cityLineNum = r.nextInt(numberOfCities); result = cityLines.get(cityLineNum); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } return result; }
From source file:com.nubits.nubot.utils.VersionInfo.java
private static HashMap getInfoFile() { String wdir = System.getProperty("user.dir"); String fp = wdir + "/" + Settings.INFO_FILE; File file = new File(fp); try {//w w w. j a v a 2s . c om List lines = FileUtils.readLines(file, "UTF-8"); HashMap km = new HashMap(); for (Object o : lines) { String l = "" + o; try { String[] a = l.split("="); km.put(a[0], a[1]); } catch (Exception e) { //ignore line with "=" } } return km; } catch (Exception e) { //throw e; LOG.error(e.toString()); } return null; }
From source file:com.github.stagirs.lingvo.syntax.disambiguity.mystem.MyStem.java
public static List<SyntaxItem[][]> process(List<String> text) throws IOException, InterruptedException { FileUtils.writeLines(new File("mystem_input"), "utf-8", text); Process process = Runtime.getRuntime() .exec(new String[] { "./mystem", "-c", "-i", "-d", "mystem_input", "mystem_output" }); process.waitFor();//ww w. jav a 2 s . c om InputStream is = process.getInputStream(); try { List<String> lines = FileUtils.readLines(new File("mystem_output"), "utf-8"); List<SyntaxItem[][]> resultList = new ArrayList<SyntaxItem[][]>(); for (String line : lines) { String[] parts = line.split(" "); SyntaxItem[][] result = new SyntaxItem[parts.length][]; for (int i = 0; i < result.length; i++) { if (parts[i].isEmpty()) { result[i] = new SyntaxItem[0]; continue; } if (!parts[i].contains("{")) { final String word = parts[i]; result[i] = new SyntaxItem[] { new SyntaxItem() { @Override public String getName() { return word; } @Override public String getType() { return ""; } @Override public double getScore() { return 0; } } }; continue; } final String word = parts[i].substring(0, parts[i].indexOf('{')); final String[] forms = parts[i].substring(parts[i].indexOf('{') + 1, parts[i].indexOf('}')) .split("\\|"); result[i] = new SyntaxItem[forms.length]; for (int j = 0; j < result[i].length; j++) { final String type = forms[j]; result[i][j] = new SyntaxItem() { @Override public String getName() { return word; } @Override public String getType() { return type; } @Override public double getScore() { return 0; } }; } } resultList.add(result); } return resultList; } finally { is.close(); } }