List of usage examples for org.apache.commons.io FileUtils listFiles
public static Collection listFiles(File directory, String[] extensions, boolean recursive)
From source file:com.sangupta.keepwalking.MergeRepo.java
/** * @param args//from ww w . j a v a2s . c om * @throws IOException */ public static void main(String[] args) throws IOException { if (args.length != 3) { usage(); return; } final String previousRepo = args[0]; final String newerRepo = args[1]; final String mergedRepo = args[2]; final File previous = new File(previousRepo); final File newer = new File(newerRepo); final File merged = new File(mergedRepo); if (!(previous.exists() && previous.isDirectory())) { System.out.println("The previous version does not exists or is not a directory."); return; } if (!(newer.exists() && newer.isDirectory())) { System.out.println("The newer version does not exists or is not a directory."); return; } final IOFileFilter directoryFilter = FileFilterUtils.makeCVSAware(FileFilterUtils.makeSVNAware(null)); final Collection<File> olderFiles = FileUtils.listFiles(previous, TrueFileFilter.TRUE, directoryFilter); final Collection<File> newerFiles = FileUtils.listFiles(newer, TrueFileFilter.TRUE, directoryFilter); // build a list of unique paths System.out.println("Reading files from older version..."); List<String> olderPaths = new ArrayList<String>(); for (File oldFile : olderFiles) { olderPaths.add(getRelativePath(oldFile, previous)); } System.out.println("Reading files from newer version..."); List<String> newerPaths = new ArrayList<String>(); for (File newerFile : newerFiles) { newerPaths.add(getRelativePath(newerFile, newer)); } // find which files have been removed from Perforce depot List<String> filesRemoved = new ArrayList<String>(olderPaths); filesRemoved.removeAll(newerPaths); System.out.println("Files removed in newer version: " + filesRemoved.size()); for (String removed : filesRemoved) { System.out.print(" "); System.out.println(removed); } // find which files have been added in Perforce depot List<String> filesAdded = new ArrayList<String>(newerPaths); filesAdded.removeAll(olderPaths); System.out.println("Files added in newer version: " + filesAdded.size()); for (String 
added : filesAdded) { System.out.print(" "); System.out.println(added); } // find which files are common // now check if they have modified or not newerPaths.retainAll(olderPaths); List<String> modified = checkModifiedFiles(newerPaths, previous, newer); System.out.println("Files modified in newer version: " + modified.size()); for (String modify : modified) { System.out.print(" "); System.out.println(modify); } // clean any previous existence of merged repo System.out.println("Cleaning any previous merged repositories..."); if (merged.exists() && merged.isDirectory()) { FileUtils.deleteDirectory(merged); } System.out.println("Merging from newer to older repository..."); // copy the original SVN repo to merged FileUtils.copyDirectory(previous, merged); // now remove all files that need to be for (String removed : filesRemoved) { File toRemove = new File(merged, removed); toRemove.delete(); } // now add all files that are new in perforce for (String added : filesAdded) { File toAdd = new File(newer, added); File destination = new File(merged, added); FileUtils.copyFile(toAdd, destination); } // now over-write modified files for (String changed : modified) { File change = new File(newer, changed); File destination = new File(merged, changed); destination.delete(); FileUtils.copyFile(change, destination); } System.out.println("Done merging."); }
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step10RemoveEmptyDocuments.java
public static void main(String[] args) throws IOException { // input dir - list of xml query containers File inputDir = new File(args[0]); // output dir File outputDir = new File(args[1]); if (!outputDir.exists()) { outputDir.mkdirs();//from w ww .j a va2 s. c o m } boolean crop = args.length >= 3 && "crop".equals(args[2]); // first find the maximum of zero-sized documents int maxMissing = 7; /* // iterate over query containers for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); // first find the maximum of zero-sized documents in a query int missingInQuery = 0; for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { // boilerplate removal if (rankedResults.plainText == null || rankedResults.plainText.isEmpty()) { missingInQuery++; } } maxMissing = Math.max(missingInQuery, maxMissing); } */ System.out.println("Max zeroLengthDocuments in query: " + maxMissing); // max is 7 = we're cut-off at 93 // iterate over query containers for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); List<QueryResultContainer.SingleRankedResult> nonEmptyDocsList = new ArrayList<>(); for (QueryResultContainer.SingleRankedResult rankedResults : queryResultContainer.rankedResults) { // collect non-empty documents if (rankedResults.plainText != null && !rankedResults.plainText.isEmpty()) { nonEmptyDocsList.add(rankedResults); } } System.out.println("Non-empty docs coune: " + nonEmptyDocsList.size()); if (crop) { // now cut at 93 nonEmptyDocsList = nonEmptyDocsList.subList(0, (100 - maxMissing)); System.out.println("After cropping: " + nonEmptyDocsList.size()); } System.out.println("After cleaning: " + nonEmptyDocsList.size()); queryResultContainer.rankedResults.clear(); 
queryResultContainer.rankedResults.addAll(nonEmptyDocsList); // and save the query to output dir File outputFile = new File(outputDir, queryResultContainer.qID + ".xml"); FileUtils.writeStringToFile(outputFile, queryResultContainer.toXML(), "utf-8"); System.out.println("Finished " + outputFile); } }
From source file:PlyBounder.java
public static void main(String[] args) { // Get the commandline arguments Options options = new Options(); // Available options Option plyPath = OptionBuilder.withArgName("dir").hasArg() .withDescription("directory containing input .ply files").create("plyPath"); Option boundingbox = OptionBuilder.withArgName("string").hasArg() .withDescription("bounding box in WKT notation").create("boundingbox"); Option outputPlyFile = OptionBuilder.withArgName("file").hasArg().withDescription("output PLY file name") .create("outputPlyFile"); options.addOption(plyPath);/*from w w w.j av a 2 s . co m*/ options.addOption(boundingbox); options.addOption(outputPlyFile); String plydir = "."; String boundingboxstr = ""; String outputfilename = ""; CommandLineParser parser = new DefaultParser(); try { // parse the command line arguments CommandLine line = parser.parse(options, args); boundingboxstr = line.getOptionValue("boundingbox"); outputfilename = line.getOptionValue("outputPlyFile"); if (line.hasOption("plyPath")) { // print the value of block-size plydir = line.getOptionValue("plyPath"); System.out.println("Using plyPath=" + plydir); } else { HelpFormatter formatter = new HelpFormatter(); formatter.printHelp("PlyBounder", options); } //System.out.println( "plyPath=" + line.getOptionValue( "plyPath" ) ); } catch (ParseException exp) { System.err.println("Error getting arguments: " + exp.getMessage()); } // input directory // Get list of files File dir = new File(plydir); //System.out.println("Getting all files in " + dir.getCanonicalPath()); List<File> files = (List<File>) FileUtils.listFiles(dir, new String[] { "ply", "PLY" }, false); for (File file : files) { try { System.out.println("file=" + file.getCanonicalPath()); } catch (IOException e) { e.printStackTrace(); } } String sometempfile = "magweg.wkt"; String s = null; // Loop through .ply files in directory for (File file : files) { try { String cmdl[] = { "./ply-tool.py", "intersection", file.getCanonicalPath(), 
boundingboxstr, sometempfile }; //System.out.println("Running: " + Arrays.toString(cmdl)); Process p = Runtime.getRuntime().exec(cmdl); BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); // read the output from the command System.out.println("cmdout:\n"); while ((s = stdInput.readLine()) != null) { System.out.println(s); } // read any errors from the attempted command System.out.println("cmderr:\n"); while ((s = stdError.readLine()) != null) { System.out.println(s); } } catch (IOException e) { e.printStackTrace(); } } // Write new .ply file //ply-tool write setfile outputPlyFile try { String cmdl = "./ply-tool.py write " + sometempfile + " " + outputfilename; System.out.println("Running: " + cmdl); Process p = Runtime.getRuntime().exec(cmdl); BufferedReader stdInput = new BufferedReader(new InputStreamReader(p.getInputStream())); BufferedReader stdError = new BufferedReader(new InputStreamReader(p.getErrorStream())); // read the output from the command System.out.println("cmdout:\n"); while ((s = stdInput.readLine()) != null) { System.out.println(s); } // read any errors from the attempted command System.out.println("cmderr:\n"); while ((s = stdError.readLine()) != null) { System.out.println(s); } } catch (IOException e) { e.printStackTrace(); } // Done System.out.println("Done"); }
From source file:de.jwi.zip.Zipper.java
public static void main(String[] args) throws Exception { ZipOutputStream z = new ZipOutputStream(new FileOutputStream("d:/temp/myfirst.zip")); IOFileFilter filter = new IOFileFilter() { public boolean accept(java.io.File file) { return true; }//from w w w . ja v a 2s . c om public boolean accept(java.io.File dir, java.lang.String name) { return true; } }; Collection c = FileUtils.listFiles(new File("/java/javadocs/j2sdk-1.4.1/docs/tooldocs"), filter, filter); new Zipper().zip(z, c, new File("/java/javadocs/j2sdk-1.4.1")); z.close(); }
From source file:com.ontotext.s4.SBTDemo.Main.java
public static void main(String[] args) { /*/*from ww w . jav a 2 s . c o m*/ * Read log4j properties file. */ org.apache.log4j.PropertyConfigurator.configure(args.length >= 1 ? args[1] : DEFAULT_LOG4J_FILE); /* * Read properties file List all annotated files. We should * use absolute path to the files */ init(args); ProcessingDocuments processingDocuments = new ProcessingDocuments( programProperties.getProperty(PropertiesNames.S4_API_KEY), programProperties.getProperty(PropertiesNames.S4_API_PASS), programProperties.getProperty(PropertiesNames.RAW_FOLDER), programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER), programProperties.getProperty(PropertiesNames.SERVICE), programProperties.getProperty(PropertiesNames.MIME_TYPE), programProperties.getProperty(PropertiesNames.RESPONSE_FORMAT), Integer.parseInt(programProperties.getProperty(PropertiesNames.NUMBER_OF_THREADS))); processingDocuments.ProcessData(); File directory = new File(programProperties.getProperty(PropertiesNames.ANNOTATED_FOLDER)); listOfAllAnnotatedFiles = FileUtils.listFiles(directory, new RegexFileFilter("^(.*?)"), DirectoryFileFilter.DIRECTORY); RepoManager repoManager = new RepoManager(programProperties.getProperty(PropertiesNames.REPOSITORY_URL)); JsonToRDF jsonToRdfParser = new JsonToRDF(programProperties.getProperty(PropertiesNames.MIME_TYPE), programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER)); for (File file : listOfAllAnnotatedFiles) { String fileContent = null; try { fileContent = FileUtils.readFileToString(file, "UTF-8"); } catch (IOException e) { logger.error(e); } Model graph = jsonToRdfParser.wirteDataToRDF(fileContent, file.getName(), programProperties.getProperty(PropertiesNames.RDFIZE_FOLDER)); try { repoManager.sendDataTOGraphDB(graph); } catch (RepositoryException e) { logger.error(e); } } repoManager.close(); }
From source file:de.micromata.genome.gwiki.tools.PatchJavaHeader.java
public static void main(String[] args) { String baseDir = args[0];//from w ww . ja v a 2s. c o m Collection<File> col = FileUtils.listFiles(new File(baseDir), new String[] { "java" }, true); for (File f : col) { System.out.println("file: " + f.getAbsolutePath()); patchFile(f); } }
From source file:io.apiman.tools.i18n.TemplateScanner.java
public static void main(String[] args) throws IOException { if (args == null || args.length != 1) { System.out.println("Template directory not provided (no path provided)."); System.exit(1);//from w w w .j a v a 2s.co m } File templateDir = new File(args[0]); if (!templateDir.isDirectory()) { System.out.println("Template directory not provided (provided path is not a directory)."); System.exit(1); } if (!new File(templateDir, "dash.html").isFile()) { System.out.println("Template directory not provided (dash.html not found)."); System.exit(1); } File outputDir = new File(templateDir, "../../../../../../tools/i18n/target"); if (!outputDir.isDirectory()) { System.out.println("Output directory not found: " + outputDir); System.exit(1); } File outputFile = new File(outputDir, "scanner-messages.properties"); if (outputFile.isFile() && !outputFile.delete()) { System.out.println("Couldn't delete the old messages.properties: " + outputFile); System.exit(1); } System.out.println("Starting scan."); System.out.println("Scanning template directory: " + templateDir.getAbsolutePath()); String[] extensions = { "html", "include" }; Collection<File> files = FileUtils.listFiles(templateDir, extensions, true); TreeMap<String, String> strings = new TreeMap<>(); for (File file : files) { System.out.println("\tScanning file: " + file); scanFile(file, strings); } outputMessages(strings, outputFile); System.out.println("Scan complete. Scanned " + files.size() + " files and discovered " + strings.size() + " translation strings."); }
From source file:com.acapulcoapp.alloggiatiweb.FileReader.java
public static void main(String[] args) throws UnknownHostException, IOException { // TODO code application logic here SpringApplication app = new SpringApplication(AcapulcoappApp.class); SimpleCommandLinePropertySource source = new SimpleCommandLinePropertySource(args); addDefaultProfile(app, source);//from ww w .j av a 2s . co m ConfigurableApplicationContext context = app.run(args); initBeans(context); Map<LocalDate, List<List<String>>> map = new TreeMap<>(); List<File> files = new ArrayList<>(FileUtils.listFiles(new File("/Users/chiccomask/Downloads/ALLOGGIATI"), new String[] { "txt" }, true)); Collections.reverse(files); int count = 0; for (File file : files) { // List<String> allLines = FileUtils.readLines(file, "windows-1252"); List<String> allLines = FileUtils.readLines(file, "UTF-8"); for (int i = 0; i < allLines.size();) { count++; List<String> record = new ArrayList<>(); String line = allLines.get(i); String type = TIPO_ALLOGGIO.parse(line); switch (type) { case "16": record.add(line); i++; break; case "17": { record.add(line); boolean out = false; while (!out) { i++; if (i < allLines.size()) { String subline = allLines.get(i); String subtype = TIPO_ALLOGGIO.parse(subline); if (!subtype.equals("19")) { out = true; } else { record.add(subline); } } else { out = true; } } break; } case "18": { record.add(line); boolean out = false; while (!out) { i++; if (i < allLines.size()) { String subline = allLines.get(i); String subtype = TIPO_ALLOGGIO.parse(subline); if (!subtype.equals("20")) { out = true; } else { record.add(subline); } } else { out = true; } } break; } default: break; } LocalDate arrived = LocalDate.parse(DATA_ARRIVO.parse(line), DateTimeFormatter.ofPattern(DATE_PATTERN)); if (!map.containsKey(arrived)) { map.put(arrived, new ArrayList<>()); } map.get(arrived).add(record); } } for (LocalDate date : map.keySet()) { System.out.println(); System.out.println("process day " + date); for (List<String> record : map.get(date)) { System.out.println(); 
System.out.println("process record "); for (String line : record) { System.out.println(line); } CheckinRecord checkinRecord = new CheckinRecord(); //non lo setto per adesso String firstLine = record.get(0); String typeStr = TIPO_ALLOGGIO.parse(firstLine); CheckinType cht = checkinTypeRepository.find(typeStr); checkinRecord.setCheckinType(cht); int days = Integer.parseInt(PERMANENZA.parse(firstLine)); checkinRecord.setDays(days); checkinRecord.setArrived(date); boolean isMain = true; List<Person> others = new ArrayList<>(); for (String line : record) { Person p = extractPerson(line); if (p.getDistrictOfBirth() == null) { System.out.println("district of birth not found " + p); } List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(), p.getDateOfBirth()); if (duplicates.isEmpty()) { System.out.println("add new person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } else if (duplicates.size() == 1) { Person found = duplicates.get(0); if (p.getIdentityDocument() != null) { //we sorted by date so we suppose //the file version is newer so we update the entity p.setId(found.getId()); System.out.println("update person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } else if (found.getIdentityDocument() != null) { //on db there are more data so I use them. 
p = found; System.out.println("use already saved person " + p.getId() + " " + p); } else { p.setId(found.getId()); System.out.println("update person " + p.getId() + " " + p); personRepository.saveAndFlush(p); } } else { throw new RuntimeException("More duplicated for " + p.getName()); } if (isMain) { checkinRecord.setMainPerson(p); isMain = false; } else { others.add(p); } } checkinRecord.setOtherPeople(new HashSet<>(others)); if (checkinRecordRepository.alreadyExists(checkinRecord.getMainPerson(), date) != null) { System.out.println("already exists " + date + " p " + checkinRecord.getMainPerson()); } else { System.out.println("save record "); checkinRecordRepository.saveAndFlush(checkinRecord); } } } // // if (type.equals("16")) { // List<String> record = new ArrayList<>(); // record.add(line); // keepOpen = false; // } // // map.get(arrived).add(record); // map.values().forEach((list) -> { // // for (String line : list) { // // Person p = null; // // try { // // p = extractPerson(line); // // List<Person> duplicates = personRepository.findDuplicates(p.getSurname(), p.getName(), p.getDateOfBirth()); // // if (duplicates.isEmpty()) { // personRepository.saveAndFlush(p); // // } else if (duplicates.size() > 1) { // System.out.println(); // System.out.println("MULIPLE DUPLICATED"); // // for (Person dd : duplicates) { // System.out.println(dd); // } // System.out.println("* " + p); // throw new RuntimeException(); // } else { // //// if (!duplicates.get(0).getDistrictOfBirth().equals(p.getDistrictOfBirth())) { //// int index = 0; //// //// System.out.println(); //// System.out.println("DUPLICATED"); //// //// for (Person dd : duplicates) { //// System.out.println(dd); //// index++; //// } //// System.out.println("* " + p); //// System.out.println(file.getAbsolutePath() + " " + p); //// //// System.out.println(); //// System.out.println(); //// } //// duplicates.remove(0); //// personRepository.deleteInBatch(duplicates); //// System.out.println(); //// 
System.out.println("Seleziona scelta"); //// Scanner s = new Scanner(System.in); //// int selected; //// try { //// selected = s.nextInt(); //// } catch (InputMismatchException e) { //// selected = 0; //// } //// //// if (duplicates.size() <= selected) { //// personRepository.deleteInBatch(duplicates); //// personRepository.saveAndFlush(p); //// } else { //// duplicates.remove(selected); //// personRepository.deleteInBatch(duplicates); //// } // } // // } catch (Exception e) { // // System.out.println(); //// System.out.println("ERROR READING lineCount=" + allLines.indexOf(line) + " line=" + line); //// System.out.println(file.getAbsolutePath()); // System.out.println(p); // e.printStackTrace(); // System.out.println(); // } // } // }); context.registerShutdownHook(); System.exit(0); }
From source file:com.l2jserver.model.template.SkillTemplateConverter.java
/**
 * Converts the legacy skill XML files into one template XML file per skill.
 *
 * <p>The JAXB schema is generated into {@code data/templates}, every XML
 * file below {@code LEGACY_SKILL_FOLDER} is unmarshalled, each legacy skill
 * is converted with {@code fillSkill} and marshalled to
 * {@code data/templates/skill/<id>[-<name>].xml}. Memory statistics are
 * printed at the end.
 *
 * @param args unused
 * @throws SQLException           if the database connection cannot be opened or closed
 * @throws IOException            if a schema or template file cannot be written
 * @throws ClassNotFoundException if the MySQL JDBC driver class is missing
 * @throws JAXBException          on JAXB failures other than a MarshalException
 *                                for a single template
 */
public static void main(String[] args) throws SQLException, IOException, ClassNotFoundException, JAXBException {
    Class.forName("com.mysql.jdbc.Driver");

    final File target = new File("data/templates");
    final JAXBContext c = JAXBContext.newInstance(SkillTemplate.class, LegacySkillList.class);
    final Connection conn = DriverManager.getConnection(JDBC_URL, JDBC_USERNAME, JDBC_PASSWORD);

    // everything after opening the connection runs inside the try, so the
    // connection is closed even when schema generation fails
    try {
        System.out.println("Generating template XML files...");

        c.generateSchema(new SchemaOutputResolver() {
            @Override
            public Result createOutput(String namespaceUri, String suggestedFileName) throws IOException {
                return new StreamResult(new File(target, suggestedFileName));
            }
        });

        final Unmarshaller u = c.createUnmarshaller();
        final Marshaller m = c.createMarshaller();
        m.setProperty(Marshaller.JAXB_FORMATTED_OUTPUT, true);
        m.setProperty(Marshaller.JAXB_NO_NAMESPACE_SCHEMA_LOCATION, "skill");
        m.setProperty(Marshaller.JAXB_SCHEMA_LOCATION, "skill ../skill.xsd");

        Collection<File> files = FileUtils.listFiles(new File(LEGACY_SKILL_FOLDER), new String[] { "xml" },
                true);
        for (final File legacyFile : files) {
            LegacySkillList list = (LegacySkillList) u.unmarshal(legacyFile);
            for (final LegacySkill legacySkill : list.skills) {
                SkillTemplate t = fillSkill(legacySkill);
                final File file = new File(target, "skill/" + t.id.getID()
                        + (t.getName() != null ? "-" + camelCase(t.getName()) : "") + ".xml");
                templates.add(t);

                boolean marshalled = false;
                final FileOutputStream out = new FileOutputStream(file);
                try {
                    m.marshal(t, getXMLSerializer(out));
                    marshalled = true;
                } catch (MarshalException e) {
                    System.err.println(
                            "Could not generate XML template file for " + t.getName() + " - " + t.getID());
                } finally {
                    // one stream per template: close it right away instead of
                    // leaking a file handle for every generated file
                    out.close();
                }
                if (!marshalled) {
                    // remove the partial file only after the stream is closed;
                    // deleting a file that is still open fails on some platforms
                    file.delete();
                }
            }
        }

        System.out.println("Generated " + templates.size() + " templates");

        System.gc();
        System.out.println("Free: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().freeMemory()));
        System.out.println("Total: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().totalMemory()));
        System.out.println("Used: " + FileUtils.byteCountToDisplaySize(
                Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory()));
        System.out.println("Max: " + FileUtils.byteCountToDisplaySize(Runtime.getRuntime().maxMemory()));
    } finally {
        conn.close();
    }
}
From source file:de.tudarmstadt.ukp.experiments.dip.wp1.documents.Step6HITPreparator.java
public static void main(String[] args) throws Exception { // input dir - list of xml query containers // step5-linguistic-annotation/ System.err.println("Starting step 6 HIT Preparation"); File inputDir = new File(args[0]); // output dir File outputDir = new File(args[1]); if (outputDir.exists()) { outputDir.delete();/*from www. j av a 2s. c o m*/ } outputDir.mkdir(); List<String> queries = new ArrayList<>(); // iterate over query containers int countClueWeb = 0; int countSentence = 0; for (File f : FileUtils.listFiles(inputDir, new String[] { "xml" }, false)) { QueryResultContainer queryResultContainer = QueryResultContainer .fromXML(FileUtils.readFileToString(f, "utf-8")); if (queries.contains(f.getName()) || queries.size() == 0) { // groups contain only non-empty documents Map<Integer, List<QueryResultContainer.SingleRankedResult>> groups = new HashMap<>(); // split to groups according to number of sentences for (QueryResultContainer.SingleRankedResult rankedResult : queryResultContainer.rankedResults) { if (rankedResult.originalXmi != null) { byte[] bytes = new BASE64Decoder() .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes())); JCas jCas = JCasFactory.createJCas(); XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas()); Collection<Sentence> sentences = JCasUtil.select(jCas, Sentence.class); int groupId = sentences.size() / 40; if (rankedResult.originalXmi == null) { System.err.println("Empty document: " + rankedResult.clueWebID); } else { if (!groups.containsKey(groupId)) { groups.put(groupId, new ArrayList<>()); } } //handle it groups.get(groupId).add(rankedResult); countClueWeb++; } } for (Map.Entry<Integer, List<QueryResultContainer.SingleRankedResult>> entry : groups.entrySet()) { Integer groupId = entry.getKey(); List<QueryResultContainer.SingleRankedResult> rankedResults = entry.getValue(); // make sure the results are sorted // DEBUG // for (QueryResultContainer.SingleRankedResult r : rankedResults) { // 
System.out.print(r.rank + "\t"); // } Collections.sort(rankedResults, (o1, o2) -> o1.rank.compareTo(o2.rank)); // iterate over results for one query and group for (int i = 0; i < rankedResults.size() && i < TOP_RESULTS_PER_GROUP; i++) { QueryResultContainer.SingleRankedResult rankedResult = rankedResults.get(i); QueryResultContainer.SingleRankedResult r = rankedResults.get(i); int rank = r.rank; MustacheFactory mf = new DefaultMustacheFactory(); Mustache mustache = mf.compile("template/template.html"); String queryId = queryResultContainer.qID; String query = queryResultContainer.query; // make the first letter uppercase query = query.substring(0, 1).toUpperCase() + query.substring(1); List<String> relevantInformationExamples = queryResultContainer.relevantInformationExamples; List<String> irrelevantInformationExamples = queryResultContainer.irrelevantInformationExamples; byte[] bytes = new BASE64Decoder() .decodeBuffer(new ByteArrayInputStream(rankedResult.originalXmi.getBytes())); JCas jCas = JCasFactory.createJCas(); XmiCasDeserializer.deserialize(new ByteArrayInputStream(bytes), jCas.getCas()); List<generators.Sentence> sentences = new ArrayList<>(); List<Integer> paragraphs = new ArrayList<>(); paragraphs.add(0); for (WebParagraph webParagraph : JCasUtil.select(jCas, WebParagraph.class)) { for (Sentence s : JCasUtil.selectCovered(Sentence.class, webParagraph)) { String sentenceBegin = String.valueOf(s.getBegin()); generators.Sentence sentence = new generators.Sentence(s.getCoveredText(), sentenceBegin); sentences.add(sentence); countSentence++; } int SentenceID = paragraphs.get(paragraphs.size() - 1); if (sentences.size() > 120) while (SentenceID < sentences.size()) { if (!paragraphs.contains(SentenceID)) paragraphs.add(SentenceID); SentenceID = SentenceID + 120; } paragraphs.add(sentences.size()); } System.err.println("Output dir: " + outputDir); int startID = 0; int endID; for (int j = 0; j < paragraphs.size(); j++) { endID = paragraphs.get(j); int 
sentLength = endID - startID; if (sentLength > 120 || j == paragraphs.size() - 1) { if (sentLength > 120) { endID = paragraphs.get(j - 1); j--; } sentLength = endID - startID; if (sentLength <= 40) groupId = 40; else if (sentLength <= 80 && sentLength > 40) groupId = 80; else if (sentLength > 80) groupId = 120; File folder = new File(outputDir + "/" + groupId); if (!folder.exists()) { System.err.println("creating directory: " + outputDir + "/" + groupId); boolean result = false; try { folder.mkdir(); result = true; } catch (SecurityException se) { //handle it } if (result) { System.out.println("DIR created"); } } String newHtmlFile = folder.getAbsolutePath() + "/" + f.getName() + "_" + rankedResult.clueWebID + "_" + sentLength + ".html"; System.err.println("Printing a file: " + newHtmlFile); File newHTML = new File(newHtmlFile); int t = 0; while (newHTML.exists()) { newHTML = new File(folder.getAbsolutePath() + "/" + f.getName() + "_" + rankedResult.clueWebID + "_" + sentLength + "." + t + ".html"); t++; } mustache.execute(new PrintWriter(new FileWriter(newHTML)), new generators(query, relevantInformationExamples, irrelevantInformationExamples, sentences.subList(startID, endID), queryId, rank)) .flush(); startID = endID; } } } } } } System.out.println("Printed " + countClueWeb + " documents with " + countSentence + " sentences"); }