List of usage examples for org.apache.commons.io IOUtils readLines
public static List readLines(Reader input) throws IOException
Gets the contents of a Reader as a list of Strings, one entry per line.
From source file: it.unimi.di.big.mg4j.document.WarcDocumentSequence.java
public static void main(String[] args) throws Exception { SimpleJSAP jsap = new SimpleJSAP(WarcDocumentSequence.class.getName(), "Saves a serialised Warc document sequence based on a set of file names.", new Parameter[] { new FlaggedOption("factory", JSAP.CLASS_PARSER, IdentityDocumentFactory.class.getName(), JSAP.NOT_REQUIRED, 'f', "factory", "A document factory with a standard constructor."), new FlaggedOption("property", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, 'p', "property", "A 'key=value' specification, or the name of a property file") .setAllowMultipleDeclarations(true), new Switch("gzip", 'z', "gzip", "Expect gzip-ed WARC content (files should end in .warc.gz)."), new FlaggedOption("bufferSize", JSAP.INTSIZE_PARSER, DEFAULT_BUFFER_SIZE, JSAP.NOT_REQUIRED, 'b', "buffer-size", "The size of an I/O buffer."), new UnflaggedOption("sequence", JSAP.STRING_PARSER, JSAP.REQUIRED, "The filename for the serialized sequence."), new UnflaggedOption("basename", JSAP.STRING_PARSER, JSAP.NO_DEFAULT, JSAP.NOT_REQUIRED, JSAP.GREEDY, "A list of basename files that will be indexed. If missing, a list of files will be read from standard input.") }); final JSAPResult jsapResult = jsap.parse(args); if (jsap.messagePrinted()) System.exit(1);//w w w . j a v a 2s.c o m final DocumentFactory factory = PropertyBasedDocumentFactory.getInstance(jsapResult.getClass("factory"), jsapResult.getStringArray("property")); final boolean isGZipped = jsapResult.getBoolean("gzip"); String[] file = jsapResult.getStringArray("basename"); if (file.length == 0) file = IOUtils.readLines(System.in).toArray(new String[0]); if (file.length == 0) LOGGER.warn("Empty fileset"); BinIO.storeObject(new WarcDocumentSequence(file, factory, isGZipped, jsapResult.getInt("bufferSize")), jsapResult.getString("sequence")); }
From source file:edu.cornell.mannlib.vitro.webapp.controller.datatools.dumprestore.NQuadLineSplitterTest.java
/**
 * Loads the lines of the {@code NQuadLineSplitterTest.nq} classpath resource into
 * {@code testData} before the tests of this class run.
 *
 * @throws IOException if the resource cannot be found or cannot be read.
 */
@BeforeClass
public static void readTestData() throws IOException {
    final String resourceName = "NQuadLineSplitterTest.nq";
    // try-with-resources skips close() for a null resource, so the null check can live inside.
    try (InputStream stream = NQuadLineSplitterTest.class.getResourceAsStream(resourceName)) {
        if (stream == null) {
            // getResourceAsStream reports "not found" by returning null, not by throwing.
            throw new IOException("Test resource not found: " + resourceName);
        }
        testData = IOUtils.readLines(stream);
    }
}
From source file:gaffer.example.gettingstarted.util.DataUtils.java
public static List<String> loadData(final InputStream dataStream) { List<String> lines = null; try {//from www . j a v a 2 s .com lines = IOUtils.readLines(dataStream); } catch (IOException e) { e.printStackTrace(); } finally { IOUtils.closeQuietly(dataStream); } return lines; }
From source file:com.intropro.prairie.utils.FileUtils.java
public static List<String> readLineInDirectory(File file) throws IOException { List<String> result = new LinkedList<>(); for (String child : file.list()) { File childFile = new File(file, child); if (childFile.isDirectory()) { result.addAll(readLineInDirectory(childFile)); } else {//from w w w .j av a2 s . c om result.addAll(IOUtils.readLines(new FileInputStream(childFile))); } } return result; }
From source file:com.linkedin.pinot.perf.ForwardIndexWriterBenchmark.java
public static void convertRawToForwardIndex(File rawFile) throws Exception { List<String> lines = IOUtils.readLines(new FileReader(rawFile)); int totalDocs = lines.size(); int max = Integer.MIN_VALUE; int maxNumberOfMultiValues = Integer.MIN_VALUE; int totalNumValues = 0; int data[][] = new int[totalDocs][]; for (int i = 0; i < lines.size(); i++) { String line = lines.get(i); String[] split = line.split(","); totalNumValues = totalNumValues + split.length; if (split.length > maxNumberOfMultiValues) { maxNumberOfMultiValues = split.length; }//from w w w .j av a2 s .com data[i] = new int[split.length]; for (int j = 0; j < split.length; j++) { String token = split[j]; int val = Integer.parseInt(token); data[i][j] = val; if (val > max) { max = val; } } } int maxBitsNeeded = (int) Math.ceil(Math.log(max) / Math.log(2)); int size = 2048; int[] offsets = new int[size]; int bitMapSize = 0; File outputFile = new File("output.mv.fwd"); FixedBitMultiValueWriter fixedBitSkipListSCMVWriter = new FixedBitMultiValueWriter(outputFile, totalDocs, totalNumValues, maxBitsNeeded); for (int i = 0; i < totalDocs; i++) { fixedBitSkipListSCMVWriter.setIntArray(i, data[i]); if (i % size == size - 1) { MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(offsets); ByteArrayOutputStream bos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(bos); rr1.serialize(dos); dos.close(); // System.out.println("Chunk " + i / size + " bitmap size:" + bos.size()); bitMapSize += bos.size(); } else if (i == totalDocs - 1) { MutableRoaringBitmap rr1 = MutableRoaringBitmap.bitmapOf(Arrays.copyOf(offsets, i % size)); ByteArrayOutputStream bos = new ByteArrayOutputStream(); DataOutputStream dos = new DataOutputStream(bos); rr1.serialize(dos); dos.close(); // System.out.println("Chunk " + i / size + " bitmap size:" + bos.size()); bitMapSize += bos.size(); } } fixedBitSkipListSCMVWriter.close(); System.out.println("Output file size:" + outputFile.length()); 
System.out.println("totalNumberOfDoc\t\t\t:" + totalDocs); System.out.println("totalNumberOfValues\t\t\t:" + totalNumValues); System.out.println("chunk size\t\t\t\t:" + size); System.out.println("Num chunks\t\t\t\t:" + totalDocs / size); int numChunks = totalDocs / size + 1; int totalBits = (totalNumValues * maxBitsNeeded); int dataSizeinBytes = (totalBits + 7) / 8; System.out.println("Raw data size with fixed bit encoding\t:" + dataSizeinBytes); System.out.println("\nPer encoding size"); System.out.println(); System.out.println("size (offset + length)\t\t\t:" + ((totalDocs * (4 + 4)) + dataSizeinBytes)); System.out.println(); System.out.println("size (offset only)\t\t\t:" + ((totalDocs * (4)) + dataSizeinBytes)); System.out.println(); System.out.println("bitMapSize\t\t\t\t:" + bitMapSize); System.out.println("size (with bitmap)\t\t\t:" + (bitMapSize + (numChunks * 4) + dataSizeinBytes)); System.out.println(); System.out.println("Custom Bitset\t\t\t\t:" + (totalNumValues + 7) / 8); System.out.println("size (with custom bitset)\t\t\t:" + (((totalNumValues + 7) / 8) + (numChunks * 4) + dataSizeinBytes)); }
From source file:com.github.DataLoader.java
public static Map<String, Repository> loadRepositories() throws IOException { final Map<String, Repository> repositories = new HashMap<String, Repository>(); final Map<String, String> relationships = new HashMap<String, String>(); // First, discover all the repositories. for (final Object o : IOUtils .readLines(DataLoader.class.getClassLoader().getResourceAsStream("data/repos.txt"))) { final String line = (String) o; final String[] mainParts = line.trim().split(":"); final String repo_id = mainParts[0]; final String[] repo_data = mainParts[1].split(","); final String created_at = repo_data[1]; final String parent_id = repo_data.length == 3 ? repo_data[2] : null; final String[] fullName = repo_data[0].split("/"); final String owner = fullName[0]; final String name = fullName[1]; repositories.put(repo_id, new Repository(repo_id, owner, name, created_at)); if (parent_id != null) { relationships.put(repo_id, parent_id); }//w w w . j av a2 s .c o m } // Now that all the repositories have been loaded, establish any parent-child relationships. for (final Map.Entry<String, String> pair : relationships.entrySet()) { repositories.get(pair.getKey()).setParent(repositories.get(pair.getValue())); } final Map<String, Watcher> watchers = new HashMap<String, Watcher>(); for (final Object o : IOUtils .readLines(DataLoader.class.getClassLoader().getResourceAsStream("data/data.txt"))) { final String line = (String) o; final String[] mainParts = line.trim().split(":"); final String user_id = mainParts[0]; final String repo_id = mainParts[1]; final Watcher watcher = watchers.get(user_id) == null ? new Watcher(user_id) : watchers.get(user_id); repositories.get(repo_id).associate(watcher); } return repositories; }
From source file:com.doculibre.constellio.utils.license.ApplyLicenseUtils.java
@SuppressWarnings("unchecked") private static List<String> readLines(File file) throws IOException { FileInputStream is = new FileInputStream(file); List<String> lines = IOUtils.readLines(is); IOUtils.closeQuietly(is);//w w w .j ava 2 s . c om return lines; }
From source file:com.alexholmes.hadooputils.io.FileUtils.java
/** * Read the contents of the supplied file into a list. * * @param fs a Hadoop file system/*from w ww .ja v a 2 s. co m*/ * @param p the file path * @return array of lines in the file * @throws java.io.IOException if something goes wrong */ public static List<String> readLines(final FileSystem fs, final Path p) throws IOException { InputStream stream = fs.open(p); try { return IOUtils.readLines(stream); } finally { stream.close(); } }
From source file:dz.jtsgen.processor.helper.OutputHelper.java
public static List<String> findSourceLine(Compilation c, String packageName, String fileName, Pattern pattern) throws IOException { Assert.assertTrue(c.generatedFile(StandardLocation.SOURCE_OUTPUT, packageName, fileName).isPresent()); JavaFileObject jfo = c.generatedFile(StandardLocation.SOURCE_OUTPUT, packageName, fileName).get(); try (Reader r = jfo.openReader(false)) { return IOUtils.readLines(r).stream().filter((x) -> pattern.matcher(x).find()) .collect(Collectors.toList()); }//from w ww . j a v a 2 s . com }
From source file:com.splunk.shuttl.testutil.SplunkTestUtils.java
public static List<String> readSearchResults(InputStream results) { try {//from ww w . j a v a2 s . c o m return IOUtils.readLines(results); } catch (IOException e) { throw new RuntimeException(e); } }