List of usage examples for org.apache.mahout.utils.io.ChunkedWriter#close()
@Override
public void close() throws IOException
From source file: edu.indiana.d2i.htrc.io.SequentialDataCopyJob.java
License: Apache License
@Override public int run(String[] args) throws Exception { if (args.length != 5) { printUsage();// w w w . j a va 2 s . c o m } String inputPath = args[0]; String outputPath = args[1]; int chunkSizeInMB = Integer.valueOf(args[2]); String dataAPIConfClassName = args[3]; int maxIdsPerReq = Integer.valueOf(args[4]); logger.info("SequentialDataCopyJob "); logger.info(" - input: " + inputPath); logger.info(" - output: " + outputPath); logger.info(" - chunkSizeInMB: " + chunkSizeInMB); logger.info(" - dataAPIConfClassName: " + dataAPIConfClassName); logger.info(" - maxIdsPerReq: " + maxIdsPerReq); Configuration conf = getConf(); Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq); HTRCDataAPIClient client = Utilities.creatDataAPIClient(conf); ChunkedWriter chunkWriter = new ChunkedWriter(getConf(), chunkSizeInMB, new Path(outputPath)); Path input = new Path(inputPath); FileSystem fs = input.getFileSystem(conf); DataInputStream fsinput = new DataInputStream(fs.open(input)); BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput)); String line = null; int idNumThreshold = 100; int idNum = 0; StringBuilder idList = new StringBuilder(); while ((line = reader.readLine()) != null) { idList.append(line + "|"); if ((++idNum) >= idNumThreshold) { text2Seq(client.getID2Content(idList.toString()), chunkWriter); idList = new StringBuilder(); idNum = 0; } } if (idList.length() > 0) text2Seq(client.getID2Content(idList.toString()), chunkWriter); chunkWriter.close(); reader.close(); return 0; }