Example usage for org.apache.mahout.utils.io ChunkedWriter close

List of usage examples for org.apache.mahout.utils.io ChunkedWriter close

Introduction

On this page you can find example usage of org.apache.mahout.utils.io.ChunkedWriter.close().

Prototype

@Override
    public void close() throws IOException 

Source Link

Usage

From source file:edu.indiana.d2i.htrc.io.SequentialDataCopyJob.java

License:Apache License

/**
 * Copies the content of the volumes listed in an input file into chunked
 * sequence files.
 *
 * <p>The input file is read line by line; each line is treated as one id.
 * Ids are joined with a trailing {@code |} and sent to the data API client
 * in batches of 100, and every response is handed to {@code text2Seq}
 * together with the chunk writer.
 *
 * @param args exactly five arguments, in order: input path, output path,
 *             chunk size in MB, data API configuration class name, and
 *             the maximum number of ids per request
 * @return {@code 0} on success, {@code -1} when the argument count is wrong
 * @throws Exception if an argument is not a valid integer, or if reading
 *                   the input, fetching content, or writing output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        printUsage();
        // Stop here: falling through would index past the end of args
        // (or silently ignore surplus arguments).
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];
    int chunkSizeInMB = Integer.parseInt(args[2]);
    String dataAPIConfClassName = args[3];
    int maxIdsPerReq = Integer.parseInt(args[4]);

    logger.info("SequentialDataCopyJob ");
    logger.info(" - input: " + inputPath);
    logger.info(" - output: " + outputPath);
    logger.info(" - chunkSizeInMB: " + chunkSizeInMB);
    logger.info(" - dataAPIConfClassName: " + dataAPIConfClassName);
    logger.info(" - maxIdsPerReq: " + maxIdsPerReq);

    Configuration conf = getConf();
    Utilities.setDataAPIConf(conf, dataAPIConfClassName, maxIdsPerReq);

    HTRCDataAPIClient client = Utilities.creatDataAPIClient(conf);

    // Number of ids accumulated before one request is sent to the client.
    // NOTE(review): this is independent of maxIdsPerReq, which is only
    // passed to Utilities.setDataAPIConf -- confirm that is intended.
    final int idNumThreshold = 100;

    ChunkedWriter chunkWriter = new ChunkedWriter(conf, chunkSizeInMB, new Path(outputPath));
    try {
        Path input = new Path(inputPath);
        FileSystem fs = input.getFileSystem(conf);
        DataInputStream fsinput = new DataInputStream(fs.open(input));
        BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput));
        try {
            StringBuilder idList = new StringBuilder();
            int idNum = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                idList.append(line).append('|');
                if (++idNum >= idNumThreshold) {
                    text2Seq(client.getID2Content(idList.toString()), chunkWriter);
                    idList.setLength(0);
                    idNum = 0;
                }
            }
            // Flush the final, partially filled batch.
            if (idList.length() > 0) {
                text2Seq(client.getID2Content(idList.toString()), chunkWriter);
            }
        } finally {
            // Closing the reader also closes the wrapped input streams.
            reader.close();
        }
    } finally {
        // Always close the writer, even on failure, so it is not leaked.
        chunkWriter.close();
    }

    return 0;
}