Example usage for org.apache.hadoop.io SequenceFile createWriter

Introduction

On this page you can find usage examples for org.apache.hadoop.io.SequenceFile.createWriter.

Prototype

@Deprecated
public static Writer createWriter(Configuration conf, FSDataOutputStream out, Class keyClass, Class valClass,
        CompressionType compressionType, CompressionCodec codec) throws IOException 

Document

Construct the preferred type of 'raw' SequenceFile Writer.

Usage

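Before the project examples below, here is a minimal, self-contained sketch of calling this deprecated overload directly. The output path, key/value classes, and the null codec are illustrative assumptions rather than values taken from any project on this page; a null codec is only sensible with CompressionType.NONE (the same pattern appears in the SnapshotIndexDeletionPolicy example further down).

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.SequenceFile.CompressionType;
import org.apache.hadoop.io.Text;

public class RawWriterSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // With this overload the caller owns the stream: create it, hand it to the writer, close both.
        FSDataOutputStream out = fs.create(new Path("/tmp/example.seq")); // illustrative path
        SequenceFile.Writer writer = SequenceFile.createWriter(conf, out, Text.class, LongWritable.class,
                CompressionType.NONE, null); // codec may be null when no compression is requested
        try {
            writer.append(new Text("answer"), new LongWritable(42L));
        } finally {
            writer.close();
            out.close();
        }
    }
}
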
From source file:io.covert.binary.analysis.BuildSequenceFileFromTarball.java

License:Apache License

public void load(FileSystem fs, Configuration conf, File inputTarball, Path outputDir) throws Exception {
    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Path sequenceName = new Path(outputDir, inputTarball.getName() + ".seq");
    System.out.println("Writing to " + sequenceName);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
            BytesWritable.class, CompressionType.RECORD);

    InputStream is = new FileInputStream(inputTarball);
    if (inputTarball.toString().toLowerCase().endsWith(".gz")) {
        is = new GZIPInputStream(is);
    } else if (inputTarball.toString().toLowerCase().endsWith(".bz")
            || inputTarball.toString().toLowerCase().endsWith(".bz2")) {
        is.read(); // read 'B'
        is.read(); // read 'Z'
        is = new CBZip2InputStream(is);
    }

    final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
            .createArchiveInputStream("tar", is);
    TarArchiveEntry entry = null;
    while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
        if (!entry.isDirectory()) {

            try {
                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            } catch (IOException e) {
                System.err.println("Warning: tarball may be truncated: " + inputTarball);
                // Truncated Tarball
                break;
            }
        }
    }
    debInputStream.close();
    writer.close();
}
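
As a quick complement (not part of the original source file), the key/value pairs written by load(...) can be read back with SequenceFile.Reader. The helper below is a sketch that reuses the Hadoop types already used above; the printed summary is illustrative.

public static void dump(FileSystem fs, Configuration conf, Path seqFile) throws IOException {
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, seqFile, conf);
    Text key = new Text();                   // MD5 hex digest of the tar entry
    BytesWritable val = new BytesWritable(); // raw bytes of the tar entry
    try {
        while (reader.next(key, val)) {
            System.out.println(key + "\t" + val.getLength() + " bytes");
        }
    } finally {
        reader.close();
    }
}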

From source file:io.covert.binary.analysis.BuildTarBzSequenceFile.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    File inDir = new File(args[0]);
    Path name = new Path(args[1]);

    Text key = new Text();
    BytesWritable val = new BytesWritable();

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);
    if (!fs.exists(name)) {
        fs.mkdirs(name);
    }
    for (File file : inDir.listFiles()) {
        if (!file.isFile()) {
            System.out.println("Skipping " + file + " (not a file) ...");
            continue;
        }

        Path sequenceName = new Path(name, file.getName() + ".seq");
        System.out.println("Writing to " + sequenceName);
        // Create the writer only after confirming this is a regular file, so no
        // orphaned (never-closed) writer is left behind for directories.
        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, sequenceName, Text.class,
                BytesWritable.class, CompressionType.RECORD);

        final InputStream is = new FileInputStream(file);
        final TarArchiveInputStream debInputStream = (TarArchiveInputStream) new ArchiveStreamFactory()
                .createArchiveInputStream("tar", is);
        TarArchiveEntry entry = null;
        while ((entry = (TarArchiveEntry) debInputStream.getNextEntry()) != null) {
            if (!entry.isDirectory()) {

                final ByteArrayOutputStream outputFileStream = new ByteArrayOutputStream();
                IOUtils.copy(debInputStream, outputFileStream);
                outputFileStream.close();
                byte[] outputFile = outputFileStream.toByteArray();
                val.set(outputFile, 0, outputFile.length);

                MessageDigest md = MessageDigest.getInstance("MD5");
                md.update(outputFile);
                byte[] digest = md.digest();
                String hexdigest = "";
                for (int i = 0; i < digest.length; i++) {
                    hexdigest += Integer.toString((digest[i] & 0xff) + 0x100, 16).substring(1);
                }
                key.set(hexdigest);
                writer.append(key, val);
            }
        }
        debInputStream.close();
        writer.close();
    }

    return 0;
}

From source file:io.hops.erasure_coding.MapReduceEncoder.java

License:Apache License

/**
 * Set up the input file that contains the list of input files.
 *
 * @return true if the job was prepared successfully
 * @throws java.io.IOException
 */
private boolean prepareJob(PolicyInfo info) throws IOException {
    final String randomId = getRandomId();
    JobClient jClient = new JobClient(jobconf);
    Path jobdir = new Path(jClient.getSystemDir(), NAME + "_" + randomId);

    LOG.info(JOB_DIR_LABEL + "=" + jobdir);
    jobconf.set(JOB_DIR_LABEL, jobdir.toString());
    Path log = new Path(jobdir, "_logs");

    FileOutputFormat.setOutputPath(jobconf, log);
    LOG.info("log=" + log);

    // create operation list
    FileSystem fs = jobdir.getFileSystem(jobconf);
    Path opList = new Path(jobdir, "_" + OP_LIST_LABEL);
    jobconf.set(OP_LIST_LABEL, opList.toString());
    SequenceFile.Writer opWriter = null;

    try {
        opWriter = SequenceFile.createWriter(fs, jobconf, opList, Text.class, PolicyInfo.class,
                SequenceFile.CompressionType.NONE);
        opWriter.append(new Text(info.getSrcPath().toString()), info);
    } finally {
        if (opWriter != null) {
            opWriter.close();
        }
    }

    jobconf.setInt(OP_COUNT_LABEL, 1);
    jobconf.setNumMapTasks(1);
    LOG.info("jobName= " + jobName + " numMapTasks=" + jobconf.getNumMapTasks());
    return true;
}

From source file:map_reduce.MapReduce_OptimizedBrandesAdditions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesAdditions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_additions_nocomb_10k_1 output_iterbrandes_additions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesAdditions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_additions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesAdditionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    int numOfTasksperMap = (int) Math.ceil((double) N / numOfMaps); // cast so the ceiling is applied to a real quotient
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + M + re); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:map_reduce.MapReduce_OptimizedBrandesDeletions_DO_JUNG.java

License:Open Source License

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        System.err.println("Usage:\n");
        System.exit(1);
    }

    //       Job job = new Job(super.getConf());

    //      READ IN ALL COMMAND LINE ARGUMENTS
    //      EXAMPLE: 
    // hadoop jar MapReduce_OptimizedBrandesDeletions_DO_JUNG.jar
    // -libjars collections-generic-4.01.jar,jung-graph-impl-2.0.1.jar,jung-api-2.0.1.jar
    // -Dmapred.job.map.memory.mb=4096
    // -Dmapred.job.reduce.memory.mb=4096
    // -Dmapred.child.java.opts=-Xmx3500m
    // -Dmapreduce.task.timeout=60000000
    // -Dmapreduce.job.queuename=QUEUENAME
    // input_iterbrandes_deletions_nocomb_10k_1 output_iterbrandes_deletions_nocomb_10k_1
    // 10 1 10000 55245 10k 10k_randedges 100 1 false times/ betweenness/

    int m = -1;

    // input path to use on hdfs
    Path inputPath = new Path(args[++m]);

    // output path to use on hdfs
    Path outputPath = new Path(args[++m]);

    // number of Mappers to split the sources: e.g., 1, 10, 100 etc.
    // rule of thumb: the larger the graph (i.e., number of roots to test), the larger should be this number.
    int numOfMaps = Integer.parseInt(args[++m]);

    // number of Reducers to collect the output
    int numOfReduce = Integer.parseInt(args[++m]);

    // Number of vertices in graph
    int N = Integer.parseInt(args[++m]);

    // Number of edges in graph
    int M = Integer.parseInt(args[++m]);

    // Graph file (edge list, tab delimited) (full path)
    String graph = args[++m];

    // File with edges to be added (tab delimited) (full path)
    // Note: this version handles only edges between existing vertices in the graph.
    String random_edges = args[++m];

    // Number of random edges added
    int re = Integer.parseInt(args[++m]);

    // Experiment iteration (in case of multiple experiments)
    int iter = Integer.parseInt(args[++m]);

    // Use combiner or not (true/false)
    Boolean comb = Boolean.valueOf(args[++m]);

    // Output path for file with stats
    String statsoutputpath = args[++m];

    // Output path for file with final betweenness values
    String betoutputpath = args[++m];

    //      BEGIN INITIALIZATION

    JobConf conf = new JobConf(getConf(), MapReduce_OptimizedBrandesDeletions_DO_JUNG.class);
    FileSystem fs = FileSystem.get(conf);

    String setup = "_deletions_edges" + re + "_maps" + numOfMaps + "_comb" + comb;
    conf.setJobName("OptimizedBrandesDeletionsDOJung_" + graph + setup + "_" + iter);
    conf.set("HDFS_GRAPH", graph + setup);
    conf.set("HDFS_Random_Edges", random_edges + setup);
    conf.set("output", outputPath.getName());
    conf.set("setup", setup);

    //      CREATE INPUT FILES FOR MAPPERS

    int numOfTasksperMap = (int) Math.ceil((double) N / numOfMaps); // cast so the ceiling is applied to a real quotient
    //generate an input file for each map task
    for (int i = 0; i < numOfMaps - 1; i++) {
        Path file = new Path(inputPath, "part-r-" + i);
        IntWritable start = new IntWritable(i * numOfTasksperMap);
        IntWritable end = new IntWritable((i * numOfTasksperMap) + numOfTasksperMap - 1);

        SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class,
                IntWritable.class, CompressionType.NONE);
        try {
            writer.append(start, end);
        } finally {
            writer.close();
        }
        System.out.println("Wrote input for Map #" + i + ": " + start + " - " + end);
    }

    // last mapper takes what is left
    Path file = new Path(inputPath, "part-r-" + (numOfMaps - 1));
    IntWritable start = new IntWritable((numOfMaps - 1) * numOfTasksperMap);
    IntWritable end = new IntWritable(N - 1);
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, IntWritable.class, IntWritable.class,
            CompressionType.NONE);
    try {
        writer.append(start, end);
    } finally {
        writer.close();
    }
    System.out.println("Wrote input for Map #" + (numOfMaps - 1) + ": " + start + " - " + end);

    //      COPY FILES TO MAPPERS
    System.out.println("Copying graph to cache");
    String LOCAL_GRAPH = graph;
    Path hdfsPath = new Path(graph + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_GRAPH), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    System.out.println("Copying random edges to cache");
    String LOCAL_Random_Edges = random_edges;
    hdfsPath = new Path(random_edges + setup);

    // upload the file to hdfs. Overwrite any existing copy.
    fs.copyFromLocalFile(false, true, new Path(LOCAL_Random_Edges), hdfsPath);
    DistributedCache.addCacheFile(hdfsPath.toUri(), conf);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapperClass(IterBrandesMapper.class);
    conf.setNumMapTasks(numOfMaps);

    if (comb)
        conf.setCombinerClass(IterBrandesReducer.class);

    conf.setReducerClass(IterBrandesReducer.class);
    conf.setNumReduceTasks(numOfReduce);

    // turn off speculative execution, because DFS doesn't handle multiple writers to the same file.
    conf.setSpeculativeExecution(false);

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setOutputFormat(SequenceFileOutputFormat.class);

    FileInputFormat.setInputPaths(conf, inputPath);
    FileOutputFormat.setOutputPath(conf, outputPath);

    // conf.set("mapred.job.name", "APS-" + outputPath.getName());
    conf.setNumTasksToExecutePerJvm(-1); // JVM reuse

    System.out.println("Starting the execution...! Pray!! \n");
    long time1 = System.nanoTime();
    RunningJob rj = JobClient.runJob(conf);
    long time2 = System.nanoTime();

    //      READ OUTPUT FILES

    System.out.println("\nFinished and now reading/writing Betweenness Output...\n");

    // Assuming 1 reducer.
    Path inFile = new Path(outputPath, "part-00000");
    IntWritable id = new IntWritable();
    DoubleWritable betweenness = new DoubleWritable();
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);

    FileWriter fw = new FileWriter(new File(betoutputpath + graph + setup + "_betweenness_" + iter));
    try {
        int i = 0;
        for (; i < (N + (M - re)); i++) {
            reader.next(id, betweenness);
            fw.write(id + "\t" + betweenness + "\n");
            fw.flush();
        }
    } finally {
        reader.close();
        fw.close();
    }

    System.out.println("\nWriting times Output...\n");

    fw = new FileWriter(new File(statsoutputpath + graph + setup + "_times_" + iter));

    fw.write("Total-time:\t" + (time2 - time1) + "\n");
    fw.write("total-map\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_MAPS") + "\n");
    fw.write("total-reduce\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("SLOTS_MILLIS_REDUCES") + "\n");
    fw.write("total-cpu-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("CPU_MILLISECONDS") + "\n");
    fw.write("total-gc-mr\t"
            + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter").getCounter("GC_TIME_MILLIS")
            + "\n");
    fw.write("total-phy-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("PHYSICAL_MEMORY_BYTES") + "\n");
    fw.write("total-vir-mem-mr\t" + rj.getCounters().getGroup("org.apache.hadoop.mapreduce.TaskCounter")
            .getCounter("VIRTUAL_MEMORY_BYTES") + "\n");
    fw.write("brandes\t" + rj.getCounters().getGroup("TimeForBrandes").getCounter("exectime_initial_brandes")
            + "\n");
    fw.write("reduce\t" + rj.getCounters().getGroup("TimeForReduce").getCounter("reduceafteralledges") + "\n");
    fw.flush();

    try {
        Iterator<Counters.Counter> counters = rj.getCounters().getGroup("TimeForRandomEdges").iterator();
        while (counters.hasNext()) {
            Counter cc = counters.next();
            fw.write(cc.getName() + "\t" + cc.getCounter() + "\n");
            fw.flush();
        }
    } finally {
        fw.close();
    }

    return 0;
}

From source file:nthu.scopelab.tsqr.ssvd.SSVDSolver.java

License:Apache License

/**
 * Run all SSVD jobs.
 *
 * @throws IOException
 *           if an I/O error occurs.
 */
public void run() throws Exception {
    try {
        System.out.println("SSVD start!");
        FileSystem fs = FileSystem.get(conf);

        Path qPath = new Path(outputPath, "Q-job");
        Path btPath = new Path(outputPath, "Bt-job");
        Path yPath = new Path(outputPath, "Y-job"); // test phase
        Path uHatPath = new Path(outputPath, "UHat");
        Path svPath = new Path(outputPath, "Sigma");
        Path uPath = new Path(outputPath, "U");
        Path vPath = new Path(outputPath, "V");

        if (overwrite) {
            fs.delete(outputPath, true);
        }

        int[] iseed = { 0, 0, 0, 1 };
        double[] x = new double[1];
        Dlarnv.dlarnv(2, iseed, 0, 1, x, 0);
        long seed = (long) (x[0] * (double) Long.MAX_VALUE);

        long start, end;

        start = new Date().getTime();
        QJob.run(conf, inputPath, qPath.toString(), reduceSchedule, k, p, seed, mis);
        end = new Date().getTime();
        System.out.println("Q-Job done " + Long.toString(end - start));
        Logger LOG = LoggerFactory.getLogger(SSVDSolver.class);

        /*
         * restrict number of reducers to a reasonable number so we don't have to
         * run too many additions in the frontend when reconstructing BBt for the
         * last B' and BB' computations. The user may not realize that and gives a
         * bit too many (I would be happy if that were ever the case though).
         */

        start = new Date().getTime();
        BtJob.run(conf, inputPath, btPath, qPath.toString(), k, p, outerBlockHeight,
                q <= 0 ? Math.min(1000, reduceTasks) : reduceTasks, q <= 0, reduceSchedule, mis);

        end = new Date().getTime();
        System.out.println("Bt-Job done " + Long.toString(end - start));

        // power iterations are unnecessary in this recommendation-system application
        /*for (int i = 0; i < q; i++) {
            Path btPathGlob = new Path(btPath, BtJob.OUTPUT_BT + "-*");
            Path aBtPath = new Path(outputPath, String.format("ABt-job-%d", i + 1));
            qPath = new Path(outputPath, String.format("ABtQ-job-%d", i + 1));
            ABtDenseOutJob.run(conf,
                    inputPath,
                    btPathGlob,
                    aBtPath, //qPath,
                    //ablockRows,
                    //minSplitSize,
                    k,
                    p,
                    //abtBlockHeight,
                    reduceTasks,
                    //broadcast
                    mis);

            ToolRunner.run(conf, new QRFirstJob(), new String[] {
                    "-input", aBtPath.toString(),
                    "-output", qPath.toString(),
                    "-mis", Integer.toString(mis),
                    "-colsize", Integer.toString(k + p),
                    "-reduceSchedule", reduceSchedule });

            btPath = new Path(outputPath, String.format("Bt-job-%d", i + 1));

            BtJob.run(conf,
                    inputPath,
                    btPath,
                    qPath.toString(),
                    k,
                    p,
                    outerBlockHeight,
                    i == q - 1 ? Math.min(1000, reduceTasks) : reduceTasks,
                    i == q - 1,
                    reduceSchedule,
                    mis);
        }*/

        cmUpperTriangDenseMatrix bbt = loadAndSumUpperTriangMatrices(fs,
                new Path(btPath, BtJob.OUTPUT_BBT + "-*"), conf);

        // convert bbt to something our eigensolver could understand
        assert bbt.numColumns() == k + p;

        double[][] bbtSquare = new double[k + p][];
        for (int i = 0; i < k + p; i++) {
            bbtSquare[i] = new double[k + p];
        }

        for (int i = 0; i < k + p; i++) {
            for (int j = i; j < k + p; j++) {
                bbtSquare[i][j] = bbtSquare[j][i] = bbt.get(i, j);
            }
        }

        svalues = new double[k + p];

        // try something else.
        EigenSolver eigenWrapper = new EigenSolver(bbtSquare);
        double[] eigenva2 = eigenWrapper.getWR();

        for (int i = 0; i < k + p; i++) {
            svalues[i] = Math.sqrt(eigenva2[i]); // sqrt?
        }
        // save/redistribute UHat
        double[][] uHat = eigenWrapper.getVL();
        //double[][] uHat = eigenWrapper.getUHat();

        fs.mkdirs(uHatPath);
        SequenceFile.Writer uHatWriter = SequenceFile.createWriter(fs, conf,
                uHatPath = new Path(uHatPath, "uhat.seq"), IntWritable.class, VectorWritable.class,
                CompressionType.BLOCK);

        int m = uHat.length;
        IntWritable iw = new IntWritable();
        VectorWritable vw = new VectorWritable();

        for (int i = 0; i < m; i++) {
            vw.set(new DenseVector(uHat[i], true));
            iw.set(i);
            uHatWriter.append(iw, vw);
        }
        uHatWriter.close();

        SequenceFile.Writer svWriter = SequenceFile.createWriter(fs, conf,
                svPath = new Path(svPath, "svalues.seq"), IntWritable.class, VectorWritable.class,
                CompressionType.BLOCK);

        vw.set(new DenseVector(svalues, true));
        svWriter.append(iw, vw);

        svWriter.close();

        start = new Date().getTime();
        UJob ujob = null;
        if (computeU) {
            ujob = new UJob();
            ujob.start(conf, new Path(btPath, BtJob.Q_MAT + "-*"), uHatPath, svPath, uPath, k, cUHalfSigma,
                    mis);
            // actually this is map-only job anyway
        }

        VJob vjob = null;
        if (computeV) {
            vjob = new VJob();
            vjob.start(conf, new Path(btPath, BtJob.OUTPUT_BT + "-*"), uHatPath, svPath, vPath, k, reduceTasks,
                    subRowSize, cVHalfSigma, mis);
        }

        if (ujob != null) {
            ujob.waitForCompletion();
            this.uPath = uPath.toString();
        }
        System.out.println("U-Job done ");

        if (vjob != null) {
            vjob.waitForCompletion();
            this.vPath = vPath.toString();
        }
        end = new Date().getTime();
        System.out.println("U-Job+V-Job done " + (end - start));

    } catch (InterruptedException exc) {
        throw new IOException("Interrupted", exc);
    } catch (ClassNotFoundException exc) {
        throw new IOException(exc);
    }

}

From source file:org.apache.blur.manager.writer.SnapshotIndexDeletionPolicy.java

License:Apache License

private synchronized void storeGenerations() throws IOException {
    FileSystem fileSystem = _path.getFileSystem(_configuration);
    FileStatus[] listStatus = fileSystem.listStatus(_path);
    SortedSet<FileStatus> existing = new TreeSet<FileStatus>(Arrays.asList(listStatus));
    long currentFile;
    if (!existing.isEmpty()) {
        FileStatus last = existing.last();
        currentFile = Long.parseLong(last.getPath().getName());
    } else {
        currentFile = 0;
    }
    Path path = new Path(_path, buffer(currentFile + 1));
    LOG.info("Creating new snapshot file [{0}]", path);
    FSDataOutputStream outputStream = fileSystem.create(path, false);
    Writer writer = SequenceFile.createWriter(_configuration, outputStream, Text.class, LongWritable.class,
            CompressionType.NONE, null);
    for (Entry<String, Long> e : _namesToGenerations.entrySet()) {
        writer.append(new Text(e.getKey()), new LongWritable(e.getValue()));
    }
    writer.close();
    outputStream.close();
    cleanupOldFiles(fileSystem, existing);
}

From source file:org.apache.flink.streaming.connectors.fs.SequenceFileWriter.java

License:Apache License

@Override
public void open(FileSystem fs, Path path) throws IOException {
    super.open(fs, path);
    if (keyClass == null) {
        throw new IllegalStateException("Key Class has not been initialized.");
    }
    if (valueClass == null) {
        throw new IllegalStateException("Value Class has not been initialized.");
    }

    CompressionCodec codec = null;

    if (!compressionCodecName.equals("None")) {
        CompressionCodecFactory codecFactory = new CompressionCodecFactory(new Configuration());
        codec = codecFactory.getCodecByName(compressionCodecName);
        if (codec == null) {
            throw new RuntimeException("Codec " + compressionCodecName + " not found.");
        }
    }

    // the non-deprecated, option-based createWriter is only available in recent Hadoop versions (see the sketch after this method)
    writer = SequenceFile.createWriter(new Configuration(), getStream(), keyClass, valueClass, compressionType,
            codec);
}
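
As the comment in open() notes, recent Hadoop versions provide a non-deprecated, option-based factory. The following is only a sketch of that variant, assuming Hadoop 2.x or later and that getStream() returns the FSDataOutputStream opened above; it is not part of the original source file.

// Option-based alternative (Hadoop 2.x+); intended to behave like the call above.
SequenceFile.Writer.Option compress = (codec == null)
        ? SequenceFile.Writer.compression(compressionType)
        : SequenceFile.Writer.compression(compressionType, codec);
writer = SequenceFile.createWriter(new Configuration(),
        SequenceFile.Writer.stream(getStream()),
        SequenceFile.Writer.keyClass(keyClass),
        SequenceFile.Writer.valueClass(valueClass),
        compress);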

From source file:org.apache.flume.sink.customhdfs.HDFSSequenceFile.java

License:Apache License

protected void open(Path dstPath, CompressionCodec codeC, CompressionType compType, Configuration conf,
        FileSystem hdfs) throws IOException {
    if (useRawLocalFileSystem) {
        if (hdfs instanceof LocalFileSystem) {
            hdfs = ((LocalFileSystem) hdfs).getRaw();
        } else {
            logger.warn("useRawLocalFileSystem is set to true but file system "
                    + "is not of type LocalFileSystem: " + hdfs.getClass().getName());
        }
    }
    if (conf.getBoolean("hdfs.append.support", false) == true && hdfs.isFile(dstPath)) {
        outStream = hdfs.append(dstPath);
    } else {
        outStream = hdfs.create(dstPath);
    }
    writer = SequenceFile.createWriter(conf, outStream, serializer.getKeyClass(), serializer.getValueClass(),
            compType, codeC);

    registerCurrentStream(outStream, hdfs, dstPath);
}

From source file:org.apache.hadoop.examples.QuasiMonteCarlo.java

License:Apache License

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    //setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);

    job.setInputFormatClass(SequenceFileInputFormat.class);

    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    job.setMapperClass(QmcMapper.class);

    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);

    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);

    //setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);

    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException(
                "Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }

    try {
        //generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class,
                    LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }

        //start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = Time.monotonicNow();
        job.waitForCompletion(true);
        if (!job.isSuccessful()) {
            System.out.println("Job " + job.getJobID() + " failed!");
            System.exit(1);
        }
        final double duration = (Time.monotonicNow() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");

        //read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }

        //compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal,
                RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}