Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.JobConf.setOutputFormat, drawn from open-source projects.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Document

Set the OutputFormat implementation for the map-reduce job.
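Before the project examples below, here is a minimal sketch of how setOutputFormat is typically wired into a driver using the old mapred API. The class name, paths, and the map-only pass-through setup are hypothetical and not taken from any of the examples on this page; only the standard Hadoop classes and calls are assumed.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetOutputFormatSketch {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(SetOutputFormatSketch.class);
        conf.setJobName("setOutputFormat-sketch");

        // Map-only pass-through: the default IdentityMapper emits the
        // LongWritable byte offsets and Text lines unchanged.
        conf.setNumReduceTasks(0);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        conf.setInputFormat(TextInputFormat.class);
        // setOutputFormat takes a Class<? extends OutputFormat>;
        // TextOutputFormat writes each record as key<TAB>value.
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}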

Usage

From source file:hydrograph.engine.cascading.scheme.hive.parquet.HiveParquetScheme.java

License:Apache License

@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);

    jobConf.set(DataWritableWriteSupport.PARQUET_HIVE_SCHEMA,
            HiveParquetSchemeHelper.getParquetSchemeMessage(hiveTableDescriptor));

    ParquetOutputFormat.setWriteSupportClass(jobConf, DataWritableWriteSupport.class);
}

From source file:hydrograph.engine.cascading.scheme.parquet.ParquetTupleScheme.java

License:Apache License

@Override
public void sinkConfInit(FlowProcess<? extends JobConf> fp, Tap<JobConf, RecordReader, OutputCollector> tap,
        JobConf jobConf) {
    jobConf.setOutputFormat(DeprecatedParquetOutputFormat.class);

    jobConf.set(ParquetTupleWriter.PARQUET_CASCADING_SCHEMA, parquetSchema);
    ParquetOutputFormat.setWriteSupportClass(jobConf, ParquetTupleWriter.class);
}
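Note that both Parquet schemes above pair setOutputFormat(DeprecatedParquetOutputFormat.class) with a call to ParquetOutputFormat.setWriteSupportClass, since the old-API output format delegates record serialization to the configured write-support class (Hive's DataWritableWriteSupport in the first example, ParquetTupleWriter in the second).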

From source file:IndexService.IndexMergeMR.java

License:Open Source License

public static RunningJob run(String inputfiles, String outputdir, Configuration conf) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf job = new JobConf(conf);
    job.setJobName("MergeIndexMR");
    job.setJarByClass(IndexMergeMR.class);
    job.setNumReduceTasks(1);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(job);
        fs.delete(new Path(outputdir), true);

        String[] ifs = inputfiles.split(",");
        TreeSet<String> files = new TreeSet<String>();
        for (int i = 0; i < ifs.length; i++) {
            IFormatDataFile ifdf = new IFormatDataFile(job);
            ifdf.open(ifs[i]);
            Collection<String> strs = ifdf.fileInfo().head().getUdi().infos().values();
            for (String str : strs) {
                files.add(str);
            }
            ifdf.close();
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str + ",");
        }
        job.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        IFormatDataFile ifdf = new IFormatDataFile(job);
        ifdf.open(ifs[0]);

        HashMap<Integer, IRecord.IFType> map = ifdf.fileInfo().head().fieldMap().fieldtypes();
        ArrayList<String> fieldStrings = new ArrayList<String>();

        for (int i = 0; i < map.size(); i++) {
            IRecord.IFType type = map.get(i);
            fieldStrings.add(type.type() + ConstVar.RecordSplit + type.idx());
        }

        job.setStrings(ConstVar.HD_fieldMap, fieldStrings.toArray(new String[fieldStrings.size()]));
        job.set("datafiletype", ifdf.fileInfo().head().getUdi().infos().get(123456));
        ifdf.close();
    } catch (Exception e2) {
        e2.printStackTrace();
    }

    FileInputFormat.setInputPaths(job, inputfiles);
    FileOutputFormat.setOutputPath(job, new Path(outputdir));

    job.setOutputKeyClass(IndexKey.class);
    job.setOutputValueClass(IndexValue.class);

    job.setPartitionerClass(IndexMergePartitioner.class);

    job.setMapperClass(MergeIndexMap.class);
    job.setCombinerClass(MergeIndexReduce.class);
    job.setReducerClass(MergeIndexReduce.class);

    job.setInputFormat(IndexMergeIFormatInputFormat.class);
    job.setOutputFormat(IndexMergeIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(job);
        return jc.submitJob(job);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

From source file:IndexService.IndexMR.java

License:Open Source License

public static RunningJob run(Configuration conf2, String inputfiles, boolean column, String ids,
        String outputdir) {
    if (inputfiles == null || outputdir == null)
        return null;

    JobConf conf = new JobConf(conf2);
    conf.setJobName("IndexMR:\t" + ids);
    conf.setJarByClass(IndexMR.class);
    FileSystem fs = null;
    try {
        fs = FileSystem.get(conf);
        fs.delete(new Path(outputdir), true);
    } catch (IOException e3) {
        e3.printStackTrace();
    }

    conf.set("index.ids", ids);
    if (column) {
        conf.set("datafiletype", "column");
    } else {
        conf.set("datafiletype", "format");
    }

    String[] ifs = inputfiles.split(",");
    long wholerecnum = 0;

    String[] idxs = ids.split(",");
    String[] fieldStrings = new String[idxs.length + 2];

    if (!column) {
        IFormatDataFile ifdf;
        try {
            ifdf = new IFormatDataFile(conf);
            ifdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = ifdf.fileInfo().head().fieldMap().fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            ifdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    } else {
        try {
            IColumnDataFile icdf = new IColumnDataFile(conf);
            icdf.open(ifs[0]);
            for (int i = 0; i < idxs.length; i++) {
                int id = Integer.parseInt(idxs[i]);
                byte type = icdf.fieldtypes().get(id).type();
                fieldStrings[i] = type + ConstVar.RecordSplit + i;
            }
            icdf.close();
        } catch (IOException e) {
            e.printStackTrace();
        }

    }

    fieldStrings[fieldStrings.length - 2] = ConstVar.FieldType_Short + ConstVar.RecordSplit
            + (fieldStrings.length - 2);
    fieldStrings[fieldStrings.length - 1] = ConstVar.FieldType_Int + ConstVar.RecordSplit
            + (fieldStrings.length - 1);

    conf.setStrings(ConstVar.HD_fieldMap, fieldStrings);

    if (!column) {
        conf.set(ConstVar.HD_index_filemap, inputfiles);
        for (String file : ifs) {
            IFormatDataFile fff;
            try {
                fff = new IFormatDataFile(conf);
                fff.open(file);
                wholerecnum += fff.segIndex().recnum();
                fff.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    } else {
        HashSet<String> files = new HashSet<String>();
        for (String file : ifs) {
            files.add(file);
        }
        StringBuffer sb = new StringBuffer();
        for (String str : files) {
            sb.append(str).append(",");
        }
        conf.set(ConstVar.HD_index_filemap, sb.substring(0, sb.length() - 1));

        for (String file : files) {
            Path parent = new Path(file).getParent();
            try {
                FileStatus[] fss = fs.listStatus(parent);
                String openfile = "";
                for (FileStatus status : fss) {
                    if (status.getPath().toString().contains(file)) {
                        openfile = status.getPath().toString();
                        break;
                    }
                }
                IFormatDataFile fff = new IFormatDataFile(conf);
                fff.open(openfile);
                wholerecnum += fff.segIndex().recnum();
                fff.close();

            } catch (IOException e) {
                e.printStackTrace();
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
    }

    conf.setNumReduceTasks((int) ((wholerecnum - 1) / (100000000) + 1));

    FileInputFormat.setInputPaths(conf, inputfiles);
    Path outputPath = new Path(outputdir);
    FileOutputFormat.setOutputPath(conf, outputPath);

    conf.setOutputKeyClass(IndexKey.class);
    conf.setOutputValueClass(IndexValue.class);

    conf.setPartitionerClass(IndexPartitioner.class);

    conf.setMapperClass(IndexMap.class);
    conf.setCombinerClass(IndexReduce.class);
    conf.setReducerClass(IndexReduce.class);

    if (column) {
        conf.setInputFormat(IColumnInputFormat.class);
    } else {
        conf.setInputFormat(IFormatInputFormat.class);
    }
    conf.setOutputFormat(IndexIFormatOutputFormat.class);

    try {
        JobClient jc = new JobClient(conf);
        return jc.submitJob(conf);
    } catch (IOException e) {
        e.printStackTrace();
        return null;
    }
}

From source file:infinidb.hadoop.db.InfiniDBOutputFormat.java

License:Apache License

/**
 * Initializes the reduce-part of the job with the appropriate output settings
 *
 * @param job
 *          The job
 * @param schemaName
 *          The schema to insert data into
 * @param tableNames
 *          The tables to insert data into
 */
public static void setOutput(JobConf job, String schemaName, String... tableNames) {
    job.setOutputFormat(InfiniDBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);
    dbConf.setOutputSchemaName(schemaName);
    dbConf.setOutputTableNames(tableNames);
}

From source file:infinidb.hadoop.db.InfiniDBOutputFormat.java

License:Apache License

/**
 * Initializes the reduce-part of the job with the appropriate output settings
 *
 * @param job
 *          The job
 * @param schemaName
 *          The schema to insert data into
 */
public static void setOutput(JobConf job, String schemaName) {
    job.setOutputFormat(InfiniDBOutputFormat.class);
    job.setReduceSpeculativeExecution(false);

    InfiniDBConfiguration dbConf = new InfiniDBConfiguration(job);

    dbConf.setOutputSchemaName(schemaName);
}

From source file:infinidb.hadoop.example.InfiniDBOutputDriver.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    JobConf jobconf = new JobConf(conf, InfiniDoopDriver.class);
    DBConfiguration.configureDB(jobconf, "com.mysql.jdbc.Driver", "jdbc:mysql://srvswint4/tpch1", "root", "");
    String[] fields = { "n_nationkey", "n_name" };
    String[] outFields = { "id", "name" };
    jobconf.setInputFormat(IDBFileInputFormat.class);
    jobconf.setOutputFormat(InfiniDBOutputFormat.class);
    jobconf.setOutputKeyClass(NullWritable.class);
    jobconf.setOutputValueClass(Text.class);
    InfiniDBOutputFormat.setOutput(jobconf, "db", outFields);
    InfiniDBConfiguration idbconf = new InfiniDBConfiguration(jobconf);
    idbconf.setInputPath("input");
    idbconf.setOutputPath("output");
    idbconf.setInfiniDBHome("/usr/local/Calpont");

    jobconf.setMapperClass(InfiniDoopMapper.class);
    jobconf.setNumMapTasks(1);
    jobconf.setNumReduceTasks(2);
    JobClient client = new JobClient();
    client.setConf(jobconf);
    try {
        JobClient.runJob(jobconf);
    } catch (Exception e) {
        e.printStackTrace();
    }

    return 0;
}

From source file:invertedIndex.startJob.java

public static void start(String[] args) {
    try {
        JobConf conf = new JobConf(WordCount.class);
        conf.setJobName("wordcount");

        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(IntWritable.class);

        lineIndexMapper Map = new lineIndexMapper();
        conf.setMapperClass(Map.getClass());

        lineIndexReducer Reduce = new lineIndexReducer();
        conf.setCombinerClass(Reduce.getClass());
        conf.setReducerClass(Reduce.getClass());

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        FileInputFormat.setInputPaths(conf, new Path(args[1]));

        Path outputDir = new Path(args[2]);

        outputDir.getFileSystem(conf).delete(outputDir, true);
        FileSystem fs = FileSystem.get(conf);
        fs.delete(outputDir, true);

        FileOutputFormat.setOutputPath(conf, outputDir);

        JobClient.runJob(conf);

    } catch (Exception Exp) {

        Exp.printStackTrace();
    }
}

From source file:io.bfscan.clueweb12.DumpWarcRecordsToPlainText.java

License:Apache License

/**
 * Runs this tool.
 */
@SuppressWarnings("static-access")
public int run(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("input path").create(INPUT_OPTION));
    options.addOption(
            OptionBuilder.withArgName("path").hasArg().withDescription("output path").create(OUTPUT_OPTION));

    CommandLine cmdline;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        System.err.println("Error parsing command line: " + exp.getMessage());
        return -1;
    }

    if (!cmdline.hasOption(INPUT_OPTION) || !cmdline.hasOption(OUTPUT_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(this.getClass().getName(), options);
        ToolRunner.printGenericCommandUsage(System.out);
        return -1;
    }

    String input = cmdline.getOptionValue(INPUT_OPTION);
    String output = cmdline.getOptionValue(OUTPUT_OPTION);

    LOG.info("Tool name: " + DumpWarcRecordsToPlainText.class.getSimpleName());
    LOG.info(" - input: " + input);
    LOG.info(" - output: " + output);

    JobConf conf = new JobConf(getConf(), DumpWarcRecordsToPlainText.class);
    conf.setJobName(DumpWarcRecordsToPlainText.class.getSimpleName() + ":" + input);

    conf.setNumReduceTasks(0);

    FileInputFormat.addInputPaths(conf, input);
    FileOutputFormat.setOutputPath(conf, new Path(output));

    conf.setInputFormat(ClueWeb12InputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    conf.setMapperClass(MyMapper.class);

    RunningJob job = JobClient.runJob(conf);
    Counters counters = job.getCounters();
    int numDocs = (int) counters.findCounter(Records.PAGES).getCounter();

    LOG.info("Read " + numDocs + " docs.");

    return 0;
}

From source file:io.fluo.stress.trie.Generate.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    if (args.length != 4) {
        log.error("Usage: " + this.getClass().getSimpleName()
                + " <numMappers> <numbersPerMapper> <max> <output dir>");
        System.exit(-1);
    }

    int numMappers = Integer.parseInt(args[0]);
    int numPerMapper = Integer.parseInt(args[1]);
    long max = Long.parseLong(args[2]);
    Path out = new Path(args[3]);

    Preconditions.checkArgument(numMappers > 0, "numMappers <= 0");
    Preconditions.checkArgument(numPerMapper > 0, "numPerMapper <= 0");
    Preconditions.checkArgument(max > 0, "max <= 0");

    JobConf job = new JobConf(getConf());

    job.setJobName(this.getClass().getName());

    job.setJarByClass(Generate.class);

    job.setInt(TRIE_GEN_NUM_PER_MAPPER_PROP, numPerMapper);
    job.setInt(TRIE_GEN_NUM_MAPPERS_PROP, numMappers);
    job.setLong(TRIE_GEN_MAX_PROP, max);

    job.setInputFormat(RandomLongInputFormat.class);

    job.setNumReduceTasks(0);

    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setOutputFormat(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, out);

    RunningJob runningJob = JobClient.runJob(job);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : -1;
}