Example usage for org.apache.hadoop.mapreduce.lib.output TextOutputFormat SEPERATOR

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.lib.output TextOutputFormat SEPERATOR.

Prototype

String SEPERATOR

To view the source code for org.apache.hadoop.mapreduce.lib.output TextOutputFormat SEPERATOR, use the source link below.

Click Source Link

Usage

From source file:co.cask.cdap.datapipeline.mock.NaiveBayesTrainer.java

License:Apache License

/**
 * Registers one stream and two datasets with the supplied pipeline configurer.
 *
 * @param pipelineConfigurer configurer that receives the stream and dataset registrations
 */
@Override
public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    // Stream registered under TEXTS_TO_CLASSIFY.
    pipelineConfigurer.addStream(TEXTS_TO_CLASSIFY);

    // Key-value table registered under CLASSIFIED_TEXTS.
    pipelineConfigurer.createDataset(CLASSIFIED_TEXTS, KeyValueTable.class);

    // File set named by config.fileSetName: text input/output formats, with the
    // TextOutputFormat.SEPERATOR output property set to ":".
    pipelineConfigurer.createDataset(
            config.fileSetName,
            FileSet.class,
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .build());
}

From source file:co.cask.cdap.examples.fileset.FileSetExample.java

License:Apache License

/**
 * Declares the application: its name and description, the "lines" and "counts"
 * file sets, one service and one MapReduce program.
 */
@Override
public void configure() {
    setName("FileSetExample");
    setDescription("Application with a MapReduce that uses a FileSet dataset");

    // "lines": file set with an explicit base path and text input/output formats.
    createDataset(
            "lines",
            FileSet.class,
            FileSetProperties.builder()
                    .setBasePath("example/data/lines")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setDescription("Store input lines")
                    .build());

    // "counts": same formats, no base path, and the TextOutputFormat.SEPERATOR
    // output property set to ":".
    createDataset(
            "counts",
            FileSet.class,
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .setDescription("Store word counts")
                    .build());

    addService(new FileSetService());
    addMapReduce(new WordCount());
}

From source file:co.cask.cdap.examples.loganalysis.LogAnalysisApp.java

License:Apache License

/**
 * Declares the log analysis application: an input stream, the Spark, MapReduce
 * and workflow programs, three query services, and three result datasets.
 */
@Override
public void configure() {
    setDescription("CDAP Log Analysis App");

    // Input: stream that log data is ingested into.
    addStream(new Stream(LOG_STREAM));

    // Processing: one Spark program, one MapReduce program, and a workflow.
    addSpark(new ResponseCounterSpark());
    addMapReduce(new HitCounterProgram());
    addWorkflow(new LogAnalysisWorkflow());

    // Query: one service per result dataset.
    addService(HIT_COUNTER_SERVICE, new HitCounterServiceHandler());
    addService(RESPONSE_COUNTER_SERVICE, new ResponseCounterHandler());
    addService(REQUEST_COUNTER_SERVICE, new RequestCounterHandler());

    // Output: two key-value tables ...
    createDataset(
            RESPONSE_COUNT_STORE,
            KeyValueTable.class,
            DatasetProperties.builder()
                    .setDescription("Store response counts")
                    .build());
    createDataset(
            HIT_COUNT_STORE,
            KeyValueTable.class,
            DatasetProperties.builder()
                    .setDescription("Store hit counts")
                    .build());

    // ... and a time-partitioned file set written as text, with the
    // TextOutputFormat.SEPERATOR output property set to ":".
    createDataset(
            REQ_COUNT_STORE,
            TimePartitionedFileSet.class,
            FileSetProperties.builder()
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .setDescription("Store request counts")
                    .build());
}

From source file:co.cask.cdap.examples.sportresults.SportResults.java

License:Apache License

/**
 * Declares the sport results application: an upload service, a MapReduce program,
 * and the "results" and "totals" partitioned file sets. Both file sets use text
 * input/output formats with "," as the TextOutputFormat.SEPERATOR output property
 * and are configured for Explore with the "csv" format.
 */
@Override
public void configure() {
    addService(new UploadService());
    addMapReduce(new ScoreCounter());

    // "results": partitioned by league (string) and season (int).
    createDataset(
            "results",
            PartitionedFileSet.class,
            PartitionedFileSetProperties.builder()
                    // Partitioning
                    .setPartitioning(
                            Partitioning.builder()
                                    .addStringField("league")
                                    .addIntField("season")
                                    .build())
                    // File set: formats used by MapReduce
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ",")
                    // Explore: enabled on creation, csv format, explicit schema
                    .setEnableExploreOnCreate(true)
                    .setExploreFormat("csv")
                    .setExploreSchema("date STRING, winner STRING, loser STRING, winnerpoints INT, loserpoints INT")
                    .setDescription("FileSet dataset of game results for a sport league and season")
                    .build());

    // "totals": aggregates, partitioned by league (string) only.
    createDataset(
            "totals",
            PartitionedFileSet.class,
            PartitionedFileSetProperties.builder()
                    // Partitioning
                    .setPartitioning(
                            Partitioning.builder()
                                    .addStringField("league")
                                    .build())
                    // File set: formats used by MapReduce
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ",")
                    // Explore: enabled on creation, csv format, explicit schema
                    .setEnableExploreOnCreate(true)
                    .setExploreFormat("csv")
                    .setExploreSchema("team STRING, wins INT, ties INT, losses INT, scored INT, conceded INT")
                    .setDescription("FileSet dataset of aggregated results for each sport league")
                    .build());
}

From source file:co.cask.cdap.internal.app.runtime.batch.AppWithMapReduceUsingFileSet.java

License:Apache License

/**
 * Declares the application: name, description, the input file set, an output
 * file set when it is a different dataset from the input, and the ComputeSum
 * MapReduce program.
 */
@Override
public void configure() {
    setName("AppWithMapReduceUsingFile");
    setDescription("Application with MapReduce job using file as dataset");

    // Input file set: text formats, TextOutputFormat.SEPERATOR property set to ":".
    createDataset(
            inputDataset,
            "fileSet",
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .build());

    // Create a second dataset only when output and input names differ.
    boolean separateOutput = !outputDataset.equals(inputDataset);
    if (separateOutput) {
        createDataset(
                outputDataset,
                "fileSet",
                FileSetProperties.builder()
                        .setBasePath("foo/my-file-output")
                        .setInputFormat(TextInputFormat.class)
                        .setOutputFormat(TextOutputFormat.class)
                        .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                        .build());
    }

    addMapReduce(new ComputeSum());
}

From source file:co.cask.cdap.internal.app.runtime.batch.AppWithPartitionedFileSet.java

License:Apache License

/**
 * Declares the application: name, description, two "table" datasets, one
 * partitioned file set, and the PartitionWriter / PartitionReader MapReduce programs.
 */
@Override
public void configure() {
    setName("AppWithMapReduceUsingFile");
    setDescription("Application with MapReduce job using file as dataset");

    createDataset(INPUT, "table");
    createDataset(OUTPUT, "table");

    createDataset(
            PARTITIONED,
            "partitionedFileSet",
            PartitionedFileSetProperties.builder()
                    // Partitioned by "type" (string) and "time" (long).
                    .setPartitioning(
                            Partitioning.builder()
                                    .addStringField("type")
                                    .addLongField("time")
                                    .build())
                    // File set properties: base path, text formats, SEPARATOR as the
                    // TextOutputFormat.SEPERATOR output property.
                    .setBasePath("partitioned")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR)
                    // No Hive table properties: this app is used where explore is disabled.
                    .build());

    addMapReduce(new PartitionWriter());
    addMapReduce(new PartitionReader());
}

From source file:co.cask.cdap.internal.app.runtime.batch.AppWithTimePartitionedFileSet.java

License:Apache License

/**
 * Declares the application: name, description, two "table" datasets, one
 * time-partitioned file set, and the PartitionWriter / PartitionReader MapReduce programs.
 */
@Override
public void configure() {
    setName("AppWithMapReduceUsingFile");
    setDescription("Application with MapReduce job using file as dataset");

    createDataset(INPUT, "table");
    createDataset(OUTPUT, "table");

    createDataset(
            TIME_PARTITIONED,
            "timePartitionedFileSet",
            FileSetProperties.builder()
                    // File set properties: base path, text formats, SEPARATOR as the
                    // TextOutputFormat.SEPERATOR output property.
                    .setBasePath("partitioned")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR)
                    // No Hive table properties: this app is used where explore is disabled.
                    .build());

    addMapReduce(new PartitionWriter());
    addMapReduce(new PartitionReader());
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.AppWithMapReduceUsingMultipleInputs.java

License:Apache License

/**
 * Declares the application: name, description, a stream, two input file sets,
 * one output file set, and the ComputeSum and InvalidMapReduce programs.
 */
@Override
public void configure() {
    setName("AppWithMapReduceUsingMultipleInputs");
    setDescription("Application with MapReduce job using multiple inputs");

    // PURCHASES is registered both as a stream and as a file set.
    addStream(PURCHASES);
    createDataset(
            PURCHASES,
            "fileSet",
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .build());

    createDataset(
            CUSTOMERS,
            "fileSet",
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .build());

    // Output file set: text output, TextOutputFormat.SEPERATOR property set to a single space.
    createDataset(
            OUTPUT_DATASET,
            "fileSet",
            FileSetProperties.builder()
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
                    .build());

    addMapReduce(new ComputeSum());
    addMapReduce(new InvalidMapReduce());
}

From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.AppWithMapReduceUsingMultipleOutputs.java

License:Apache License

/**
 * Declares the application: name, description, one input file set, one output
 * file set, and the SeparatePurchases and InvalidMapReduce programs.
 */
@Override
public void configure() {
    setName("AppWithMapReduceUsingMultipleOutputs");
    setDescription("Application with MapReduce job using multiple outputs");

    // Input file set: text input format only.
    createDataset(
            PURCHASES,
            "fileSet",
            FileSetProperties.builder()
                    .setInputFormat(TextInputFormat.class)
                    .build());

    // Output file set: text output, TextOutputFormat.SEPERATOR property set to a single space.
    createDataset(
            SEPARATED_PURCHASES,
            "fileSet",
            FileSetProperties.builder()
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
                    .build());

    addMapReduce(new SeparatePurchases());
    addMapReduce(new InvalidMapReduce());
}

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceProgramRunnerTest.java

License:Apache License

/**
 * Runs AppWithMapReduceUsingRuntimeDatasets against file sets that are created
 * here and passed in through runtime arguments. Covers two scenarios: distinct
 * input and output datasets (reading two paths), then the same dataset used as
 * both input and output. Between the two, checks the dataset op-count metric
 * for dataset "rtt".
 *
 * @throws Exception if dataset creation, the MapReduce run, or the metric query fails
 */
@Test
public void testMapreduceWithDynamicDatasets() throws Exception {
    Id.DatasetInstance firstInput = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput1");
    Id.DatasetInstance secondInput = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput2");
    Id.DatasetInstance firstOutput = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtOutput1");

    // The app does not create these datasets itself, so add them up front.
    dsFramework.addInstance(
            "fileSet",
            firstInput,
            FileSetProperties.builder()
                    .setBasePath("rtInput1")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .build());
    dsFramework.addInstance(
            "fileSet",
            firstOutput,
            FileSetProperties.builder()
                    .setBasePath("rtOutput1")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .build());

    // Scenario 1: distinct input and output datasets, more than one input path.
    Map<String, String> distinctDatasetArgs = Maps.newHashMap();
    distinctDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput1");
    distinctDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "abc, xyz");
    distinctDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtOutput1");
    distinctDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "a001");
    testMapreduceWithFile(
            "rtInput1", "abc, xyz", "rtOutput1", "a001",
            AppWithMapReduceUsingRuntimeDatasets.class,
            AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class,
            new BasicArguments(distinctDatasetArgs),
            AppWithMapReduceUsingRuntimeDatasets.COUNTERS);

    // The run must have emitted exactly one op-count series for dataset "rtt",
    // holding a single time value equal to 1.
    MetricDataQuery opCountQuery = new MetricDataQuery(
            0,
            System.currentTimeMillis() / 1000L,
            Integer.MAX_VALUE,
            "system." + Constants.Metrics.Name.Dataset.OP_COUNT,
            AggregationFunction.SUM,
            ImmutableMap.of(
                    Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getId(),
                    Constants.Metrics.Tag.APP, AppWithMapReduceUsingRuntimeDatasets.APP_NAME,
                    Constants.Metrics.Tag.MAPREDUCE, AppWithMapReduceUsingRuntimeDatasets.MR_NAME,
                    Constants.Metrics.Tag.DATASET, "rtt"),
            Collections.<String>emptyList());
    Collection<MetricTimeSeries> opCountSeries = metricStore.query(opCountQuery);
    Assert.assertEquals(1, opCountSeries.size());
    MetricTimeSeries onlySeries = opCountSeries.iterator().next();
    Assert.assertEquals(1, onlySeries.getTimeValues().size());
    Assert.assertEquals(1, onlySeries.getTimeValues().get(0).getValue());

    // Scenario 2: the same dataset serves as both input and output.
    dsFramework.addInstance(
            "fileSet",
            secondInput,
            FileSetProperties.builder()
                    .setBasePath("rtInput2")
                    .setInputFormat(TextInputFormat.class)
                    .setOutputFormat(TextOutputFormat.class)
                    .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
                    .build());
    Map<String, String> sameDatasetArgs = Maps.newHashMap();
    sameDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput2");
    sameDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "zzz");
    sameDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtInput2");
    sameDatasetArgs.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "f123");
    testMapreduceWithFile(
            "rtInput2", "zzz", "rtInput2", "f123",
            AppWithMapReduceUsingRuntimeDatasets.class,
            AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class,
            new BasicArguments(sameDatasetArgs),
            AppWithMapReduceUsingRuntimeDatasets.COUNTERS);
}