List of usage examples for org.apache.hadoop.mapreduce.lib.output TextOutputFormat SEPERATOR
String SEPERATOR
To view the source code for org.apache.hadoop.mapreduce.lib.output.TextOutputFormat.SEPERATOR, click the Source Link.
From source file:co.cask.cdap.datapipeline.mock.NaiveBayesTrainer.java
License:Apache License
@Override public void configurePipeline(PipelineConfigurer pipelineConfigurer) { pipelineConfigurer.addStream(TEXTS_TO_CLASSIFY); pipelineConfigurer.createDataset(CLASSIFIED_TEXTS, KeyValueTable.class); pipelineConfigurer.createDataset(config.fileSetName, FileSet.class, FileSetProperties.builder().setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build());// w w w. j a v a 2s. c om }
From source file:co.cask.cdap.examples.fileset.FileSetExample.java
License:Apache License
/**
 * Configures the "FileSetExample" application: sets its name and description, creates the
 * "lines" and "counts" file sets, and adds the {@code FileSetService} service and the
 * {@code WordCount} MapReduce.
 */
@Override
public void configure() {
  setName("FileSetExample");
  setDescription("Application with a MapReduce that uses a FileSet dataset");

  // "lines": text in/out under a fixed base path.
  createDataset(
    "lines",
    FileSet.class,
    FileSetProperties.builder()
      .setBasePath("example/data/lines")
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setDescription("Store input lines")
      .build());

  // "counts": text in/out with the TextOutputFormat.SEPERATOR output property set to ":".
  createDataset(
    "counts",
    FileSet.class,
    FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, ":")
      .setDescription("Store word counts")
      .build());

  addService(new FileSetService());
  addMapReduce(new WordCount());
}
From source file:co.cask.cdap.examples.loganalysis.LogAnalysisApp.java
License:Apache License
@Override public void configure() { setDescription("CDAP Log Analysis App"); // A stream to ingest log data addStream(new Stream(LOG_STREAM)); // A Spark and MapReduce for processing log data addSpark(new ResponseCounterSpark()); addMapReduce(new HitCounterProgram()); addWorkflow(new LogAnalysisWorkflow()); // Services to query for result addService(HIT_COUNTER_SERVICE, new HitCounterServiceHandler()); addService(RESPONSE_COUNTER_SERVICE, new ResponseCounterHandler()); addService(REQUEST_COUNTER_SERVICE, new RequestCounterHandler()); // Datasets to store output after processing createDataset(RESPONSE_COUNT_STORE, KeyValueTable.class, DatasetProperties.builder().setDescription("Store response counts").build()); createDataset(HIT_COUNT_STORE, KeyValueTable.class, DatasetProperties.builder().setDescription("Store hit counts").build()); createDataset(REQ_COUNT_STORE, TimePartitionedFileSet.class, FileSetProperties.builder().setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":").setDescription("Store request counts") .build());/*from ww w. ja v a 2 s . c o m*/ }
From source file:co.cask.cdap.examples.sportresults.SportResults.java
License:Apache License
@Override public void configure() { addService(new UploadService()); addMapReduce(new ScoreCounter()); // Create the "results" partitioned file set, configure it to work with MapReduce and with Explore createDataset("results", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("league").addIntField("season").build()) // Properties for file set .setInputFormat(TextInputFormat.class).setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true).setExploreFormat("csv") .setExploreSchema("date STRING, winner STRING, loser STRING, winnerpoints INT, loserpoints INT") .setDescription("FileSet dataset of game results for a sport league and season").build()); // Create the aggregates partitioned file set, configure it to work with MapReduce and with Explore createDataset("totals", PartitionedFileSet.class, PartitionedFileSetProperties.builder() // Properties for partitioning .setPartitioning(Partitioning.builder().addStringField("league").build()) // Properties for file set .setInputFormat(TextInputFormat.class).setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ",") // Properties for Explore (to create a partitioned Hive table) .setEnableExploreOnCreate(true).setExploreFormat("csv") .setExploreSchema("team STRING, wins INT, ties INT, losses INT, scored INT, conceded INT") .setDescription("FileSet dataset of aggregated results for each sport league").build()); }
From source file:co.cask.cdap.internal.app.runtime.batch.AppWithMapReduceUsingFileSet.java
License:Apache License
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(inputDataset, "fileSet", FileSetProperties.builder().setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build());//w w w. j a v a2 s. co m if (!outputDataset.equals(inputDataset)) { createDataset(outputDataset, "fileSet", FileSetProperties.builder().setBasePath("foo/my-file-output") .setInputFormat(TextInputFormat.class).setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, ":").build()); } addMapReduce(new ComputeSum()); }
From source file:co.cask.cdap.internal.app.runtime.batch.AppWithPartitionedFileSet.java
License:Apache License
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); createDataset(PARTITIONED, "partitionedFileSet", PartitionedFileSetProperties.builder() .setPartitioning(Partitioning.builder().addStringField("type").addLongField("time").build()) // properties for file set .setBasePath("partitioned").setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build());/*from w ww . java 2 s.c o m*/ addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
From source file:co.cask.cdap.internal.app.runtime.batch.AppWithTimePartitionedFileSet.java
License:Apache License
@Override public void configure() { setName("AppWithMapReduceUsingFile"); setDescription("Application with MapReduce job using file as dataset"); createDataset(INPUT, "table"); createDataset(OUTPUT, "table"); createDataset(TIME_PARTITIONED, "timePartitionedFileSet", FileSetProperties.builder() // properties for file set .setBasePath("partitioned").setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, SEPARATOR) // don't configure properties for the Hive table - this is used in a context where explore is disabled .build());//w w w . ja v a2 s . c o m addMapReduce(new PartitionWriter()); addMapReduce(new PartitionReader()); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.input.AppWithMapReduceUsingMultipleInputs.java
License:Apache License
@Override public void configure() { setName("AppWithMapReduceUsingMultipleInputs"); setDescription("Application with MapReduce job using multiple inputs"); addStream(PURCHASES);// w ww . j a va 2 s. c o m createDataset(PURCHASES, "fileSet", FileSetProperties.builder().setInputFormat(TextInputFormat.class).build()); createDataset(CUSTOMERS, "fileSet", FileSetProperties.builder().setInputFormat(TextInputFormat.class).build()); createDataset(OUTPUT_DATASET, "fileSet", FileSetProperties.builder().setOutputFormat(TextOutputFormat.class) .setOutputProperty(TextOutputFormat.SEPERATOR, " ").build()); addMapReduce(new ComputeSum()); addMapReduce(new InvalidMapReduce()); }
From source file:co.cask.cdap.internal.app.runtime.batch.dataset.output.AppWithMapReduceUsingMultipleOutputs.java
License:Apache License
/**
 * Configures the application: sets name and description, creates the {@code PURCHASES} input
 * file set and the {@code SEPARATED_PURCHASES} output file set, and adds the
 * {@code SeparatePurchases} and {@code InvalidMapReduce} programs.
 */
@Override
public void configure() {
  setName("AppWithMapReduceUsingMultipleOutputs");
  setDescription("Application with MapReduce job using multiple outputs");

  // Input file set only declares a text input format.
  createDataset(
    PURCHASES,
    "fileSet",
    FileSetProperties.builder()
      .setInputFormat(TextInputFormat.class)
      .build());

  // Output file set is written as text with a single space as the TextOutputFormat.SEPERATOR property.
  createDataset(
    SEPARATED_PURCHASES,
    "fileSet",
    FileSetProperties.builder()
      .setOutputFormat(TextOutputFormat.class)
      .setOutputProperty(TextOutputFormat.SEPERATOR, " ")
      .build());

  addMapReduce(new SeparatePurchases());
  addMapReduce(new InvalidMapReduce());
}
From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceProgramRunnerTest.java
License:Apache License
@Test public void testMapreduceWithDynamicDatasets() throws Exception { Id.DatasetInstance rtInput1 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput1"); Id.DatasetInstance rtInput2 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtInput2"); Id.DatasetInstance rtOutput1 = Id.DatasetInstance.from(DefaultId.NAMESPACE, "rtOutput1"); // create the datasets here because they are not created by the app dsFramework.addInstance("fileSet", rtInput1, FileSetProperties.builder().setBasePath("rtInput1").setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build());/*from w w w.ja v a 2 s. c o m*/ dsFramework.addInstance("fileSet", rtOutput1, FileSetProperties.builder().setBasePath("rtOutput1").setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build()); // build runtime args for app Map<String, String> runtimeArguments = Maps.newHashMap(); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput1"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "abc, xyz"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtOutput1"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "a001"); // test reading and writing distinct datasets, reading more than one path testMapreduceWithFile("rtInput1", "abc, xyz", "rtOutput1", "a001", AppWithMapReduceUsingRuntimeDatasets.class, AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class, new BasicArguments(runtimeArguments), AppWithMapReduceUsingRuntimeDatasets.COUNTERS); // validate that the table emitted metrics Collection<MetricTimeSeries> metrics = metricStore .query(new MetricDataQuery(0, System.currentTimeMillis() / 1000L, Integer.MAX_VALUE, "system." 
+ Constants.Metrics.Name.Dataset.OP_COUNT, AggregationFunction.SUM, ImmutableMap.of(Constants.Metrics.Tag.NAMESPACE, DefaultId.NAMESPACE.getId(), Constants.Metrics.Tag.APP, AppWithMapReduceUsingRuntimeDatasets.APP_NAME, Constants.Metrics.Tag.MAPREDUCE, AppWithMapReduceUsingRuntimeDatasets.MR_NAME, Constants.Metrics.Tag.DATASET, "rtt"), Collections.<String>emptyList())); Assert.assertEquals(1, metrics.size()); MetricTimeSeries ts = metrics.iterator().next(); Assert.assertEquals(1, ts.getTimeValues().size()); Assert.assertEquals(1, ts.getTimeValues().get(0).getValue()); // test reading and writing same dataset dsFramework.addInstance("fileSet", rtInput2, FileSetProperties.builder().setBasePath("rtInput2").setInputFormat(TextInputFormat.class) .setOutputFormat(TextOutputFormat.class).setOutputProperty(TextOutputFormat.SEPERATOR, ":") .build()); runtimeArguments = Maps.newHashMap(); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_NAME, "rtInput2"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.INPUT_PATHS, "zzz"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_NAME, "rtInput2"); runtimeArguments.put(AppWithMapReduceUsingRuntimeDatasets.OUTPUT_PATH, "f123"); testMapreduceWithFile("rtInput2", "zzz", "rtInput2", "f123", AppWithMapReduceUsingRuntimeDatasets.class, AppWithMapReduceUsingRuntimeDatasets.ComputeSum.class, new BasicArguments(runtimeArguments), AppWithMapReduceUsingRuntimeDatasets.COUNTERS); }