List of usage examples for the org.apache.hadoop.fs.Path.SEPARATOR field
String SEPARATOR
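Path.SEPARATOR is the forward-slash separator string ("/") that Hadoop places between path components, independent of the local platform's File.separator. Before the collected examples, here is a minimal, self-contained sketch of the typical use, joining path segments; the directory and file names are hypothetical and chosen only for illustration.

import org.apache.hadoop.fs.Path;

public class PathSeparatorExample {
    public static void main(String[] args) {
        // Path.SEPARATOR is "/" on every platform, unlike java.io.File.separator.
        String base = "/tmp/output";                    // hypothetical base directory
        String child = "part-00000";                    // hypothetical file name
        String joined = base + Path.SEPARATOR + child;  // "/tmp/output/part-00000"
        System.out.println(joined);

        // The Path(parent, child) constructor achieves the same result
        // without manual string concatenation.
        System.out.println(new Path(base, child));
    }
}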
From source file:org.apache.gobblin.writer.ParquetHdfsDataWriterTest.java
License:Apache License
@Test
public void testWrite() throws Exception {
    long firstWrite;
    long secondWrite;
    List<Group> records;
    Group record1 = TestConstants.PARQUET_RECORD_1;
    Group record2 = TestConstants.PARQUET_RECORD_2;
    String filePath = TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath;
    File outputFile = new File(filePath, TestConstants.PARQUET_TEST_FILENAME);

    this.writer.write(record1);
    firstWrite = this.writer.recordsWritten();
    this.writer.write(record2);
    secondWrite = this.writer.recordsWritten();
    this.writer.close();
    this.writer.commit();

    records = readParquetFiles(outputFile);
    Group resultRecord1 = records.get(0);
    Group resultRecord2 = records.get(1);

    Assert.assertEquals(firstWrite, 1);
    Assert.assertEquals(secondWrite, 2);
    Assert.assertEquals(resultRecord1.getString("name", 0), "tilak");
    Assert.assertEquals(resultRecord1.getInteger("age", 0), 22);
    Assert.assertEquals(resultRecord2.getString("name", 0), "other");
    Assert.assertEquals(resultRecord2.getInteger("age", 0), 22);
}
From source file:org.apache.gobblin.writer.partitioner.TimeBasedAvroWriterPartitionerTest.java
License:Apache License
/**
 * Test
 * 1. Record timestamp of type long
 * 2. Partition path of a given record
 */
@Test
public void testWriter() throws IOException {
    Schema schema = getRecordSchema("long");
    State state = getBasicState();

    // Write three records, each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);
    DataWriter<GenericRecord> millisPartitionWriter = getWriter(schema, state);

    // This timestamp corresponds to 2015/01/01
    genericRecordBuilder.set("timestamp", 1420099200000L);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    // This timestamp corresponds to 2015/01/02
    genericRecordBuilder.set("timestamp", 1420185600000L);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));

    millisPartitionWriter.close();
    millisPartitionWriter.commit();
    // Check that the writer reports that 2 records have been written
    Assert.assertEquals(millisPartitionWriter.recordsWritten(), 2);

    state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_TIMEUNIT, "seconds");
    DataWriter<GenericRecord> secsPartitionWriter = getWriter(schema, state);
    // This timestamp corresponds to 2015/01/03
    genericRecordBuilder.set("timestamp", 1420272000L);
    secsPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    secsPartitionWriter.close();
    secsPartitionWriter.commit();
    // Check that the writer reports that 1 record has been written
    Assert.assertEquals(secsPartitionWriter.recordsWritten(), 1);

    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);

    // Check if each file exists, and in the correct location
    File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH);
    Assert.assertTrue(baseOutputDir.exists());
    File outputDir20150101 = new File(baseOutputDir,
        "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150101.exists());
    File outputDir20150102 = new File(baseOutputDir,
        "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150102.exists());
    File outputDir20150103 = new File(baseOutputDir,
        "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150103.exists());
}
From source file:org.apache.gobblin.yarn.GobblinYarnAppLauncher.java
License:Apache License
/**
 * Launch a new Gobblin instance on Yarn.
 *
 * @throws IOException if there's something wrong launching the application
 * @throws YarnException if there's something wrong launching the application
 */
public void launch() throws IOException, YarnException {
    this.eventBus.register(this);

    String clusterName = this.config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
    HelixUtils.createGobblinHelixCluster(
        this.config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY), clusterName);
    LOGGER.info("Created Helix cluster " + clusterName);

    connectHelixManager();
    startYarnClient();

    this.applicationId = getApplicationId();

    this.applicationStatusMonitor.scheduleAtFixedRate(new Runnable() {
        @Override
        public void run() {
            try {
                eventBus.post(new ApplicationReportArrivalEvent(
                    yarnClient.getApplicationReport(applicationId.get())));
            } catch (YarnException | IOException e) {
                LOGGER.error(
                    "Failed to get application report for Gobblin Yarn application " + applicationId.get(), e);
                eventBus.post(new GetApplicationReportFailureEvent(e));
            }
        }
    }, 0, this.appReportIntervalMinutes, TimeUnit.MINUTES);

    List<Service> services = Lists.newArrayList();
    if (this.config.hasPath(GobblinYarnConfigurationKeys.KEYTAB_FILE_PATH)) {
        LOGGER.info("Adding YarnAppSecurityManager since login is keytab based");
        services.add(buildYarnAppSecurityManager());
    }
    if (!this.config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)
        || !this.config.getBoolean(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)) {
        services.add(buildLogCopier(this.config,
            new Path(this.sinkLogRootDir,
                this.applicationName + Path.SEPARATOR + this.applicationId.get().toString()),
            GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName,
                this.applicationId.get().toString())));
    }
    if (config.getBoolean(ConfigurationKeys.JOB_EXECINFO_SERVER_ENABLED_KEY)) {
        LOGGER.info("Starting the job execution info server since it is enabled");
        Properties properties = ConfigUtils.configToProperties(config);
        JobExecutionInfoServer executionInfoServer = new JobExecutionInfoServer(properties);
        services.add(executionInfoServer);
        if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
            LOGGER.info("Starting the admin UI server since it is enabled");
            services.add(ServiceBasedAppLauncher.createAdminServer(properties,
                executionInfoServer.getAdvertisedServerUri()));
        }
    } else if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
        LOGGER.warn("NOT starting the admin UI because the job execution info server is NOT enabled");
    }

    this.serviceManager = Optional.of(new ServiceManager(services));
    // Start all the services running in the ApplicationMaster
    this.serviceManager.get().startAsync();
}
From source file:org.apache.gora.mapreduce.GoraOutputFormat.java
License:Apache License
private void setOutputPath(DataStore<K, T> store, TaskAttemptContext context) {
    if (store instanceof FileBackedDataStore) {
        FileBackedDataStore<K, T> fileStore = (FileBackedDataStore<K, T>) store;
        String uniqueName = FileOutputFormat.getUniqueFile(context, "part", "");

        // if file store output is not set, then get the output from FileOutputFormat
        if (fileStore.getOutputPath() == null) {
            fileStore.setOutputPath(FileOutputFormat.getOutputPath(context).toString());
        }

        // set the unique name of the data file
        String path = fileStore.getOutputPath();
        fileStore.setOutputPath(path + Path.SEPARATOR + uniqueName);
    }
}
From source file:org.apache.hcatalog.har.HarOutputCommitterPostProcessor.java
License:Apache License
public String getProcessedLocation(Path ptnPath) throws IOException {
    String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR;
    // LOG.info("har location : " + harLocn);
    return harLocn;
}
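For orientation, the following standalone sketch applies the same string transformation as getProcessedLocation above to a hypothetical partition path; the sample path and the expected output are assumptions made only for illustration.

import org.apache.hadoop.fs.Path;

public class HarLocationSketch {
    public static void main(String[] args) {
        // Hypothetical partition directory inside a warehouse.
        Path ptnPath = new Path("hdfs://namenode:8020/warehouse/db/table/dt=2015/");

        // Same steps as getProcessedLocation: take the URI path, strip trailing slashes,
        // append ".har", and end with Path.SEPARATOR so the location points into the archive.
        String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "")
            + ".har" + Path.SEPARATOR;

        // Prints: har:///warehouse/db/table/dt=2015.har/
        System.out.println(harLocn);
    }
}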
From source file:org.apache.hive.hcatalog.api.repl.commands.TestCommands.java
License:Apache License
@BeforeClass
public static void setUpBeforeClass() throws Exception {
    TestHCatClient.startMetaStoreServer();
    hconf = TestHCatClient.getConf();
    hconf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, "");
    hconf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
        "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");

    TEST_PATH = System.getProperty("test.warehouse.dir", "/tmp") + Path.SEPARATOR
        + TestCommands.class.getCanonicalName() + "-" + System.currentTimeMillis();
    Path testPath = new Path(TEST_PATH);
    FileSystem fs = FileSystem.get(testPath.toUri(), hconf);
    fs.mkdirs(testPath);

    driver = new Driver(hconf);
    SessionState.start(new CliSessionState(hconf));
    client = HCatClient.create(hconf);
}
From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java
License:Apache License
/**
 * Generate partition metadata object to be used to add to metadata.
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                     (temporary dir if dynamic partition, table dir if static)
 * @param dynPartPath The path of dynamic partition which is created
 * @param partKVs The keyvalue pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that's the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException
 */
private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot,
    String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params,
    Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {

    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));

    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
        fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
    }
    partition.getSd().setCols(fields);

    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));
    partition.setParameters(params);

    // Sets permissions and group name on partition dirs and files.
    Path partPath;
    if (customDynamicLocationUsed) {
        partPath = new Path(dynPartPath);
    } else if (!dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
        && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
        // Now, we need to de-scratchify this location - i.e., get rid of any
        // _SCRATCH[\d].?[\d]+ from the location.
        String jobLocation = jobInfo.getLocation();
        String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
        partPath = new Path(finalLocn);
    } else {
        partPath = new Path(partLocnRoot);
        int i = 0;
        for (FieldSchema partKey : table.getPartitionKeys()) {
            if (i++ != 0) {
                fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
                applyGroupAndPerms(fs, partPath, perms, grpName, false);
            }
            partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
        }
    }

    // Apply the group and permissions to the leaf partition and files.
    // Need not bother in case of HDFS as permission is taken care of by setting UMask
    fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
        applyGroupAndPerms(fs, partPath, perms, grpName, true);
    }

    // Set the location in the StorageDescriptor
    if (dynamicPartitioningUsed) {
        String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
        if (harProcessor.isEnabled()) {
            harProcessor.exec(context, partition, partPath);
            partition.getSd()
                .setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
        } else {
            partition.getSd().setLocation(dynamicPartitionDestination);
        }
    } else {
        partition.getSd().setLocation(partPath.toString());
    }
    return partition;
}
From source file:org.apache.hive.hcatalog.streaming.mutate.worker.MetaStorePartitionHelper.java
License:Apache License
/** Creates the specified partition if it does not already exist. Does nothing if the table is unpartitioned. */
@Override
public void createPartitionIfNotExists(List<String> newPartitionValues) throws WorkerException {
    if (newPartitionValues.isEmpty()) {
        return;
    }

    try {
        LOG.debug("Attempting to create partition (if not exists) {}.{}:{}", databaseName, tableName,
            newPartitionValues);
        Table table = metaStoreClient.getTable(databaseName, tableName);

        Partition partition = new Partition();
        partition.setDbName(table.getDbName());
        partition.setTableName(table.getTableName());
        StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
        partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR
            + Warehouse.makePartName(table.getPartitionKeys(), newPartitionValues));
        partition.setSd(partitionSd);
        partition.setValues(newPartitionValues);

        metaStoreClient.add_partition(partition);
    } catch (AlreadyExistsException e) {
        LOG.debug("Partition already exists: {}.{}:{}", databaseName, tableName, newPartitionValues);
    } catch (NoSuchObjectException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Table not found '" + databaseName + "." + tableName + "'.", e);
    } catch (TException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Failed to create partition '" + newPartitionValues
            + "' on table '" + databaseName + "." + tableName + "'", e);
    }
}
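The core pattern above is composing a partition location from the table location, Path.SEPARATOR, and the partition name produced by Warehouse.makePartName. A minimal standalone sketch of that composition, assuming a hypothetical table location and partition keys:

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.metastore.Warehouse;
import org.apache.hadoop.hive.metastore.api.FieldSchema;

public class PartitionLocationSketch {
    public static void main(String[] args) throws Exception {
        // Hypothetical table location and partition keys, for illustration only.
        String tableLocation = "hdfs://namenode:8020/warehouse/db.db/sales";
        List<FieldSchema> partitionKeys = Arrays.asList(
            new FieldSchema("dt", "string", null),
            new FieldSchema("region", "string", null));
        List<String> partitionValues = Arrays.asList("2015-01-01", "emea");

        // Warehouse.makePartName renders "dt=2015-01-01/region=emea" (escaping values where needed).
        String partName = Warehouse.makePartName(partitionKeys, partitionValues);

        // Same composition as createPartitionIfNotExists above.
        String partitionLocation = tableLocation + Path.SEPARATOR + partName;
        System.out.println(partitionLocation);
        // -> hdfs://namenode:8020/warehouse/db.db/sales/dt=2015-01-01/region=emea
    }
}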
From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java
License:Apache License
private void createStoreSales(String dbName, String loc) throws Exception {
    String dbUri = "raw://" + new Path(loc).toUri().toString();
    String tableLoc = dbUri + Path.SEPARATOR + "store_sales";

    boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
    Assert.assertTrue(success);
    success = runDDL(driver, "use " + dbName);
    Assert.assertTrue(success);

    success = runDDL(driver, "drop table if exists store_sales");
    Assert.assertTrue(success);
    success = runDDL(driver, "create table store_sales\n" + "(\n" + " ss_sold_date_sk int,\n"
        + " ss_sold_time_sk int,\n" + " ss_item_sk int,\n" + " ss_customer_sk int,\n" + " ss_cdemo_sk int,\n"
        + " ss_hdemo_sk int,\n" + " ss_addr_sk int,\n" + " ss_store_sk int,\n" + " ss_promo_sk int,\n"
        + " ss_ticket_number int,\n" + " ss_quantity int,\n" + " ss_wholesale_cost decimal(7,2),\n"
        + " ss_list_price decimal(7,2),\n" + " ss_sales_price decimal(7,2),\n"
        + " ss_ext_discount_amt decimal(7,2),\n" + " ss_ext_sales_price decimal(7,2),\n"
        + " ss_ext_wholesale_cost decimal(7,2),\n" + " ss_ext_list_price decimal(7,2),\n"
        + " ss_ext_tax decimal(7,2),\n" + " ss_coupon_amt decimal(7,2),\n" + " ss_net_paid decimal(7,2),\n"
        + " ss_net_paid_inc_tax decimal(7,2),\n" + " ss_net_profit decimal(7,2)\n" + ")\n"
        + " partitioned by (dt string)\n" + "clustered by (ss_store_sk, ss_promo_sk)\n"
        + "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'"
        + " TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
    Assert.assertTrue(success);

    success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
    Assert.assertTrue(success);
}
From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java
License:Apache License
@Test
public void testStreamBucketingMatchesRegularBucketing() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
    String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";

    runDDL(driver, "create database testBucketing3");
    runDDL(driver, "use testBucketing3");

    runDDL(driver,
        "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
            + bucketCount + " buckets stored as orc location " + tableLoc
            + " TBLPROPERTIES ('transactional'='true')");
    // In the 'nobucket' table we capture the bucketid from streamedtable to work around a Hive bug
    // that prevents joining two identically bucketed tables
    runDDL(driver,
        "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
    runDDL(driver,
        "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
            + bucketCount + " buckets stored as orc location " + tableLoc2
            + " TBLPROPERTIES ('transactional'='true')");

    String[] records = new String[] { "PSFAHYLZVC,29,EPNMA", "PPPRKWAYAU,96,VUTEE", "MIAOFERCHI,3,WBDSI",
        "CEGQAZOWVN,0,WCUZL", "XWAKMNSVQF,28,YJVHU", "XBWTSAJWME,2,KDQFO", "FUVLQTAXAY,5,LDSDG",
        "QTQMDJMGJH,6,QBOMA", "EFLOTLWJWN,71,GHWPS", "PEQNAOJHCM,82,CAAFI", "MOEKQLGZCP,41,RUACR",
        "QZXMCOPTID,37,LFLWE", "EYALVWICRD,13,JEZLC", "VYWLZAYTXX,16,DMVZX", "OSALYSQIXR,47,HNZVE",
        "JGKVHKCEGQ,25,KSCJB", "WQFMMYDHET,12,DTRWA", "AJOVAYZKZQ,15,YBKFO", "YAQONWCUAU,31,QJNHZ",
        "DJBXUEUOEB,35,IYCBL" };

    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
    String[] colNames1 = new String[] { "key1", "key2", "data" };
    DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());
    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
    txnBatch.beginNextTransaction();

    for (String record : records) {
        txnBatch.write(record.toString().getBytes());
    }

    txnBatch.commit();
    txnBatch.close();
    connection.close();

    ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
    for (String re : res1) {
        System.out.println(re);
    }

    driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
    runDDL(driver, " insert into finaltable select * from nobucket");
    ArrayList<String> res2 = queryTable(driver,
        "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
    for (String s : res2) {
        LOG.error(s);
    }
    Assert.assertTrue(res2.isEmpty());
}