Example usage for org.apache.hadoop.fs Path SEPARATOR

List of usage examples for org.apache.hadoop.fs Path SEPARATOR

Introduction

This page lists example usages of org.apache.hadoop.fs.Path.SEPARATOR, collected from open-source projects.

Prototype

String SEPARATOR

Document

The directory separator, a slash.
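
A minimal sketch of typical usage follows, assuming only a standard Hadoop client dependency on the classpath; the directory and file names are hypothetical and purely illustrative. Path.SEPARATOR is used to join path components into a string before wrapping it in a Path.

import org.apache.hadoop.fs.Path;

public class PathSeparatorExample {
    public static void main(String[] args) {
        // Hypothetical base directory and file name, joined with the Hadoop separator ("/")
        String baseDir = "/user/example/output";
        String fileName = "part-00000.avro";
        String joined = baseDir + Path.SEPARATOR + fileName;

        // Path accepts the joined string; redundant separators would be normalized
        Path outputPath = new Path(joined);
        System.out.println(outputPath); // /user/example/output/part-00000.avro
    }
}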

Usage

From source file:org.apache.gobblin.writer.ParquetHdfsDataWriterTest.java

License:Apache License

@Test
public void testWrite() throws Exception {
    long firstWrite;
    long secondWrite;
    List<Group> records;
    Group record1 = TestConstants.PARQUET_RECORD_1;
    Group record2 = TestConstants.PARQUET_RECORD_2;
    String filePath = TestConstants.TEST_OUTPUT_DIR + Path.SEPARATOR + this.filePath;
    File outputFile = new File(filePath, TestConstants.PARQUET_TEST_FILENAME);

    this.writer.write(record1);
    firstWrite = this.writer.recordsWritten();
    this.writer.write(record2);
    secondWrite = this.writer.recordsWritten();
    this.writer.close();
    this.writer.commit();
    records = readParquetFiles(outputFile);
    Group resultRecord1 = records.get(0);
    Group resultRecord2 = records.get(1);

    Assert.assertEquals(firstWrite, 1);
    Assert.assertEquals(secondWrite, 2);
    Assert.assertEquals(resultRecord1.getString("name", 0), "tilak");
    Assert.assertEquals(resultRecord1.getInteger("age", 0), 22);
    Assert.assertEquals(resultRecord2.getString("name", 0), "other");
    Assert.assertEquals(resultRecord2.getInteger("age", 0), 22);
}

From source file:org.apache.gobblin.writer.partitioner.TimeBasedAvroWriterPartitionerTest.java

License:Apache License

/**
 * Test
 *  1. Record timestamp of type long
 *  2. Partition path of a given record
 */
@Test
public void testWriter() throws IOException {

    Schema schema = getRecordSchema("long");
    State state = getBasicState();
    // Write three records; each should be written to a different file
    GenericRecordBuilder genericRecordBuilder = new GenericRecordBuilder(schema);

    DataWriter<GenericRecord> millisPartitionWriter = getWriter(schema, state);

    // This timestamp corresponds to 2015/01/01
    genericRecordBuilder.set("timestamp", 1420099200000l);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));

    // This timestamp corresponds to 2015/01/02
    genericRecordBuilder.set("timestamp", 1420185600000l);
    millisPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));

    millisPartitionWriter.close();
    millisPartitionWriter.commit();
    // Check that the writer reports that 2 records have been written
    Assert.assertEquals(millisPartitionWriter.recordsWritten(), 2);

    state.setProp(TimeBasedWriterPartitioner.WRITER_PARTITION_TIMEUNIT, "seconds");
    DataWriter<GenericRecord> secsPartitionWriter = getWriter(schema, state);
    // This timestamp corresponds to 2015/01/03
    genericRecordBuilder.set("timestamp", 1420272000L);
    secsPartitionWriter.writeEnvelope(new RecordEnvelope<>(genericRecordBuilder.build()));
    secsPartitionWriter.close();
    secsPartitionWriter.commit();
    // Check that the writer reports that 1 record has been written
    Assert.assertEquals(secsPartitionWriter.recordsWritten(), 1);

    // Check that 3 files were created
    Assert.assertEquals(FileUtils.listFiles(new File(TEST_ROOT_DIR), new String[] { "avro" }, true).size(), 3);

    // Check that each file exists in the correct location
    File baseOutputDir = new File(OUTPUT_DIR, BASE_FILE_PATH);
    Assert.assertTrue(baseOutputDir.exists());

    File outputDir20150101 = new File(baseOutputDir,
            "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "01" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150101.exists());

    File outputDir20150102 = new File(baseOutputDir,
            "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "02" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150102.exists());

    File outputDir20150103 = new File(baseOutputDir,
            "2015" + Path.SEPARATOR + "01" + Path.SEPARATOR + "03" + Path.SEPARATOR + FILE_NAME);
    Assert.assertTrue(outputDir20150103.exists());
}

From source file:org.apache.gobblin.yarn.GobblinYarnAppLauncher.java

License:Apache License

/**
 * Launch a new Gobblin instance on Yarn.
 *
 * @throws IOException if there's something wrong launching the application
 * @throws YarnException if there's something wrong launching the application
 */
public void launch() throws IOException, YarnException {
    this.eventBus.register(this);

    String clusterName = this.config.getString(GobblinClusterConfigurationKeys.HELIX_CLUSTER_NAME_KEY);
    HelixUtils.createGobblinHelixCluster(
            this.config.getString(GobblinClusterConfigurationKeys.ZK_CONNECTION_STRING_KEY), clusterName);
    LOGGER.info("Created Helix cluster " + clusterName);

    connectHelixManager();

    startYarnClient();

    this.applicationId = getApplicationId();

    this.applicationStatusMonitor.scheduleAtFixedRate(new Runnable() {
        @Override
        public void run() {
            try {
                eventBus.post(new ApplicationReportArrivalEvent(
                        yarnClient.getApplicationReport(applicationId.get())));
            } catch (YarnException | IOException e) {
                LOGGER.error(
                        "Failed to get application report for Gobblin Yarn application " + applicationId.get(),
                        e);
                eventBus.post(new GetApplicationReportFailureEvent(e));
            }
        }
    }, 0, this.appReportIntervalMinutes, TimeUnit.MINUTES);

    List<Service> services = Lists.newArrayList();
    if (this.config.hasPath(GobblinYarnConfigurationKeys.KEYTAB_FILE_PATH)) {
        LOGGER.info("Adding YarnAppSecurityManager since login is keytab based");
        services.add(buildYarnAppSecurityManager());
    }
    if (!this.config.hasPath(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)
            || !this.config.getBoolean(GobblinYarnConfigurationKeys.LOG_COPIER_DISABLE_DRIVER_COPY)) {
        services.add(buildLogCopier(this.config,
                new Path(this.sinkLogRootDir,
                        this.applicationName + Path.SEPARATOR + this.applicationId.get().toString()),
                GobblinClusterUtils.getAppWorkDirPath(this.fs, this.applicationName,
                        this.applicationId.get().toString())));
    }
    if (config.getBoolean(ConfigurationKeys.JOB_EXECINFO_SERVER_ENABLED_KEY)) {
        LOGGER.info("Starting the job execution info server since it is enabled");
        Properties properties = ConfigUtils.configToProperties(config);
        JobExecutionInfoServer executionInfoServer = new JobExecutionInfoServer(properties);
        services.add(executionInfoServer);
        if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
            LOGGER.info("Starting the admin UI server since it is enabled");
            services.add(ServiceBasedAppLauncher.createAdminServer(properties,
                    executionInfoServer.getAdvertisedServerUri()));
        }
    } else if (config.getBoolean(ConfigurationKeys.ADMIN_SERVER_ENABLED_KEY)) {
        LOGGER.warn("NOT starting the admin UI because the job execution info server is NOT enabled");
    }

    this.serviceManager = Optional.of(new ServiceManager(services));
    // Start all the services running in the ApplicationMaster
    this.serviceManager.get().startAsync();
}

From source file:org.apache.gora.mapreduce.GoraOutputFormat.java

License:Apache License

private void setOutputPath(DataStore<K, T> store, TaskAttemptContext context) {
    if (store instanceof FileBackedDataStore) {
        FileBackedDataStore<K, T> fileStore = (FileBackedDataStore<K, T>) store;
        String uniqueName = FileOutputFormat.getUniqueFile(context, "part", "");

        //if file store output is not set, then get the output from FileOutputFormat
        if (fileStore.getOutputPath() == null) {
            fileStore.setOutputPath(FileOutputFormat.getOutputPath(context).toString());
        }

        //set the unique name of the data file
        String path = fileStore.getOutputPath();
        fileStore.setOutputPath(path + Path.SEPARATOR + uniqueName);
    }
}

From source file:org.apache.hcatalog.har.HarOutputCommitterPostProcessor.java

License:Apache License

public String getProcessedLocation(Path ptnPath) throws IOException {
    String harLocn = ("har://" + ptnPath.toUri().getPath()).replaceFirst("/+$", "") + ".har" + Path.SEPARATOR;
    //    LOG.info("har location : " + harLocn);
    return harLocn;
}

From source file:org.apache.hive.hcatalog.api.repl.commands.TestCommands.java

License:Apache License

@BeforeClass
public static void setUpBeforeClass() throws Exception {

    TestHCatClient.startMetaStoreServer();
    hconf = TestHCatClient.getConf();
    hconf.set(HiveConf.ConfVars.SEMANTIC_ANALYZER_HOOK.varname, "");
    hconf.setVar(HiveConf.ConfVars.HIVE_AUTHORIZATION_MANAGER,
            "org.apache.hadoop.hive.ql.security.authorization.plugin.sqlstd.SQLStdHiveAuthorizerFactory");
    TEST_PATH = System.getProperty("test.warehouse.dir", "/tmp") + Path.SEPARATOR
            + TestCommands.class.getCanonicalName() + "-" + System.currentTimeMillis();
    Path testPath = new Path(TEST_PATH);
    FileSystem fs = FileSystem.get(testPath.toUri(), hconf);
    fs.mkdirs(testPath);

    driver = new Driver(hconf);
    SessionState.start(new CliSessionState(hconf));
    client = HCatClient.create(hconf);
}

From source file:org.apache.hive.hcatalog.mapreduce.FileOutputCommitterContainer.java

License:Apache License

/**
 * Generate partition metadata object to be used to add to metadata.
 * @param context The job context.
 * @param jobInfo The OutputJobInfo.
 * @param partLocnRoot The table-equivalent location root of the partition
 *                       (temporary dir if dynamic partition, table dir if static)
 * @param dynPartPath The path of dynamic partition which is created
 * @param partKVs The keyvalue pairs that form the partition
 * @param outputSchema The output schema for the partition
 * @param params The parameters to store inside the partition
 * @param table The Table metadata object under which this Partition will reside
 * @param fs FileSystem object to operate on the underlying filesystem
 * @param grpName Group name that owns the table dir
 * @param perms FsPermission that's the default permission of the table dir.
 * @return Constructed Partition metadata object
 * @throws java.io.IOException
 */

private Partition constructPartition(JobContext context, OutputJobInfo jobInfo, String partLocnRoot,
        String dynPartPath, Map<String, String> partKVs, HCatSchema outputSchema, Map<String, String> params,
        Table table, FileSystem fs, String grpName, FsPermission perms) throws IOException {

    Partition partition = new Partition();
    partition.setDbName(table.getDbName());
    partition.setTableName(table.getTableName());
    partition.setSd(new StorageDescriptor(table.getTTable().getSd()));

    List<FieldSchema> fields = new ArrayList<FieldSchema>();
    for (HCatFieldSchema fieldSchema : outputSchema.getFields()) {
        fields.add(HCatSchemaUtils.getFieldSchema(fieldSchema));
    }

    partition.getSd().setCols(fields);

    partition.setValues(FileOutputFormatContainer.getPartitionValueList(table, partKVs));

    partition.setParameters(params);

    // Sets permissions and group name on partition dirs and files.

    Path partPath;
    if (customDynamicLocationUsed) {
        partPath = new Path(dynPartPath);
    } else if (!dynamicPartitioningUsed && Boolean.parseBoolean((String) table.getProperty("EXTERNAL"))
            && jobInfo.getLocation() != null && jobInfo.getLocation().length() > 0) {
        // Now, we need to de-scratchify this location - i.e., get rid of any
        // _SCRATCH[\d].?[\d]+ from the location.
        String jobLocation = jobInfo.getLocation();
        String finalLocn = jobLocation.replaceAll(Path.SEPARATOR + SCRATCH_DIR_NAME + "\\d\\.?\\d+", "");
        partPath = new Path(finalLocn);
    } else {
        partPath = new Path(partLocnRoot);
        int i = 0;
        for (FieldSchema partKey : table.getPartitionKeys()) {
            if (i++ != 0) {
                fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
                applyGroupAndPerms(fs, partPath, perms, grpName, false);
            }
            partPath = constructPartialPartPath(partPath, partKey.getName().toLowerCase(), partKVs);
        }
    }

    // Apply the group and permissions to the leaf partition and files.
    // Need not bother in case of HDFS as permission is taken care of by setting UMask
    fs.mkdirs(partPath); // Attempt to make the path in case it does not exist before we check
    if (!ShimLoader.getHadoopShims().getHCatShim().isFileInHDFS(fs, partPath)) {
        applyGroupAndPerms(fs, partPath, perms, grpName, true);
    }

    // Set the location in the StorageDescriptor
    if (dynamicPartitioningUsed) {
        String dynamicPartitionDestination = getFinalDynamicPartitionDestination(table, partKVs, jobInfo);
        if (harProcessor.isEnabled()) {
            harProcessor.exec(context, partition, partPath);
            partition.getSd()
                    .setLocation(harProcessor.getProcessedLocation(new Path(dynamicPartitionDestination)));
        } else {
            partition.getSd().setLocation(dynamicPartitionDestination);
        }
    } else {
        partition.getSd().setLocation(partPath.toString());
    }
    return partition;
}

From source file:org.apache.hive.hcatalog.streaming.mutate.worker.MetaStorePartitionHelper.java

License:Apache License

/** Creates the specified partition if it does not already exist. Does nothing if the table is unpartitioned. */
@Override
public void createPartitionIfNotExists(List<String> newPartitionValues) throws WorkerException {
    if (newPartitionValues.isEmpty()) {
        return;
    }

    try {
        LOG.debug("Attempting to create partition (if not exists) {}.{}:{}", databaseName, tableName,
                newPartitionValues);
        Table table = metaStoreClient.getTable(databaseName, tableName);

        Partition partition = new Partition();
        partition.setDbName(table.getDbName());
        partition.setTableName(table.getTableName());
        StorageDescriptor partitionSd = new StorageDescriptor(table.getSd());
        partitionSd.setLocation(table.getSd().getLocation() + Path.SEPARATOR
                + Warehouse.makePartName(table.getPartitionKeys(), newPartitionValues));
        partition.setSd(partitionSd);
        partition.setValues(newPartitionValues);

        metaStoreClient.add_partition(partition);
    } catch (AlreadyExistsException e) {
        LOG.debug("Partition already exisits: {}.{}:{}", databaseName, tableName, newPartitionValues);
    } catch (NoSuchObjectException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Table not found '" + databaseName + "." + tableName + "'.", e);
    } catch (TException e) {
        LOG.error("Failed to create partition : " + newPartitionValues, e);
        throw new PartitionCreationException("Failed to create partition '" + newPartitionValues
                + "' on table '" + databaseName + "." + tableName + "'", e);
    }
}

From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java

License:Apache License

private void createStoreSales(String dbName, String loc) throws Exception {
    String dbUri = "raw://" + new Path(loc).toUri().toString();
    String tableLoc = dbUri + Path.SEPARATOR + "store_sales";

    boolean success = runDDL(driver, "create database IF NOT EXISTS " + dbName + " location '" + dbUri + "'");
    Assert.assertTrue(success);
    success = runDDL(driver, "use " + dbName);
    Assert.assertTrue(success);

    success = runDDL(driver, "drop table if exists store_sales");
    Assert.assertTrue(success);
    success = runDDL(driver, "create table store_sales\n" + "(\n" + "    ss_sold_date_sk           int,\n"
            + "    ss_sold_time_sk           int,\n" + "    ss_item_sk                int,\n"
            + "    ss_customer_sk            int,\n" + "    ss_cdemo_sk               int,\n"
            + "    ss_hdemo_sk               int,\n" + "    ss_addr_sk                int,\n"
            + "    ss_store_sk               int,\n" + "    ss_promo_sk               int,\n"
            + "    ss_ticket_number          int,\n" + "    ss_quantity               int,\n"
            + "    ss_wholesale_cost         decimal(7,2),\n" + "    ss_list_price             decimal(7,2),\n"
            + "    ss_sales_price            decimal(7,2),\n" + "    ss_ext_discount_amt       decimal(7,2),\n"
            + "    ss_ext_sales_price        decimal(7,2),\n" + "    ss_ext_wholesale_cost     decimal(7,2),\n"
            + "    ss_ext_list_price         decimal(7,2),\n" + "    ss_ext_tax                decimal(7,2),\n"
            + "    ss_coupon_amt             decimal(7,2),\n" + "    ss_net_paid               decimal(7,2),\n"
            + "    ss_net_paid_inc_tax       decimal(7,2),\n" + "    ss_net_profit             decimal(7,2)\n"
            + ")\n" + " partitioned by (dt string)\n" + "clustered by (ss_store_sk, ss_promo_sk)\n"
            + "INTO 4 BUCKETS stored as orc " + " location '" + tableLoc + "'"
            + "  TBLPROPERTIES ('orc.compress'='NONE', 'transactional'='true')");
    Assert.assertTrue(success);

    success = runDDL(driver, "alter table store_sales add partition(dt='2015')");
    Assert.assertTrue(success);
}

From source file:org.apache.hive.hcatalog.streaming.TestStreaming.java

License:Apache License

@Test
public void testStreamBucketingMatchesRegularBucketing() throws Exception {
    int bucketCount = 100;

    String dbUri = "raw://" + new Path(dbFolder.newFolder().toString()).toUri().toString();
    String tableLoc = "'" + dbUri + Path.SEPARATOR + "streamedtable" + "'";
    String tableLoc2 = "'" + dbUri + Path.SEPARATOR + "finaltable" + "'";
    String tableLoc3 = "'" + dbUri + Path.SEPARATOR + "nobucket" + "'";

    runDDL(driver, "create database testBucketing3");
    runDDL(driver, "use testBucketing3");
    runDDL(driver,
            "create table streamedtable ( key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
                    + bucketCount + " buckets  stored as orc  location " + tableLoc
                    + " TBLPROPERTIES ('transactional'='true')");
    //  In the 'nobucket' table we capture the bucketid from streamedtable to work around a Hive bug that prevents joining two identically bucketed tables
    runDDL(driver,
            "create table nobucket ( bucketid int, key1 string,key2 int,data string ) location " + tableLoc3);
    runDDL(driver,
            "create table finaltable ( bucketid int, key1 string,key2 int,data string ) clustered by ( key1,key2 ) into "
                    + bucketCount + " buckets  stored as orc location " + tableLoc2
                    + " TBLPROPERTIES ('transactional'='true')");

    String[] records = new String[] { "PSFAHYLZVC,29,EPNMA", "PPPRKWAYAU,96,VUTEE", "MIAOFERCHI,3,WBDSI",
            "CEGQAZOWVN,0,WCUZL", "XWAKMNSVQF,28,YJVHU", "XBWTSAJWME,2,KDQFO", "FUVLQTAXAY,5,LDSDG",
            "QTQMDJMGJH,6,QBOMA", "EFLOTLWJWN,71,GHWPS", "PEQNAOJHCM,82,CAAFI", "MOEKQLGZCP,41,RUACR",
            "QZXMCOPTID,37,LFLWE", "EYALVWICRD,13,JEZLC", "VYWLZAYTXX,16,DMVZX", "OSALYSQIXR,47,HNZVE",
            "JGKVHKCEGQ,25,KSCJB", "WQFMMYDHET,12,DTRWA", "AJOVAYZKZQ,15,YBKFO", "YAQONWCUAU,31,QJNHZ",
            "DJBXUEUOEB,35,IYCBL" };

    HiveEndPoint endPt = new HiveEndPoint(metaStoreURI, "testBucketing3", "streamedtable", null);
    String[] colNames1 = new String[] { "key1", "key2", "data" };
    DelimitedInputWriter wr = new DelimitedInputWriter(colNames1, ",", endPt);
    StreamingConnection connection = endPt.newConnection(false, "UT_" + Thread.currentThread().getName());

    TransactionBatch txnBatch = connection.fetchTransactionBatch(2, wr);
    txnBatch.beginNextTransaction();

    for (String record : records) {
        txnBatch.write(record.toString().getBytes());
    }

    txnBatch.commit();
    txnBatch.close();
    connection.close();

    ArrayList<String> res1 = queryTable(driver, "select row__id.bucketid, * from streamedtable order by key2");
    for (String re : res1) {
        System.out.println(re);
    }

    driver.run("insert into nobucket select row__id.bucketid,* from streamedtable");
    runDDL(driver, " insert into finaltable select * from nobucket");
    ArrayList<String> res2 = queryTable(driver,
            "select row__id.bucketid,* from finaltable where row__id.bucketid<>bucketid");
    for (String s : res2) {
        LOG.error(s);
    }
    Assert.assertTrue(res2.isEmpty());
}