List of usage examples for org.apache.hadoop.fs.Path.SEPARATOR
String SEPARATOR
To view the full source code for each usage of org.apache.hadoop.fs.Path.SEPARATOR, follow the source link in the example's header.
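Path.SEPARATOR is the forward-slash string Hadoop uses between path components. Before the project examples, here is a minimal, self-contained sketch of the common pattern they share: joining directory names with Path.SEPARATOR instead of a hard-coded "/" when building an HDFS path string. The class name, helper method, and directory names below are illustrative only and do not come from any of the listed source files.

import org.apache.hadoop.fs.Path;

public class PathSeparatorExample {

    // Join path components with Path.SEPARATOR, mirroring the string-concatenation
    // pattern used throughout the examples below.
    static String join(String base, String... parts) {
        StringBuilder sb = new StringBuilder(base);
        for (String part : parts) {
            sb.append(Path.SEPARATOR).append(part);
        }
        return sb.toString();
    }

    public static void main(String[] args) {
        // Illustrative directory names; any application/work-directory layout works the same way.
        String workDir = join("/user/hadoop", "my-app", "job-123", "staging");
        System.out.println(workDir);           // prints: /user/hadoop/my-app/job-123/staging
        System.out.println(new Path(workDir)); // Path also accepts the assembled string directly
    }
}

String concatenation with Path.SEPARATOR is convenient when the result is stored as a plain property value; when an actual Path object is needed, the two-argument new Path(parent, child) constructor seen in several examples below avoids manual separator handling.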
From source file:fr.ens.biologie.genomique.eoulsan.modules.mgmt.hadoop.DistCp.java
License:LGPL
/**
 * Make a path relative with respect to a root path. absPath is always assumed
 * to descend from root. Otherwise returned path is null.
 */
static String makeRelative(final Path root, final Path absPath) {
    if (!absPath.isAbsolute()) {
        throw new IllegalArgumentException("!absPath.isAbsolute(), absPath=" + absPath);
    }
    String p = absPath.toUri().getPath();

    StringTokenizer pathTokens = new StringTokenizer(p, "/");
    for (StringTokenizer rootTokens = new StringTokenizer(root.toUri().getPath(), "/"); rootTokens
            .hasMoreTokens();) {
        if (!rootTokens.nextToken().equals(pathTokens.nextToken())) {
            return null;
        }
    }
    StringBuilder sb = new StringBuilder();
    for (; pathTokens.hasMoreTokens();) {
        sb.append(pathTokens.nextToken());
        if (pathTokens.hasMoreTokens()) {
            sb.append(Path.SEPARATOR);
        }
    }
    return sb.length() == 0 ? "." : sb.toString();
}
From source file:fr.jetoile.hadoopunit.component.OozieBootstrap.java
License:Apache License
public void createShareLib() {

    if (!oozieShareLibCreate) {
        LOGGER.info("OOZIE: Share Lib Create Disabled... skipping");
    } else {

        try {
            // Get and extract the oozie release
            String oozieExtractTempDir = extractOozieTarFileToTempDir(
                    new File(oozieShareLibPath + Path.SEPARATOR + oozieShareLibName));

            // Extract the sharelib tarball to a temp dir
            fullOozieShareLibTarFilePath = oozieExtractTempDir + Path.SEPARATOR + "oozie-"
                    + getOozieVersionFromOozieTarFileName() + Path.SEPARATOR + "oozie-sharelib-"
                    + getOozieVersionFromOozieTarFileName() + ".tar.gz";
            oozieShareLibExtractTempDir = extractOozieShareLibTarFileToTempDir(
                    new File(fullOozieShareLibTarFilePath));

            // Copy the sharelib into HDFS
            Path destPath = new Path(
                    oozieHdfsShareLibDir + Path.SEPARATOR + SHARE_LIB_PREFIX + getTimestampDirectory());
            LOGGER.info("OOZIE: Writing share lib contents to: {}", destPath);

            FileSystem hdfsFileSystem = null;
            try {
                hdfsFileSystem = ((HdfsBootstrap) HadoopBootstrap.INSTANCE.getService(Component.HDFS))
                        .getHdfsFileSystemHandle();
            } catch (Exception e) {
                LOGGER.error("unable to get hdfs client");
            }
            hdfsFileSystem.copyFromLocalFile(false, new Path(new File(oozieShareLibExtractTempDir).toURI()),
                    destPath);

            // if (purgeLocalShareLibCache) {
            //     FileUtils.deleteDirectory(new File(shareLibCacheDir));
            // }
        } catch (IOException e) {
            LOGGER.error("unable to copy oozie sharelib into hdfs");
        }
    }
}
From source file:fr.jetoile.hadoopunit.HadoopUtils.java
License:Apache License
public static void setHadoopHome() {

    // Set hadoop.home.dir to point to the windows lib dir
    if (System.getProperty("os.name").startsWith("Windows")) {

        if (StringUtils.isEmpty(System.getenv("HADOOP_HOME"))) {

            try {
                configuration = new PropertiesConfiguration(HadoopUnitConfig.DEFAULT_PROPS_FILE);
            } catch (ConfigurationException e) {
                LOG.error("unable to load {}", HadoopUnitConfig.DEFAULT_PROPS_FILE, e);
            }

            String hadoop_home = configuration.getString("HADOOP_HOME");

            LOG.info("Setting hadoop.home.dir: {}", hadoop_home);
            if (hadoop_home == null) {
                LOG.error("HADOOP_HOME should be set or informed into hadoop-unit-default.properties");
                System.exit(-1);
            } else {
                System.setProperty("HADOOP_HOME", hadoop_home);
            }

        } else {
            System.setProperty("HADOOP_HOME", System.getenv("HADOOP_HOME"));
        }

        String windowsLibDir = System.getenv("HADOOP_HOME");

        LOG.info("WINDOWS: Setting hadoop.home.dir: {}", windowsLibDir);
        System.setProperty("hadoop.home.dir", windowsLibDir);
        System.load(new File(windowsLibDir + Path.SEPARATOR + "bin" + Path.SEPARATOR + "hadoop.dll")
                .getAbsolutePath());
        System.load(new File(windowsLibDir + Path.SEPARATOR + "bin" + Path.SEPARATOR + "hdfs.dll")
                .getAbsolutePath());
    }
}
From source file:gobblin.cluster.GobblinClusterUtils.java
License:Apache License
/**
 * Get the application working directory {@link String}.
 *
 * @param applicationName the application name
 * @param applicationId the application ID in string form
 * @return the cluster application working directory {@link String}
 */
public static String getAppWorkDirPath(String applicationName, String applicationId) {
    return applicationName + Path.SEPARATOR + applicationId;
}
From source file:gobblin.cluster.GobblinHelixJobLauncher.java
License:Apache License
public GobblinHelixJobLauncher(Properties jobProps, final HelixManager helixManager, Path appWorkDir,
        List<? extends Tag<?>> metadataTags) throws Exception {
    super(jobProps, metadataTags);

    this.helixManager = helixManager;
    this.helixTaskDriver = new TaskDriver(this.helixManager);

    this.appWorkDir = appWorkDir;
    this.inputWorkUnitDir = new Path(appWorkDir, GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME);
    this.outputTaskStateDir = new Path(this.appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME + Path.SEPARATOR
                    + this.jobContext.getJobId());

    this.helixQueueName = this.jobContext.getJobName();
    this.jobResourceName = TaskUtil.getNamespacedJobName(this.helixQueueName, this.jobContext.getJobId());

    this.jobContext.getJobState().setJobLauncherType(LauncherTypeEnum.CLUSTER);

    this.stateSerDeRunnerThreads = Integer
            .parseInt(jobProps.getProperty(ParallelRunner.PARALLEL_RUNNER_THREADS_KEY,
                    Integer.toString(ParallelRunner.DEFAULT_PARALLEL_RUNNER_THREADS)));

    jobConfig = ConfigUtils.propertiesToConfig(jobProps);

    this.stateStores = new StateStores(jobConfig, appWorkDir,
            GobblinClusterConfigurationKeys.OUTPUT_TASK_STATE_DIR_NAME, appWorkDir,
            GobblinClusterConfigurationKeys.INPUT_WORK_UNIT_DIR_NAME);

    URI fsUri = URI.create(jobProps.getProperty(ConfigurationKeys.FS_URI_KEY, ConfigurationKeys.LOCAL_FS_URI));
    this.fs = FileSystem.get(fsUri, new Configuration());

    this.taskStateCollectorService = new TaskStateCollectorService(jobProps, this.jobContext.getJobState(),
            this.eventBus, this.stateStores.taskStateStore, outputTaskStateDir);
}
From source file:gobblin.cluster.GobblinHelixTaskTest.java
License:Apache License
private void prepareWorkUnit(WorkUnit workUnit) {
    workUnit.setProp(ConfigurationKeys.TASK_ID_KEY, TestHelper.TEST_TASK_ID);
    workUnit.setProp(ConfigurationKeys.TASK_KEY_KEY,
            Long.toString(Id.parse(TestHelper.TEST_TASK_ID).getSequence()));
    workUnit.setProp(ConfigurationKeys.SOURCE_CLASS_KEY, SimpleJsonSource.class.getName());
    workUnit.setProp(ConfigurationKeys.CONVERTER_CLASSES_KEY, SimpleJsonConverter.class.getName());
    workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_FORMAT_KEY, WriterOutputFormat.AVRO.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_DESTINATION_TYPE_KEY,
            Destination.DestinationType.HDFS.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_STAGING_DIR,
            this.appWorkDir.toString() + Path.SEPARATOR + "staging");
    workUnit.setProp(ConfigurationKeys.WRITER_OUTPUT_DIR, this.taskOutputDir.toString());
    workUnit.setProp(ConfigurationKeys.WRITER_FILE_NAME, TestHelper.WRITER_FILE_NAME);
    workUnit.setProp(ConfigurationKeys.WRITER_FILE_PATH, TestHelper.REL_WRITER_FILE_PATH);
    workUnit.setProp(ConfigurationKeys.WRITER_BUILDER_CLASS, AvroDataWriterBuilder.class.getName());
    workUnit.setProp(ConfigurationKeys.SOURCE_SCHEMA, TestHelper.SOURCE_SCHEMA);
}
From source file:gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java
License:Apache License
/**
 * Populate the avro to orc conversion queries. The Queries will be added to
 * {@link QueryBasedHiveConversionEntity#getQueries()}
 */
@Override
public Iterable<QueryBasedHiveConversionEntity> convertRecord(Schema outputAvroSchema,
        QueryBasedHiveConversionEntity conversionEntity, WorkUnitState workUnit) throws DataConversionException {

    Preconditions.checkNotNull(outputAvroSchema, "Avro schema must not be null");
    Preconditions.checkNotNull(conversionEntity, "Conversion entity must not be null");
    Preconditions.checkNotNull(workUnit, "Workunit state must not be null");
    Preconditions.checkNotNull(conversionEntity.getHiveTable(),
            "Hive table within conversion entity must not be null");

    EventWorkunitUtils.setBeginDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    this.hiveDataset = conversionEntity.getConvertibleHiveDataset();

    if (!hasConversionConfig()) {
        return new SingleRecordIterable<>(conversionEntity);
    }

    // Avro table name and location
    String avroTableName = conversionEntity.getHiveTable().getTableName();

    // ORC table name and location
    String orcTableName = getConversionConfig().getDestinationTableName();
    String orcStagingTableName = getOrcStagingTableName(getConversionConfig().getDestinationStagingTableName());
    String orcTableDatabase = getConversionConfig().getDestinationDbName();
    String orcDataLocation = getOrcDataLocation();
    String orcStagingDataLocation = getOrcStagingDataLocation(orcStagingTableName);
    boolean isEvolutionEnabled = getConversionConfig().isEvolutionEnabled();
    Pair<Optional<Table>, Optional<List<Partition>>> destinationMeta =
            getDestinationTableMeta(orcTableDatabase, orcTableName, workUnit);
    Optional<Table> destinationTableMeta = destinationMeta.getLeft();

    // Optional
    // wrapperViewName          : If specified, a view named 'wrapperViewName' is created (if it does not already
    //                            exist) over the destination table
    // isUpdateViewAlwaysEnabled: If false, 'wrapperViewName' is only updated when the schema evolves; if true,
    //                            'wrapperViewName' is always updated (every time publish happens)
    Optional<String> wrapperViewName = getConversionConfig().getDestinationViewName();
    boolean shouldUpdateView = getConversionConfig().isUpdateViewAlwaysEnabled();

    Optional<List<String>> clusterBy = getConversionConfig().getClusterBy().isEmpty()
            ? Optional.<List<String>>absent()
            : Optional.of(getConversionConfig().getClusterBy());
    Optional<Integer> numBuckets = getConversionConfig().getNumBuckets();
    Optional<Integer> rowLimit = getConversionConfig().getRowLimit();
    Properties tableProperties = getConversionConfig().getDestinationTableProperties();

    // The partition dir hint helps create different directories for hourly and daily partitions with the same
    // .. timestamp, such as daily_2016-01-01-00 and hourly_2016-01-01-00
    // This keeps existing hourly data from being deleted at the time of roll up, so Hive queries in flight
    // .. do not fail
    List<String> sourceDataPathIdentifier = getConversionConfig().getSourceDataPathIdentifier();

    // Populate optional partition info
    Map<String, String> partitionsDDLInfo = Maps.newHashMap();
    Map<String, String> partitionsDMLInfo = Maps.newHashMap();
    populatePartitionInfo(conversionEntity, partitionsDDLInfo, partitionsDMLInfo);

    /*
     * Create ORC data location with the same permissions as Avro data
     *
     * Note that hive can also automatically create the non-existing directories, but it does not
     * seem to create them with the desired permissions.
     * According to hive docs, permissions for newly created directories/files can be controlled using uMask like,
     *
     * SET hive.warehouse.subdir.inherit.perms=false;
     * SET fs.permissions.umask-mode=022;
     *
     * Upon testing, this did not work
     */
    try {
        FileStatus sourceDataFileStatus = this.fs
                .getFileStatus(conversionEntity.getHiveTable().getDataLocation());
        FsPermission sourceDataPermission = sourceDataFileStatus.getPermission();
        if (!this.fs.mkdirs(new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission)) {
            throw new RuntimeException(String.format("Failed to create path %s with permissions %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission));
        } else {
            this.fs.setPermission(new Path(getConversionConfig().getDestinationDataPath()),
                    sourceDataPermission);
            // Set the same group as source
            if (!workUnit.getPropAsBoolean(HIVE_DATASET_DESTINATION_SKIP_SETGROUP,
                    DEFAULT_HIVE_DATASET_DESTINATION_SKIP_SETGROUP)) {
                this.fs.setOwner(new Path(getConversionConfig().getDestinationDataPath()), null,
                        sourceDataFileStatus.getGroup());
            }
            log.info(String.format("Created %s with permissions %s and group %s",
                    new Path(getConversionConfig().getDestinationDataPath()), sourceDataPermission,
                    sourceDataFileStatus.getGroup()));
        }
    } catch (IOException e) {
        Throwables.propagate(e);
    }

    // Set hive runtime properties from conversion config
    for (Map.Entry<Object, Object> entry : getConversionConfig().getHiveRuntimeProperties().entrySet()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", entry.getKey(), entry.getValue()));
    }

    // Set hive runtime properties for tracking
    conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_DATASET_URN_KEY,
            conversionEntity.getHiveTable().getCompleteName()));
    if (conversionEntity.getHivePartition().isPresent()) {
        conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_PARTITION_NAME_KEY,
                conversionEntity.getHivePartition().get().getCompleteName()));
    }
    conversionEntity.getQueries().add(String.format("SET %s=%s", GOBBLIN_WORKUNIT_CREATE_TIME_KEY,
            workUnit.getWorkunit().getProp(SlaEventKeys.ORIGIN_TS_IN_MILLI_SECS_KEY)));

    // Create DDL statement for table
    Map<String, String> hiveColumns = new LinkedHashMap<>();
    String createStagingTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
            orcStagingTableName, orcStagingDataLocation, Optional.of(orcTableDatabase),
            Optional.of(partitionsDDLInfo), clusterBy,
            Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(), numBuckets,
            Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(), tableProperties,
            isEvolutionEnabled, destinationTableMeta, hiveColumns);
    conversionEntity.getQueries().add(createStagingTableDDL);
    log.debug("Create staging table DDL: " + createStagingTableDDL);

    // Create DDL statement for partition
    String orcStagingDataPartitionDirName = getOrcStagingDataPartitionDirName(conversionEntity,
            sourceDataPathIdentifier);
    String orcStagingDataPartitionLocation = orcStagingDataLocation + Path.SEPARATOR
            + orcStagingDataPartitionDirName;
    if (partitionsDMLInfo.size() > 0) {
        List<String> createStagingPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                orcTableDatabase, orcStagingTableName, orcStagingDataPartitionLocation, partitionsDMLInfo);
        conversionEntity.getQueries().addAll(createStagingPartitionDDL);
        log.debug("Create staging partition DDL: " + createStagingPartitionDDL);
    }

    // Create DML statement
    String insertInORCStagingTableDML = HiveAvroORCQueryGenerator.generateTableMappingDML(
            conversionEntity.getHiveTable().getAvroSchema(), outputAvroSchema, avroTableName,
            orcStagingTableName, Optional.of(conversionEntity.getHiveTable().getDbName()),
            Optional.of(orcTableDatabase), Optional.of(partitionsDMLInfo), Optional.<Boolean>absent(),
            Optional.<Boolean>absent(), isEvolutionEnabled, destinationTableMeta, rowLimit);
    conversionEntity.getQueries().add(insertInORCStagingTableDML);
    log.debug("Conversion staging DML: " + insertInORCStagingTableDML);

    // TODO: Split this method into two (conversion and publish)
    // Addition to WUS for Staging publish:
    // A. Evolution turned on:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //       2.1 Evolve table (alter table)
    //       2.2 If snapshot table:
    //           2.2.1 Delete data in final table directory
    //           2.2.2 Move data from staging to final table directory
    //           2.2.3 Drop this staging table and delete directories
    //       2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //           2.3.1 Drop if exists partition in final table
    //           2.3.2 Move partition directory
    //           2.3.3 Create partition with location
    //           2.3.4 Drop this staging table and delete directories
    // B. Evolution turned off:
    //    1. If table does not exist: simply create it (now it should exist)
    //    2. If table exists:
    //       2.1 Do not evolve table
    //       2.2 If snapshot table:
    //           2.2.1 Delete data in final table directory
    //           2.2.2 Move data from staging to final table directory
    //           2.2.3 Drop this staging table and delete directories
    //       2.3 If partitioned table, move partitions from staging to final table; for all partitions:
    //           2.3.1 Drop if exists partition in final table
    //           2.3.2 Move partition directory
    //           2.3.3 Create partition with location
    //           2.3.4 Drop this staging table and delete directories
    // Note: The queries below also serve as a compatibility check module before conversion; an incompatible
    // .. schema throws a Runtime exception, hence preventing further execution
    QueryBasedHivePublishEntity publishEntity = new QueryBasedHivePublishEntity();
    List<String> publishQueries = publishEntity.getPublishQueries();
    Map<String, String> publishDirectories = publishEntity.getPublishDirectories();
    List<String> cleanupQueries = publishEntity.getCleanupQueries();
    List<String> cleanupDirectories = publishEntity.getCleanupDirectories();

    // Step:
    // A.1, B.1: If table does not exist, simply create it
    if (!destinationTableMeta.isPresent()) {
        String createTargetTableDDL = HiveAvroORCQueryGenerator.generateCreateTableDDL(outputAvroSchema,
                orcTableName, orcDataLocation, Optional.of(orcTableDatabase), Optional.of(partitionsDDLInfo),
                clusterBy, Optional.<Map<String, HiveAvroORCQueryGenerator.COLUMN_SORT_ORDER>>absent(),
                numBuckets, Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(),
                tableProperties, isEvolutionEnabled, destinationTableMeta, new HashMap<String, String>());
        publishQueries.add(createTargetTableDDL);
        log.debug("Create final table DDL: " + createTargetTableDDL);
    }

    // Step:
    // A.2.1: If table pre-exists (destinationTableMeta would be present), evolve table
    // B.2.1: No-op
    List<String> evolutionDDLs = HiveAvroORCQueryGenerator.generateEvolutionDDL(orcStagingTableName,
            orcTableName, Optional.of(orcTableDatabase), Optional.of(orcTableDatabase), outputAvroSchema,
            isEvolutionEnabled, hiveColumns, destinationTableMeta);
    log.debug("Evolve final table DDLs: " + evolutionDDLs);
    EventWorkunitUtils.setEvolutionMetadata(workUnit, evolutionDDLs);

    // View (if present) must be updated if evolution happens
    shouldUpdateView |= evolutionDDLs.size() > 0;

    publishQueries.addAll(evolutionDDLs);

    if (partitionsDDLInfo.size() == 0) {
        // Step:
        // A.2.2, B.2.2: Snapshot table

        // Step:
        // A.2.2.1, B.2.2.1: Delete data in final table directory
        // A.2.2.2, B.2.2.2: Move data from staging to final table directory
        log.info("Snapshot directory to move: " + orcStagingDataLocation + " to: " + orcDataLocation);
        publishDirectories.put(orcStagingDataLocation, orcDataLocation);

        // Step:
        // A.2.2.3, B.2.2.3: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);
    } else {
        // Step:
        // A.2.3, B.2.3: If partitioned table, move partitions from staging to final table; for all partitions:

        // Step:
        // A.2.3.1, B.2.3.1: Drop if exists partition in final table
        List<String> dropPartitionsDDL = HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase,
                orcTableName, partitionsDMLInfo);
        log.debug("Drop partitions if exist in final table: " + dropPartitionsDDL);
        publishQueries.addAll(dropPartitionsDDL);

        // Step:
        // A.2.3.2, B.2.3.2: Move partition directory
        // Move: orcStagingDataPartitionLocation to: orcFinalDataPartitionLocation
        String orcFinalDataPartitionLocation = orcDataLocation + Path.SEPARATOR
                + orcStagingDataPartitionDirName;
        log.info("Partition directory to move: " + orcStagingDataPartitionLocation + " to: "
                + orcFinalDataPartitionLocation);
        publishDirectories.put(orcStagingDataPartitionLocation, orcFinalDataPartitionLocation);

        // Step:
        // A.2.3.3, B.2.3.3: Create partition with location (and update storage format if not in ORC already)
        String orcDataPartitionLocation = orcDataLocation + Path.SEPARATOR + orcStagingDataPartitionDirName;
        if (workUnit.getPropAsBoolean(HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY,
                DEFAULT_HIVE_CONVERSION_SETSERDETOAVROEXPLICITELY)) {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcDataPartitionLocation, partitionsDMLInfo,
                    Optional.<String>absent());
            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);

            // Updating storage format non-transactionally is a stop-gap measure until Hive supports
            // .. transactionally updating the storage format in the ADD PARTITION command (today it only supports
            // .. specifying location)
            List<String> updatePartitionStorageFormatDDL = HiveAvroORCQueryGenerator
                    .generateAlterTableOrPartitionStorageFormatDDL(orcTableDatabase, orcTableName,
                            Optional.of(partitionsDMLInfo), ORC_FORMAT);
            log.debug("Update final partition storage format to ORC (if not already in ORC)");
            publishQueries.addAll(updatePartitionStorageFormatDDL);
        } else {
            List<String> createFinalPartitionDDL = HiveAvroORCQueryGenerator.generateCreatePartitionDDL(
                    orcTableDatabase, orcTableName, orcDataPartitionLocation, partitionsDMLInfo,
                    Optional.fromNullable(ORC_FORMAT));
            log.debug("Create final partition DDL: " + createFinalPartitionDDL);
            publishQueries.addAll(createFinalPartitionDDL);
        }

        // Step:
        // A.2.3.4, B.2.3.4: Drop this staging table and delete directories
        String dropStagingTableDDL = HiveAvroORCQueryGenerator.generateDropTableDDL(orcTableDatabase,
                orcStagingTableName);
        log.debug("Drop staging table DDL: " + dropStagingTableDDL);
        cleanupQueries.add(dropStagingTableDDL);

        // Delete: orcStagingDataLocation
        log.info("Staging table directory to delete: " + orcStagingDataLocation);
        cleanupDirectories.add(orcStagingDataLocation);
    }

    /*
     * Drop the replaced partitions if any. This is required in case the partition being converted is derived from
     * several other partitions. E.g. a daily partition is a replacement of hourly partitions of the same day. When
     * the daily partition is converted to ORC, all its hourly ORC partitions need to be dropped.
     */
    publishQueries.addAll(HiveAvroORCQueryGenerator.generateDropPartitionsDDL(orcTableDatabase, orcTableName,
            getDropPartitionsDDLInfo(conversionEntity)));

    /*
     * Create or update a view over the ORC table if specified in the config (i.e. the wrapper view name is present
     * in the config)
     */
    if (wrapperViewName.isPresent()) {
        String viewName = wrapperViewName.get();
        List<String> createOrUpdateViewDDLs = HiveAvroORCQueryGenerator.generateCreateOrUpdateViewDDL(
                orcTableDatabase, orcTableName, orcTableDatabase, viewName, shouldUpdateView);
        log.debug("Create or update View DDLs: " + createOrUpdateViewDDLs);
        publishQueries.addAll(createOrUpdateViewDDLs);
    }

    HiveAvroORCQueryGenerator.serializePublishCommands(workUnit, publishEntity);
    log.debug("Publish partition entity: " + publishEntity);

    log.debug("Conversion Query " + conversionEntity.getQueries());

    EventWorkunitUtils.setEndDDLBuildTimeMetadata(workUnit, System.currentTimeMillis());

    return new SingleRecordIterable<>(conversionEntity);
}
From source file:gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java
License:Apache License
/***
 * Get the ORC final table location of format: <ORC final table location>/final
 * @return ORC final table location.
 */
private String getOrcDataLocation() {
    String orcDataLocation = getConversionConfig().getDestinationDataPath();

    return orcDataLocation + Path.SEPARATOR + PUBLISHED_TABLE_SUBDIRECTORY;
}
From source file:gobblin.data.management.conversion.hive.converter.AbstractAvroToOrcConverter.java
License:Apache License
/***
 * Get the ORC staging table location of format: <ORC final table location>/<ORC staging table name>
 * @param orcStagingTableName ORC staging table name.
 * @return ORC staging table location.
 */
private String getOrcStagingDataLocation(String orcStagingTableName) {
    String orcDataLocation = getConversionConfig().getDestinationDataPath();

    return orcDataLocation + Path.SEPARATOR + orcStagingTableName;
}
From source file:gobblin.data.management.conversion.hive.converter.HiveAvroToOrcConverter.java
License:Open Source License
private String getOrcDataLocation(WorkUnitState workUnit, String avroDataLocation, String orcTableName,
        Optional<String> postfix) {
    String orcDataLocation;

    // By default the ORC table creates a new directory where the Avro data resides, with an _orc postfix, but this
    // .. can be overridden by specifying this property
    String orcTableAlternateLocation = workUnit.getJobState().getProp(ORC_TABLE_ALTERNATE_LOCATION);
    if (StringUtils.isNotBlank(orcTableAlternateLocation)) {
        orcDataLocation = new Path(orcTableAlternateLocation, orcTableName).toString();
    } else {
        orcDataLocation = StringUtils.removeEnd(avroDataLocation, Path.SEPARATOR) + "_orc";
    }

    if (postfix.isPresent()) {
        orcDataLocation += postfix.get();
    }

    // Each job execution further writes to a sub-directory within the ORC data directory to support the staging
    // .. use case, i.e. for atomic swap
    orcDataLocation += Path.SEPARATOR + workUnit.getJobState().getId();

    return orcDataLocation;
}