Usage examples for org.apache.hadoop.fs.Path#SEPARATOR

public static final String SEPARATOR — the separator string ("/") placed between the components of a Hadoop Path, on every platform.
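As a quick orientation before the project examples, here is a minimal self-contained sketch; the class name and paths are illustrative, not taken from any source file below:

import org.apache.hadoop.fs.Path;

public class PathSeparatorDemo {
  public static void main(String[] args) {
    // Path.SEPARATOR is always "/", independent of the local OS File.separator,
    // because Hadoop paths follow URI conventions on every platform.
    Path tempDir = new Path("/tmp" + Path.SEPARATOR + "myJob");
    System.out.println(tempDir);        // /tmp/myJob
    System.out.println(Path.SEPARATOR); // /
  }
}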
From source file: gobblin.data.management.conversion.hive.validation.ValidationJob.java
License: Apache License

/**
 * Execute Hive queries using {@link HiveJdbcConnector} and validate results.
 * @param queries Queries to execute.
 */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "SQL_NONCONSTANT_STRING_PASSED_TO_EXECUTE",
    justification = "Temporary fix")
private List<Long> getValidationOutputFromHive(List<String> queries) throws IOException {
  if (null == queries || queries.size() == 0) {
    log.warn("No queries specified to be executed");
    return Collections.emptyList();
  }

  List<Long> rowCounts = Lists.newArrayList();
  Closer closer = Closer.create();
  try {
    HiveJdbcConnector hiveJdbcConnector = closer.register(HiveJdbcConnector.newConnectorWithProps(props));
    for (String query : queries) {
      String hiveOutput = "hiveConversionValidationOutput_" + UUID.randomUUID().toString();
      Path hiveTempDir = new Path("/tmp" + Path.SEPARATOR + hiveOutput);
      query = "INSERT OVERWRITE DIRECTORY '" + hiveTempDir + "' " + query;
      log.info("Executing query: " + query);

      try {
        hiveJdbcConnector.executeStatements("SET hive.exec.compress.output=false",
            "SET hive.auto.convert.join=false", query);

        FileStatus[] fileStatusList = this.fs.listStatus(hiveTempDir);
        List<FileStatus> files = new ArrayList<>();
        for (FileStatus fileStatus : fileStatusList) {
          if (fileStatus.isFile()) {
            files.add(fileStatus);
          }
        }

        if (files.size() > 1) {
          log.warn("Found more than one output file. Should have been one.");
        } else if (files.size() == 0) {
          log.warn("Found no output file. Should have been one.");
        } else {
          String theString = IOUtils.toString(
              new InputStreamReader(this.fs.open(files.get(0).getPath()), Charsets.UTF_8));
          log.info("Found row count: " + theString.trim());
          if (StringUtils.isBlank(theString.trim())) {
            rowCounts.add(0L);
          } else {
            try {
              rowCounts.add(Long.parseLong(theString.trim()));
            } catch (NumberFormatException e) {
              throw new RuntimeException("Could not parse Hive output: " + theString.trim(), e);
            }
          }
        }
      } finally {
        if (this.fs.exists(hiveTempDir)) {
          log.debug("Deleting temp dir: " + hiveTempDir);
          this.fs.delete(hiveTempDir, true);
        }
      }
    }
  } catch (SQLException e) {
    throw new RuntimeException(e);
  } finally {
    try {
      closer.close();
    } catch (IOException e) {
      log.warn("Could not close HiveJdbcConnector", e);
    }
  }
  return rowCounts;
}
From source file: gobblin.data.management.ConversionHiveTestUtils.java
License: Apache License

public static Schema readSchemaFromJsonFile(String directory, String filename) throws IOException {
  return new Schema.Parser().parse(ConversionHiveTestUtils.class.getClassLoader()
      .getResourceAsStream(StringUtils.removeEnd(directory, Path.SEPARATOR) + Path.SEPARATOR + filename));
}
From source file: gobblin.data.management.ConversionHiveTestUtils.java
License: Apache License

public static String readQueryFromFile(String directory, String filename) throws IOException {
  InputStream is = ConversionHiveTestUtils.class.getClassLoader()
      .getResourceAsStream(StringUtils.removeEnd(directory, Path.SEPARATOR) + Path.SEPARATOR + filename);
  return IOUtils.toString(is, "UTF-8");
}
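Both helpers strip any trailing separator from the directory before re-joining it with the filename, so callers may pass "avro" or "avro/" interchangeably. A hypothetical usage, assuming resources avro/user.avsc and queries/select_count.sql exist on the test classpath:

Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile("avro/", "user.avsc");
String query = ConversionHiveTestUtils.readQueryFromFile("queries", "select_count.sql");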
From source file: gobblin.data.management.copy.hive.HiveDatasetFinder.java
License: Apache License

/**
 * Gets the {@link Config} for this <code>dbAndTable</code>.
 * Cases:
 * <ul>
 * <li>If {@link #configStoreUri} is available it gets the dataset config from the config store at this uri
 * <li>If {@link #configStoreUri} is not available it uses the job config as dataset config
 * <li>If {@link #datasetConfigPrefix} is specified, only configs with this prefix are returned
 * <li>If {@link #datasetConfigPrefix} is not specified, all configs are returned
 * </ul>
 * @param table of the dataset to get config
 * @return the {@link Config} for <code>dbAndTable</code>
 */
private Config getDatasetConfig(Table table)
    throws ConfigStoreFactoryDoesNotExistsException, ConfigStoreCreationException, URISyntaxException {
  Config datasetConfig;
  // Config store enabled
  if (this.configStoreUri.isPresent()) {
    datasetConfig = this.configClient.getConfig(
        this.configStoreUri.get() + Path.SEPARATOR + this.configStoreDatasetUriBuilder.apply(table));
    // If config store is not enabled use job config
  } else {
    datasetConfig = this.jobConfig;
  }

  return StringUtils.isBlank(this.datasetConfigPrefix) ? datasetConfig
      : ConfigUtils.getConfig(datasetConfig, this.datasetConfigPrefix, ConfigFactory.empty());
}
From source file: gobblin.data.management.copy.writer.TarArchiveInputStreamDataWriter.java
License: Apache License

/**
 * Untars the passed in {@link FileAwareInputStream} to the task's staging directory. Uses the name of the root
 * {@link TarArchiveEntry} in the stream as the directory name for the untarred file. The method also commits the
 * data by moving the file from staging to output directory.
 *
 * @see gobblin.data.management.copy.writer.FileAwareInputStreamDataWriter#write(gobblin.data.management.copy.FileAwareInputStream)
 */
@Override
public void writeImpl(InputStream inputStream, Path writeAt, CopyableFile copyableFile) throws IOException {
  this.closer.register(inputStream);
  TarArchiveInputStream tarIn = new TarArchiveInputStream(inputStream);
  final ReadableByteChannel inputChannel = Channels.newChannel(tarIn);
  TarArchiveEntry tarEntry;

  // flush the first entry in the tar, which is just the root directory
  tarEntry = tarIn.getNextTarEntry();
  String tarEntryRootName = StringUtils.remove(tarEntry.getName(), Path.SEPARATOR);

  log.info("Unarchiving at " + writeAt);

  try {
    while ((tarEntry = tarIn.getNextTarEntry()) != null) {
      // the API tarEntry.getName() is misleading: it is actually the path of the tarEntry in the tar file
      String newTarEntryPath = tarEntry.getName().replace(tarEntryRootName, writeAt.getName());
      Path tarEntryStagingPath = new Path(writeAt.getParent(), newTarEntryPath);

      if (tarEntry.isDirectory() && !this.fs.exists(tarEntryStagingPath)) {
        this.fs.mkdirs(tarEntryStagingPath);
      } else if (!tarEntry.isDirectory()) {
        FSDataOutputStream out = this.fs.create(tarEntryStagingPath, true);
        final WritableByteChannel outputChannel = Channels.newChannel(out);
        try {
          StreamCopier copier = new StreamCopier(inputChannel, outputChannel);
          if (isInstrumentationEnabled()) {
            copier.withCopySpeedMeter(this.copySpeedMeter);
          }
          this.bytesWritten.addAndGet(copier.copy());
          if (isInstrumentationEnabled()) {
            log.info("File {}: copied {} bytes, average rate: {} B/s", copyableFile.getOrigin().getPath(),
                this.copySpeedMeter.getCount(), this.copySpeedMeter.getMeanRate());
          } else {
            log.info("File {} copied.", copyableFile.getOrigin().getPath());
          }
        } finally {
          out.close();
          outputChannel.close();
        }
      }
    }
  } finally {
    tarIn.close();
    inputChannel.close();
    inputStream.close();
  }
}
From source file: gobblin.data.management.hive.HiveConfigClientUtils.java
License: Apache License

/**
 * Get the dataset uri for a hive db and table. The uri is relative to the store uri.
 * @param table the hive table for which a config client uri needs to be built
 */
public static String getDatasetUri(Table table) {
  return HIVE_DATASETS_CONFIG_PREFIX + table.getDbName() + Path.SEPARATOR + table.getTableName();
}
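A short usage sketch with illustrative names; for a Table whose dbName is "tracking" and tableName is "events", the method simply concatenates the prefix and the two name components:

// hypothetical Table with dbName "tracking" and tableName "events"
String uri = HiveConfigClientUtils.getDatasetUri(table);
// uri equals HIVE_DATASETS_CONFIG_PREFIX + "tracking" + "/" + "events"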
From source file: gobblin.data.management.retention.sql.SqlBasedRetentionPoc.java
License: Apache License

private void insertSnapshot(Path snapshotPath) throws Exception {
  String datasetPath = StringUtils.substringBeforeLast(snapshotPath.toString(), Path.SEPARATOR);
  String snapshotName = StringUtils.substringAfterLast(snapshotPath.toString(), Path.SEPARATOR);
  long ts = Long.parseLong(StringUtils.substringBefore(snapshotName, "-PT-"));
  long recordCount = Long.parseLong(StringUtils.substringAfter(snapshotName, "-PT-"));

  PreparedStatement insert = connection.prepareStatement("INSERT INTO Snapshots VALUES (?, ?, ?, ?, ?)");
  insert.setString(1, datasetPath);
  insert.setString(2, snapshotName);
  insert.setString(3, snapshotPath.toString());
  insert.setTimestamp(4, new Timestamp(ts));
  insert.setLong(5, recordCount);
  insert.executeUpdate();
}
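Because Hadoop Path strings always use "/" as the separator, Commons Lang's substringBeforeLast/substringAfterLast with Path.SEPARATOR split a path into its parent and leaf without any FileSystem calls. A minimal sketch with an illustrative snapshot path:

String p = "/data/tracking/snapshots/1420000000000-PT-500";
String datasetPath = StringUtils.substringBeforeLast(p, Path.SEPARATOR); // "/data/tracking/snapshots"
String snapshotName = StringUtils.substringAfterLast(p, Path.SEPARATOR); // "1420000000000-PT-500"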
From source file: gobblin.data.management.retention.sql.SqlBasedRetentionPoc.java
License: Apache License

private void insertDailyPartition(Path dailyPartitionPath) throws Exception {
  String datasetPath = StringUtils.substringBeforeLast(dailyPartitionPath.toString(),
      Path.SEPARATOR + "daily");
  DateTime partition = DateTimeFormat.forPattern(DAILY_PARTITION_PATTERN)
      .parseDateTime(StringUtils.substringAfter(dailyPartitionPath.toString(), "daily" + Path.SEPARATOR));

  PreparedStatement insert = connection.prepareStatement("INSERT INTO Daily_Partitions VALUES (?, ?, ?)");
  insert.setString(1, datasetPath);
  insert.setString(2, dailyPartitionPath.toString());
  insert.setTimestamp(3, new Timestamp(partition.getMillis()));
  insert.executeUpdate();
}
From source file: gobblin.runtime.mapreduce.GobblinOutputCommitterTest.java
License: Apache License

@BeforeClass
public void setupWorkUnitFiles() throws IOException {
  this.conf = new Configuration();
  this.fs = FileSystem.getLocal(this.conf);
  this.stagingDirs = Lists.newArrayList();

  // Create a list of WorkUnits to serialize
  WorkUnit wu1 = createAndSetWorkUnit("wu1");
  WorkUnit wu2 = createAndSetWorkUnit("wu2");
  WorkUnit wu3 = createAndSetWorkUnit("wu3");
  WorkUnit wu4 = createAndSetWorkUnit("wu4");

  // Create a MultiWorkUnit to serialize
  MultiWorkUnit mwu1 = MultiWorkUnit.createEmpty();
  mwu1.setProp(ConfigurationKeys.TASK_ID_KEY, System.nanoTime());
  mwu1.addWorkUnits(Arrays.asList(wu3, wu4));

  Path inputDir = new Path(new Path(OUTPUT_PATH, JOB_NAME), "input");

  // Write each WorkUnit to a separate file under inputDir
  Closer closer = Closer.create();
  try {
    wu1.write(closer.register(this.fs
        .create(new Path(inputDir, wu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
            .suffix("wu"))));
    wu2.write(closer.register(this.fs
        .create(new Path(inputDir, wu2.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
            .suffix("wu"))));
    mwu1.write(closer.register(this.fs
        .create(new Path(inputDir, mwu1.getProp(ConfigurationKeys.TASK_ID_KEY) + Path.SEPARATOR + "_")
            .suffix("mwu"))));
  } finally {
    closer.close();
  }
}
From source file: gobblin.runtime.mapreduce.GobblinOutputCommitterTest.java
License: Apache License

/**
 * Helper method to create a {@link WorkUnit}, set its staging directories, and create the staging directories
 * on the local fs
 * @param workUnitName is the name of the {@link WorkUnit} to create
 * @return the {@link WorkUnit} that was created
 * @throws IOException
 */
private WorkUnit createAndSetWorkUnit(String workUnitName) throws IOException {
  WorkUnit wu = WorkUnit.createEmpty();
  wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.TASK_ID_KEY, 1, 0),
      System.nanoTime());

  Path wuStagingDir = new Path(OUTPUT_PATH,
      JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR + STAGING_DIR_NAME);
  wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_STAGING_DIR, 1, 0),
      wuStagingDir.toString());
  this.fs.mkdirs(wuStagingDir);
  this.stagingDirs.add(wuStagingDir);

  Path wuOutputDir = new Path(OUTPUT_PATH,
      JOB_NAME + Path.SEPARATOR + workUnitName + Path.SEPARATOR + OUTPUT_DIR_NAME);
  wu.setProp(ForkOperatorUtils.getPropertyNameForBranch(ConfigurationKeys.WRITER_OUTPUT_DIR, 1, 0),
      wuOutputDir.toString());
  this.fs.mkdirs(wuOutputDir);
  this.stagingDirs.add(wuOutputDir);

  return wu;
}