List of usage examples for org.apache.hadoop.fs.Path#toString()
@Override
public String toString()
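Before the examples, a minimal standalone sketch of what toString() returns (the HDFS URI below is illustrative, not taken from any example):

import org.apache.hadoop.fs.Path;

public class PathToStringDemo {
  public static void main(String[] args) {
    // A relative Path keeps its relative string form.
    Path relative = new Path("warehouse", "mytable");
    System.out.println(relative.toString());   // warehouse/mytable

    // A fully qualified Path keeps its scheme and authority.
    // The hdfs://namenode:8020 URI is illustrative.
    Path qualified = new Path("hdfs://namenode:8020/user/hive/warehouse");
    System.out.println(qualified.toString());  // hdfs://namenode:8020/user/hive/warehouse
  }
}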
From source file:com.cloudera.sqoop.manager.SQLServerManagerImportManualTest.java
License:Apache License
private void doImportAndVerify(String tableName, String[] expectedResults,
    String... extraArgs) throws IOException {
  Path warehousePath = new Path(this.getWarehouseDir());
  Path tablePath = new Path(warehousePath, tableName);
  Path filePath = new Path(tablePath, "part-m-00000");

  File tableFile = new File(tablePath.toString());
  if (tableFile.exists() && tableFile.isDirectory()) {
    // remove the directory before running the import.
    FileListing.recursiveDeleteDir(tableFile);
  }

  String[] argv = getArgv(tableName, extraArgs);
  try {
    runImport(argv);
  } catch (IOException ioe) {
    LOG.error("Got IOException during import: " + ioe.toString());
    ioe.printStackTrace();
    fail(ioe.toString());
  }

  File f = new File(filePath.toString());
  assertTrue("Could not find imported data file", f.exists());
  BufferedReader r = null;
  try {
    // Read through the file and make sure it's all there.
    r = new BufferedReader(new InputStreamReader(new FileInputStream(f)));
    for (String expectedLine : expectedResults) {
      assertEquals(expectedLine, r.readLine());
    }
  } catch (IOException ioe) {
    LOG.error("Got IOException verifying results: " + ioe.toString());
    ioe.printStackTrace();
    fail(ioe.toString());
  } finally {
    IOUtils.closeStream(r);
  }
}
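Note that wrapping filePath.toString() in a java.io.File only works because this test writes to the local file system; the TestAllTables examples below switch to FileSystem.open() when running on a real cluster.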
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException {
  Configuration conf = options.getConf();
  Job job = new Job(conf);
  String userClassName = options.getClassName();
  if (null == userClassName) {
    // Shouldn't get here.
    throw new IOException("Record class name not specified with "
        + "--class-name.");
  }

  // Set the external jar to use for the job.
  String existingJar = options.getExistingJarName();
  if (existingJar != null) {
    // User explicitly identified a jar path.
    LOG.debug("Setting job jar to user-specified jar: " + existingJar);
    job.getConfiguration().set("mapred.jar", existingJar);
  } else {
    // Infer it from the location of the specified class, if it's on the
    // classpath.
    try {
      Class<? extends Object> userClass = conf.getClassByName(userClassName);
      if (null != userClass) {
        String userJar = Jars.getJarPathForClass(userClass);
        LOG.debug("Setting job jar based on user class " + userClassName
            + ": " + userJar);
        job.getConfiguration().set("mapred.jar", userJar);
      } else {
        LOG.warn("Specified class " + userClassName + " is not in a jar. "
            + "MapReduce may not find the class");
      }
    } catch (ClassNotFoundException cnfe) {
      throw new IOException(cnfe);
    }
  }

  try {
    Path oldPath = new Path(options.getMergeOldPath());
    Path newPath = new Path(options.getMergeNewPath());

    Configuration jobConf = job.getConfiguration();
    FileSystem fs = FileSystem.get(jobConf);
    oldPath = oldPath.makeQualified(fs);
    newPath = newPath.makeQualified(fs);

    FileInputFormat.addInputPath(job, oldPath);
    FileInputFormat.addInputPath(job, newPath);

    jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
    jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
    jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
    jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

    FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

    if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
      job.setInputFormatClass(SequenceFileInputFormat.class);
      job.setOutputFormatClass(SequenceFileOutputFormat.class);
      job.setMapperClass(MergeRecordMapper.class);
    } else {
      job.setMapperClass(MergeTextMapper.class);
      job.setOutputFormatClass(RawKeyTextOutputFormat.class);
    }

    jobConf.set("mapred.output.key.class", userClassName);
    job.setOutputValueClass(NullWritable.class);

    job.setReducerClass(MergeReducer.class);

    // Set the intermediate data types.
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(MergeRecord.class);

    // Make sure Sqoop and anything else we need is on the classpath.
    cacheJars(job, null);
    return this.runJob(job);
  } catch (InterruptedException ie) {
    throw new IOException(ie);
  } catch (ClassNotFoundException cnfe) {
    throw new IOException(cnfe);
  }
}
From source file:com.cloudera.sqoop.mapreduce.MergeMapperBase.java
License:Apache License
@Override
protected void setup(Context context)
    throws IOException, InterruptedException {
  Configuration conf = context.getConfiguration();
  keyColName = conf.get(MergeJob.MERGE_KEY_COL_KEY);

  InputSplit is = context.getInputSplit();
  FileSplit fs = (FileSplit) is;
  Path splitPath = fs.getPath();

  if (splitPath.toString().startsWith(
      conf.get(MergeJob.MERGE_NEW_PATH_KEY))) {
    this.isNew = true;
  } else if (splitPath.toString().startsWith(
      conf.get(MergeJob.MERGE_OLD_PATH_KEY))) {
    this.isNew = false;
  } else {
    throw new IOException("File " + splitPath + " is not under new path "
        + conf.get(MergeJob.MERGE_NEW_PATH_KEY) + " or old path "
        + conf.get(MergeJob.MERGE_OLD_PATH_KEY));
  }
}
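The startsWith() comparison above works only because runMergeJob() (shown earlier) stores both paths after makeQualified(fs), so the stored prefixes and the fully qualified split paths share the same scheme and authority. A minimal sketch of the idiom, with illustrative names; conf and splitPath are assumed to be in scope:

// Qualify a path before comparing string prefixes: an unqualified
// string (e.g. "/user/sqoop/new") would not match a fully qualified
// split path (e.g. "hdfs://nn:8020/user/sqoop/new").
FileSystem fs = FileSystem.get(conf);
Path newDir = new Path("/user/sqoop/new").makeQualified(fs);
boolean isNew = splitPath.toString().startsWith(newDir.toString());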
From source file:com.cloudera.sqoop.TestAllTables.java
License:Apache License
public void testMultiTableImport() throws IOException {
  String[] argv = getArgv(true, null);
  runImport(new ImportAllTablesTool(), argv);

  Path warehousePath = new Path(this.getWarehouseDir());
  int i = 0;
  for (String tableName : this.tableNames) {
    Path tablePath = new Path(warehousePath, tableName);
    Path filePath = new Path(tablePath, "part-m-00000");

    // dequeue the expected value for this table. This
    // list has the same order as the tableNames list.
    String expectedVal = Integer.toString(i++) + ","
        + this.expectedStrings.get(0);
    this.expectedStrings.remove(0);

    BufferedReader reader = null;
    if (!isOnPhysicalCluster()) {
      reader = new BufferedReader(new InputStreamReader(
          new FileInputStream(new File(filePath.toString()))));
    } else {
      FileSystem dfs = FileSystem.get(getConf());
      FSDataInputStream dis = dfs.open(filePath);
      reader = new BufferedReader(new InputStreamReader(dis));
    }
    try {
      String line = reader.readLine();
      assertEquals("Table " + tableName + " expected a different string",
          expectedVal, line);
    } finally {
      IOUtils.closeStream(reader);
    }
  }
}
From source file:com.cloudera.sqoop.TestAllTables.java
License:Apache License
public void testMultiTableImportWithExclude() throws IOException {
  String exclude = this.tableNames.get(0);
  String[] argv = getArgv(true, new String[] { exclude });
  runImport(new ImportAllTablesTool(), argv);

  Path warehousePath = new Path(this.getWarehouseDir());
  int i = 0;
  for (String tableName : this.tableNames) {
    Path tablePath = new Path(warehousePath, tableName);
    Path filePath = new Path(tablePath, "part-m-00000");

    // dequeue the expected value for this table. This
    // list has the same order as the tableNames list.
    String expectedVal = Integer.toString(i++) + ","
        + this.expectedStrings.get(0);
    this.expectedStrings.remove(0);

    BufferedReader reader = null;
    if (!isOnPhysicalCluster()) {
      reader = new BufferedReader(new InputStreamReader(
          new FileInputStream(new File(filePath.toString()))));
    } else {
      FSDataInputStream dis;
      FileSystem dfs = FileSystem.get(getConf());
      if (tableName.equals(exclude)) {
        try {
          dis = dfs.open(filePath);
          assertFalse(true);
        } catch (FileNotFoundException e) {
          // Success
          continue;
        }
      } else {
        dis = dfs.open(filePath);
      }
      reader = new BufferedReader(new InputStreamReader(dis));
    }
    try {
      String line = reader.readLine();
      assertEquals("Table " + tableName + " expected a different string",
          expectedVal, line);
    } finally {
      IOUtils.closeStream(reader);
    }
  }
}
From source file:com.cloudera.sqoop.TestFreeFormQueryImport.java
License:Apache License
/**
 * Create two tables that share the common id column. Run free-form query
 * import on the result table that is created by joining the two tables on
 * the id column.
 */
public void testSimpleJoin() throws IOException {
  tableNames = new ArrayList<String>();

  String[] types1 = { "SMALLINT", };
  String[] vals1 = { "1", };
  String tableName1 = getTableName();
  createTableWithColTypes(types1, vals1);
  tableNames.add(tableName1);

  incrementTableNum();

  String[] types2 = { "SMALLINT", "VARCHAR(32)", };
  String[] vals2 = { "1", "'foo'", };
  String tableName2 = getTableName();
  createTableWithColTypes(types2, vals2);
  tableNames.add(tableName2);

  String query = "SELECT " + tableName1 + "." + getColName(0) + ", "
      + tableName2 + "." + getColName(1) + " "
      + "FROM " + tableName1 + " JOIN " + tableName2 + " ON ("
      + tableName1 + "." + getColName(0) + " = "
      + tableName2 + "." + getColName(0) + ") WHERE "
      + tableName1 + "." + getColName(0) + " < 3 AND $CONDITIONS";

  runImport(getArgv(tableName1 + "." + getColName(0), query));

  Path warehousePath = new Path(this.getWarehouseDir());
  Path filePath = new Path(warehousePath, "part-m-00000");
  String expectedVal = "1,foo";

  BufferedReader reader = null;
  if (!isOnPhysicalCluster()) {
    reader = new BufferedReader(new InputStreamReader(
        new FileInputStream(new File(filePath.toString()))));
  } else {
    FileSystem dfs = FileSystem.get(getConf());
    FSDataInputStream dis = dfs.open(filePath);
    reader = new BufferedReader(new InputStreamReader(dis));
  }
  try {
    String line = reader.readLine();
    assertEquals("QueryResult expected a different string",
        expectedVal, line);
  } finally {
    IOUtils.closeStream(reader);
  }
}
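The literal "$CONDITIONS" token in the WHERE clause is required by Sqoop for free-form query imports: Sqoop substitutes each mapper's split predicate for it at runtime, so the query must include it verbatim rather than a concrete condition.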
From source file:com.cloudera.sqoop.TestMultiMaps.java
License:Apache License
public void runMultiMapTest(String splitByCol, int expectedSum)
    throws IOException {
  String[] columns = HsqldbTestServer.getFieldNames();
  ClassLoader prevClassLoader = null;
  SequenceFile.Reader reader = null;

  String[] argv = getArgv(true, columns, splitByCol);
  runImport(argv);
  try {
    ImportTool importTool = new ImportTool();
    SqoopOptions opts = importTool.parseArguments(
        getArgv(false, columns, splitByCol), null, null, true);

    CompilationManager compileMgr = new CompilationManager(opts);
    String jarFileName = compileMgr.getJarFilename();

    prevClassLoader = ClassLoaderStack.addJarFile(jarFileName,
        getTableName());

    List<Path> paths = getDataFilePaths();
    Configuration conf = new Configuration();
    int curSum = 0;

    // We expect multiple files. We need to open all the files and sum up the
    // first column across all of them.
    for (Path p : paths) {
      reader = SeqFileReader.getSeqFileReader(p.toString());

      // here we can actually instantiate (k, v) pairs.
      Object key = ReflectionUtils.newInstance(reader.getKeyClass(), conf);
      Object val = ReflectionUtils.newInstance(reader.getValueClass(), conf);

      // We know that these values are two ints separated by a ','
      // character. Since this is all dynamic, though, we don't want to
      // actually link against the class and use its methods. So we just
      // parse this back into int fields manually. Sum them up and ensure
      // that we get the expected total for the first column, to verify that
      // we got all the results from the db into the file.

      // now sum up everything in the file.
      while (reader.next(key) != null) {
        reader.getCurrentValue(val);
        curSum += getFirstInt(val.toString());
      }

      IOUtils.closeStream(reader);
      reader = null;
    }

    assertEquals("Total sum of first db column mismatch", expectedSum,
        curSum);
  } catch (InvalidOptionsException ioe) {
    fail(ioe.toString());
  } catch (ParseException pe) {
    fail(pe.toString());
  } finally {
    IOUtils.closeStream(reader);
    if (null != prevClassLoader) {
      ClassLoaderStack.setCurrentClassLoader(prevClassLoader);
    }
  }
}
From source file:com.cloudera.training.metrics.JobHistoryHelper.java
License:Apache License
public static JobHistory.JobInfo getJobInfoFromHdfsOutputDir(String outputDir,
    Configuration conf) throws IOException {
  Path output = new Path(outputDir);
  Path historyLogDir = new Path(output, "_logs/history");
  FileSystem fs = output.getFileSystem(conf);
  // Check the history directory itself, since that is what the
  // error message reports and what is listed below.
  if (!fs.exists(historyLogDir)) {
    throw new IOException("History directory " + historyLogDir.toString()
        + " does not exist");
  }
  Path[] jobFiles = FileUtil.stat2Paths(fs.listStatus(historyLogDir,
      jobLogFileFilter));
  if (jobFiles.length == 0) {
    throw new IOException("Not a valid history directory "
        + historyLogDir.toString());
  }
  String[] jobDetails =
      JobHistory.JobInfo.decodeJobHistoryFileName(jobFiles[0].getName())
          .split("_");
  String jobId = jobDetails[2] + "_" + jobDetails[3] + "_" + jobDetails[4];
  JobHistory.JobInfo job = new JobHistory.JobInfo(jobId);
  DefaultJobHistoryParser.parseJobTasks(jobFiles[0].toString(), job, fs);
  return job;
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
public static Job prepareJob(Path inputPath, Path outputPath,
    Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper,
    Class<? extends Writable> mapperKey,
    Class<? extends Writable> mapperValue,
    Class<? extends OutputFormat> outputFormat, Configuration conf)
    throws IOException {

  Job job = new Job(new Configuration(conf));
  Configuration jobConf = job.getConfiguration();

  if (mapper.equals(Mapper.class)) {
    throw new IllegalStateException(
        "Can't figure out the user class jar file from mapper/reducer");
  }
  job.setJarByClass(mapper);

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  job.setMapOutputKeyClass(mapperKey);
  job.setMapOutputValueClass(mapperValue);
  job.setOutputKeyClass(mapperKey);
  job.setOutputValueClass(mapperValue);
  jobConf.setBoolean("mapred.compress.map.output", true);
  job.setNumReduceTasks(0);

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
From source file:com.cloudy.mapred.base.JobUtil.java
License:Apache License
/**
 * Create a map and reduce Hadoop job. Does not set the name on the job.
 * @param inputPath The input {@link org.apache.hadoop.fs.Path}
 * @param outputPath The output {@link org.apache.hadoop.fs.Path}
 * @param inputFormat The {@link org.apache.hadoop.mapreduce.InputFormat}
 * @param mapper The {@link org.apache.hadoop.mapreduce.Mapper} class to use
 * @param mapperKey The {@link org.apache.hadoop.io.Writable} key class. If
 *                  the Mapper is a no-op, this value may be null
 * @param mapperValue The {@link org.apache.hadoop.io.Writable} value class.
 *                    If the Mapper is a no-op, this value may be null
 * @param reducer The {@link org.apache.hadoop.mapreduce.Reducer} to use
 * @param reducerKey The reducer key class.
 * @param reducerValue The reducer value class.
 * @param outputFormat The {@link org.apache.hadoop.mapreduce.OutputFormat}.
 * @param conf The {@link org.apache.hadoop.conf.Configuration} to use.
 * @return The {@link org.apache.hadoop.mapreduce.Job}.
 * @throws IOException if there is a problem with the IO.
 *
 * @see #getCustomJobName(String, org.apache.hadoop.mapreduce.JobContext, Class, Class)
 * @see #prepareJob(org.apache.hadoop.fs.Path, org.apache.hadoop.fs.Path, Class, Class, Class, Class, Class,
 *      org.apache.hadoop.conf.Configuration)
 */
public static Job prepareJob(Path inputPath, Path outputPath,
    Class<? extends InputFormat> inputFormat,
    Class<? extends Mapper> mapper,
    Class<? extends Writable> mapperKey,
    Class<? extends Writable> mapperValue,
    Class<? extends Reducer> reducer,
    Class<? extends Writable> reducerKey,
    Class<? extends Writable> reducerValue,
    Class<? extends OutputFormat> outputFormat, Configuration conf)
    throws IOException {

  Job job = new Job(conf);
  Configuration jobConf = job.getConfiguration();

  if (reducer.equals(Reducer.class)) {
    if (mapper.equals(Mapper.class)) {
      throw new IllegalStateException(
          "Can't figure out the user class jar file from mapper/reducer");
    }
    job.setJarByClass(mapper);
  } else {
    job.setJarByClass(reducer);
  }

  job.setInputFormatClass(inputFormat);
  jobConf.set("mapred.input.dir", inputPath.toString());

  job.setMapperClass(mapper);
  if (mapperKey != null) {
    job.setMapOutputKeyClass(mapperKey);
  }
  if (mapperValue != null) {
    job.setMapOutputValueClass(mapperValue);
  }
  // jobConf.setBoolean("mapred.compress.map.output", true);

  job.setReducerClass(reducer);
  job.setOutputKeyClass(reducerKey);
  job.setOutputValueClass(reducerValue);

  job.setOutputFormatClass(outputFormat);
  jobConf.set("mapred.output.dir", outputPath.toString());

  return job;
}
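A hedged usage sketch for the map-reduce overload above. MyMapper and MyReducer are hypothetical Mapper/Reducer subclasses and the input/output paths are illustrative; none of them are part of JobUtil itself:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class PrepareJobDriver {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // prepareJob wires the input/output dirs itself via
    // mapred.input.dir / mapred.output.dir, using Path.toString().
    Job job = JobUtil.prepareJob(
        new Path("/data/input"),    // illustrative input dir
        new Path("/data/output"),   // illustrative output dir
        TextInputFormat.class,
        MyMapper.class, Text.class, IntWritable.class,   // hypothetical mapper
        MyReducer.class, Text.class, IntWritable.class,  // hypothetical reducer
        TextOutputFormat.class, conf);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}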