List of usage examples for the open method of org.apache.hadoop.fs.FileSystem
public FSDataInputStream open(PathHandle fd) throws IOException
From source file:com.cloudera.sqoop.TestAllTables.java
License:Apache License
public void testMultiTableImportWithExclude() throws IOException { String exclude = this.tableNames.get(0); String[] argv = getArgv(true, new String[] { exclude }); runImport(new ImportAllTablesTool(), argv); Path warehousePath = new Path(this.getWarehouseDir()); int i = 0;//from w w w.j a v a 2s . c o m for (String tableName : this.tableNames) { Path tablePath = new Path(warehousePath, tableName); Path filePath = new Path(tablePath, "part-m-00000"); // dequeue the expected value for this table. This // list has the same order as the tableNames list. String expectedVal = Integer.toString(i++) + "," + this.expectedStrings.get(0); this.expectedStrings.remove(0); BufferedReader reader = null; if (!isOnPhysicalCluster()) { reader = new BufferedReader( new InputStreamReader(new FileInputStream(new File(filePath.toString())))); } else { FSDataInputStream dis; FileSystem dfs = FileSystem.get(getConf()); if (tableName.equals(exclude)) { try { dis = dfs.open(filePath); assertFalse(true); } catch (FileNotFoundException e) { // Success continue; } } else { dis = dfs.open(filePath); } reader = new BufferedReader(new InputStreamReader(dis)); } try { String line = reader.readLine(); assertEquals("Table " + tableName + " expected a different string", expectedVal, line); } finally { IOUtils.closeStream(reader); } } }
From source file:com.cloudera.sqoop.TestCompression.java
License:Apache License
public void runTextCompressionTest(CompressionCodec codec, int expectedNum) throws IOException { String[] columns = HsqldbTestServer.getFieldNames(); String[] argv = getArgv(true, columns, codec, "--as-textfile"); runImport(argv);//from w w w . ja v a2 s . c om Configuration conf = new Configuration(); if (!BaseSqoopTestCase.isOnPhysicalCluster()) { conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS); } FileSystem fs = FileSystem.get(conf); if (codec == null) { codec = new GzipCodec(); } ReflectionUtils.setConf(codec, getConf()); Path p = new Path(getDataFilePath().toString() + codec.getDefaultExtension()); InputStream is = codec.createInputStream(fs.open(p)); BufferedReader r = new BufferedReader(new InputStreamReader(is)); int numLines = 0; while (true) { String ln = r.readLine(); if (ln == null) { break; } numLines++; } r.close(); assertEquals(expectedNum, numLines); }
From source file:com.cloudera.sqoop.TestExport.java
License:Apache License
/**
 * Decompresses file {@code f} with the codec selected from its path and
 * asserts that it contains exactly {@code expectedNumLines} lines.
 *
 * @param f the compressed file to read
 * @param expectedNumLines the number of lines the file must contain
 * @throws IOException if the file cannot be opened or read, or if no
 *         compression codec is registered for the path
 */
private void verifyCompressedFile(Path f, int expectedNumLines) throws IOException {
    Configuration conf = new Configuration();
    if (!BaseSqoopTestCase.isOnPhysicalCluster()) {
        conf.set(CommonArgs.FS_DEFAULT_NAME, CommonArgs.LOCAL_FS);
    }
    FileSystem fs = FileSystem.get(conf);

    int numLines = 0;
    Decompressor decompressor = null;
    InputStream is = fs.open(f);
    try {
        CompressionCodecFactory ccf = new CompressionCodecFactory(conf);
        CompressionCodec codec = ccf.getCodec(f);
        LOG.info("gzip check codec is " + codec);
        if (codec == null) {
            // Fail with a clear message instead of a NullPointerException below.
            throw new IOException("No compression codec found for " + f);
        }

        decompressor = CodecPool.getDecompressor(codec);
        if (null == decompressor) {
            LOG.info("Verifying gzip sanity with null decompressor");
        } else {
            LOG.info("Verifying gzip sanity with decompressor: " + decompressor.toString());
        }

        is = codec.createInputStream(is, decompressor);
        BufferedReader r = new BufferedReader(new InputStreamReader(is));
        while (r.readLine() != null) {
            numLines++;
        }
    } finally {
        // Close the outermost stream first, then hand the borrowed
        // decompressor back to the pool.
        try {
            is.close();
        } finally {
            CodecPool.returnDecompressor(decompressor);
        }
    }

    assertEquals("Did not read back correct number of lines", expectedNumLines, numLines);
    LOG.info("gzip sanity check returned " + numLines + " lines; ok.");
}
From source file:com.cloudera.sqoop.TestFreeFormQueryImport.java
License:Apache License
/** * Create two tables that share the common id column. Run free-form query * import on the result table that is created by joining the two tables on * the id column./* w w w.jav a 2 s . c om*/ */ public void testSimpleJoin() throws IOException { tableNames = new ArrayList<String>(); String[] types1 = { "SMALLINT", }; String[] vals1 = { "1", }; String tableName1 = getTableName(); createTableWithColTypes(types1, vals1); tableNames.add(tableName1); incrementTableNum(); String[] types2 = { "SMALLINT", "VARCHAR(32)", }; String[] vals2 = { "1", "'foo'", }; String tableName2 = getTableName(); createTableWithColTypes(types2, vals2); tableNames.add(tableName2); String query = "SELECT " + tableName1 + "." + getColName(0) + ", " + tableName2 + "." + getColName(1) + " " + "FROM " + tableName1 + " JOIN " + tableName2 + " ON (" + tableName1 + "." + getColName(0) + " = " + tableName2 + "." + getColName(0) + ") WHERE " + tableName1 + "." + getColName(0) + " < 3 AND $CONDITIONS"; runImport(getArgv(tableName1 + "." + getColName(0), query)); Path warehousePath = new Path(this.getWarehouseDir()); Path filePath = new Path(warehousePath, "part-m-00000"); String expectedVal = "1,foo"; BufferedReader reader = null; if (!isOnPhysicalCluster()) { reader = new BufferedReader(new InputStreamReader(new FileInputStream(new File(filePath.toString())))); } else { FileSystem dfs = FileSystem.get(getConf()); FSDataInputStream dis = dfs.open(filePath); reader = new BufferedReader(new InputStreamReader(dis)); } try { String line = reader.readLine(); assertEquals("QueryResult expected a different string", expectedVal, line); } finally { IOUtils.closeStream(reader); } }
From source file:com.cloudera.sqoop.TestIncrementalImport.java
License:Apache License
/** * Look at a directory that should contain files full of an imported 'id' * column. Assert that all numbers in [0, expectedNums) are present * in order./*ww w . ja v a 2 s . c om*/ */ public void assertDirOfNumbers(String tableName, int expectedNums) { try { FileSystem fs = FileSystem.getLocal(new Configuration()); Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR); Path tableDir = new Path(warehouse, tableName); FileStatus[] stats = fs.listStatus(tableDir); String[] fileNames = new String[stats.length]; for (int i = 0; i < stats.length; i++) { fileNames[i] = stats[i].getPath().toString(); } Arrays.sort(fileNames); // Read all the files in sorted order, adding the value lines to the list. List<String> receivedNums = new ArrayList<String>(); for (String fileName : fileNames) { if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(fileName)))); try { while (true) { String s = r.readLine(); if (null == s) { break; } receivedNums.add(s.trim()); } } finally { r.close(); } } assertEquals(expectedNums, receivedNums.size()); // Compare the received values with the expected set. for (int i = 0; i < expectedNums; i++) { assertEquals((int) i, (int) Integer.valueOf(receivedNums.get(i))); } } catch (Exception e) { fail("Got unexpected exception: " + StringUtils.stringifyException(e)); } }
From source file:com.cloudera.sqoop.TestIncrementalImport.java
License:Apache License
/** * Assert that a directory contains a file with exactly one line * in it, containing the prescribed number 'val'. *//* ww w. ja v a 2s . com*/ public void assertSpecificNumber(String tableName, int val) { try { FileSystem fs = FileSystem.getLocal(new Configuration()); Path warehouse = new Path(BaseSqoopTestCase.LOCAL_WAREHOUSE_DIR); Path tableDir = new Path(warehouse, tableName); FileStatus[] stats = fs.listStatus(tableDir); String[] filePaths = new String[stats.length]; for (int i = 0; i < stats.length; i++) { filePaths[i] = stats[i].getPath().toString(); } // Read the first file that is not a hidden file. boolean foundVal = false; for (String filePath : filePaths) { String fileName = new Path(filePath).getName(); if (fileName.startsWith("_") || fileName.startsWith(".")) { continue; } if (foundVal) { // Make sure we don't have two or more "real" files in the dir. fail("Got an extra data-containing file in this directory."); } BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(new Path(filePath)))); try { String s = r.readLine(); if (null == s) { fail("Unexpected empty file " + filePath + "."); } assertEquals(val, (int) Integer.valueOf(s.trim())); String nextLine = r.readLine(); if (nextLine != null) { fail("Expected only one result, but got another line: " + nextLine); } // Successfully got the value we were looking for. foundVal = true; } finally { r.close(); } } } catch (IOException e) { fail("Got unexpected exception: " + StringUtils.stringifyException(e)); } }
From source file:com.cloudera.sqoop.TestMerge.java
License:Apache License
/** * @return true if the file specified by path 'p' contains a line * that starts with 'prefix'/* ww w . j av a2 s . co m*/ */ protected boolean checkFileForLine(FileSystem fs, Path p, String prefix) throws IOException { BufferedReader r = new BufferedReader(new InputStreamReader(fs.open(p))); try { while (true) { String in = r.readLine(); if (null == in) { break; // done with the file. } if (in.startsWith(prefix)) { return true; } } } finally { r.close(); } return false; }
From source file:com.conversantmedia.mapreduce.example.distribute.DistributedObjectExample.java
License:Apache License
/** * Distributed {@link Set} containing the prepared list of blacklisted words. * /*from ww w . j a v a 2 s .c om*/ * @return Set the set of blacklisted words * @throws IOException if it fails to read in the file */ @Distribute public Set<String> getBlacklist() throws IOException { Set<String> blacklist = null; if (StringUtils.isNotBlank(context.blacklist)) { blacklist = new HashSet<>(); InputStreamReader reader = null; try { FileSystem fs = FileSystem.get(new Configuration()); FileStatus file = fs.getFileStatus(new Path(context.blacklist)); reader = new InputStreamReader(fs.open(file.getPath())); for (String line : IOUtils.readLines(reader)) { blacklist.add(line); } } finally { IOUtils.closeQuietly(reader); } } return blacklist; }
From source file:com.cotdp.hadoop.BrotliFileRecordReader.java
License:Apache License
/** * Initialize and open the ZIP file from the FileSystem *///w w w .j av a2 s. c o m @Override public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration conf = taskAttemptContext.getConfiguration(); Path path = split.getPath(); FileSystem fs = path.getFileSystem(conf); // Set the file path as the key currentKey.set(path.getName()); // Open the stream fsin = fs.open(path); String cmd = "/bin/cat"; ProcessBuilder pb = new ProcessBuilder(); pb.redirectOutput(); pb.command(cmd); try { decompressor = pb.start(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:com.cotdp.hadoop.ZipFileRecordReader.java
License:Apache License
/** * Initialise and open the ZIP file from the FileSystem *//*from w w w .j a va2 s.c o m*/ public void initialize(InputSplit inputSplit, TaskAttemptContext taskAttemptContext) throws IOException, InterruptedException { FileSplit split = (FileSplit) inputSplit; Configuration conf = taskAttemptContext.getConfiguration(); Path path = split.getPath(); FileSystem fs = path.getFileSystem(conf); // Open the stream fsin = fs.open(path); zip = new ZipInputStream(fsin); try { entry = zip.getNextEntry(); } catch (ZipException e) { if (ZipFileInputFormat.getLenient() == false) throw e; } }