List of usage examples for org.apache.hadoop.fs FileSystem getLocal
public static LocalFileSystem getLocal(Configuration conf) throws IOException
From source file:com.cloudera.flume.handlers.hdfs.SeqfileEventSource.java
License:Apache License
/**
 * Opens the sequence file named by {@code fname} for reading.
 *
 * <p>The local file system is resolved from the Flume configuration and the
 * resulting {@code SequenceFile.Reader} is stored in {@code reader}.
 *
 * @throws IOException if the local file system or the reader cannot be created
 */
@Override
public void open() throws IOException {
    LOG.debug("opening SeqfileEventSource " + fname);

    final Configuration flumeConf = FlumeConfiguration.get();
    final FileSystem localFs = FileSystem.getLocal(flumeConf);
    final Path seqPath = new Path(fname);

    reader = new SequenceFile.Reader(localFs, seqPath, flumeConf);
}
From source file:com.cloudera.flume.handlers.seqfile.TestSequenceFileOutputFormat.java
License:Apache License
/** * This is a helper method that is a lot like the above, except that it * directly creates the output format so that we can configure it, since * this isn't possible via the configuration language currently. *//*from w w w .j av a 2s . co m*/ public void sequenceFileWriteReadHelper(String... args) throws IOException, FlumeSpecException, InterruptedException { MemorySinkSource mem = MemorySinkSource.cannedData("test ", 5); // setup sink. File f = FileUtil.createTempFile("sequencefile", ".seq"); f.deleteOnExit(); FileOutputStream fos = new FileOutputStream(f); LOG.info("filename before escaping: " + f.getAbsolutePath()); OutputFormat out = FormatFactory.get().getOutputFormat("seqfile", args); mem.open(); Event e = mem.next(); while (e != null) { out.format(fos, e); e = mem.next(); } mem.open(); FlumeConfiguration conf = FlumeConfiguration.get(); FileSystem fs = FileSystem.getLocal(conf); SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(f.toURI()), conf); WriteableEventKey k = new WriteableEventKey(); WriteableEvent evt = new WriteableEvent(); while (r.next(k, evt)) { Event expected = mem.next(); assertEquals(evt.getTimestamp(), expected.getTimestamp()); assertEquals(evt.getNanos(), expected.getNanos()); assertEquals(evt.getPriority(), expected.getPriority()); assertTrue(Arrays.equals(evt.getBody(), expected.getBody())); } }
From source file:com.cloudera.knittingboar.records.TestTwentyNewsgroupsCustomRecordParseOLRRun.java
License:Apache License
/**
 * Prepares the fixture used by the tests in this class.
 *
 * <p>Points {@code defaultConf} at the local file system, resolves
 * {@code localFs} from it, creates a temporary base directory and copies the
 * gzipped shard resource into it under {@code inputFileName}.
 *
 * @throws Exception if the local file system or the input copy cannot be set up
 */
@Before
public void setup() throws Exception {
    // File-system side of the fixture.
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    // Input shard: the resource is stored as "<name>.gz" and copied to baseDir.
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    final String compressedResource = inputFileName + ".gz";
    final File shardFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(compressedResource, shardFile);

    workDir = new Path(baseDir.getAbsolutePath());
}
From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java
License:Apache License
@Before public void setup() throws Exception { defaultConf = new JobConf(); defaultConf.set("fs.defaultFS", "file:///"); localFs = FileSystem.getLocal(defaultConf); inputFileName = "kboar-shard-0.txt"; baseDir = Files.createTempDir(); File inputFile = new File(baseDir, inputFileName); TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile); workDir = new Path(baseDir.getAbsolutePath()); configuration = new Configuration(); // feature vector size configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000); configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20); // local input split path configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0"); configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname", "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory"); /*//ww w. j a v a 2 s . c o m * // predictor label names c.set( * "com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y" ); // * predictor var types c.set( * "com.cloudera.knittingboar.setup.PredictorVariableTypes", * "numeric,numeric" ); // target variables c.set( * "com.cloudera.knittingboar.setup.TargetVariableName", "color" ); // * column header names c.set( * "com.cloudera.knittingboar.setup.ColumnHeaderNames", * "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias" ); //c.set( * "com.cloudera.knittingboar.setup.ColumnHeaderNames", * "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n" * ); */ }
From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java
License:Apache License
@Before public void setup() throws Exception { defaultConf = new JobConf(); defaultConf.set("fs.defaultFS", "file:///"); localFs = FileSystem.getLocal(defaultConf); inputFileName = "kboar-shard-0.txt"; baseDir = Files.createTempDir(); File inputFile = new File(baseDir, inputFileName); TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile); workDir = new Path(baseDir.getAbsolutePath()); configuration = new Configuration(); // feature vector size configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000); configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20); configuration.setInt("com.cloudera.knittingboar.setup.BatchSize", 200); // local input split path configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0"); configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname", "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory"); /* // predictor label names c.set( "com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y" ); /*from ww w. ja va 2s . c o m*/ // predictor var types c.set( "com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric" ); // target variables c.set( "com.cloudera.knittingboar.setup.TargetVariableName", "color" ); // column header names c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias" ); //c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n" ); */ }
From source file:com.cloudera.oryx.common.servcomp.Store.java
License:Open Source License
private Store() { try {//from w w w. j av a2 s . c o m Configuration conf = OryxConfiguration.get(); if (Namespaces.isLocalData()) { fs = FileSystem.getLocal(conf); } else { UserGroupInformation.setConfiguration(conf); fs = FileSystem.get(URI.create(Namespaces.get().getPrefix()), conf); } } catch (IOException ioe) { log.error("Unable to configure Store", ioe); throw new IllegalStateException(ioe); } }
From source file:com.cloudera.recordbreaker.analyzer.FormatAnalyzer.java
License:Open Source License
/** * Describe <code>main</code> method here. * * @param argv[] a <code>String</code> value * @exception IOException if an error occurs */// w w w . j a v a 2 s . c om public static void main(String argv[]) throws IOException { if (argv.length < 1) { System.err.println("Usage: FormatAnalyzer <inputfile> <schemaDbDir>"); return; } FileSystem fs = FileSystem.getLocal(null); Path inputFile = new Path(new File(argv[0]).getCanonicalPath()); File schemaDbDir = new File(argv[1]).getCanonicalFile(); FormatAnalyzer fa = new FormatAnalyzer(schemaDbDir); DataDescriptor descriptor = fa.describeData(fs, inputFile); System.err.println("Filename: " + descriptor.getFilename()); System.err.println("Filetype identifier: " + descriptor.getFileTypeIdentifier()); List<SchemaDescriptor> schemas = descriptor.getSchemaDescriptor(); if (schemas == null) { System.err.println("No schema found."); } else { System.err.println("Num schemas found: " + schemas.size()); System.err.println(); for (SchemaDescriptor sd : schemas) { Schema s = sd.getSchema(); System.err.println("Schema src desc: " + sd.getSchemaSourceDescription()); System.err.println(); System.err.println("Schema identifier: " + sd.getSchemaIdentifier()); System.err.println(); int i = 0; for (Iterator it = sd.getIterator(); it.hasNext();) { GenericData.Record curRow = (GenericData.Record) it.next(); System.err.println(i + ". Elt: " + curRow); i++; } } } }
From source file:com.cloudera.recordbreaker.analyzer.UnknownTextSchemaDescriptor.java
License:Open Source License
void computeSchema() throws IOException { this.randId = new Random().nextInt(); LearnStructure ls = new LearnStructure(); FileSystem fs = FSAnalyzer.getInstance().getFS(); FileSystem localFS = FileSystem.getLocal(new Configuration()); Path inputPath = dd.getFilename(); File workingParserFile = File.createTempFile("textdesc", "typetree", null); File workingSchemaFile = File.createTempFile("textdesc", "schema", null); ls.inferRecordFormat(fs, inputPath, localFS, new Path(workingSchemaFile.getCanonicalPath()), new Path(workingParserFile.getCanonicalPath()), null, null, false, MAX_LINES); this.schema = Schema.parse(workingSchemaFile); DataInputStream in = new DataInputStream(localFS.open(new Path(workingParserFile.getCanonicalPath()))); try {// w w w . j av a2s .c om this.typeTree = InferredType.readType(in); } catch (IOException iex) { iex.printStackTrace(); throw iex; } finally { in.close(); } //System.err.println("Recovered unknowntext schema: " + schema); }
From source file:com.cloudera.recordbreaker.learnstructure.LearnStructure.java
License:Open Source License
/**
 * Command-line entry point: infers the record structure of a local data file
 * and writes the results into a new output directory.
 *
 * <p>Usage: {@code LearnStructure <input-datafile> <outdir> (-emitAvro (true)|false)}.
 * The schema and parse-tree files are always written; the JSON and Avro data
 * files are written only when Avro emission is enabled (the default). If too
 * few arguments are given, or {@code -emitAvro} is not followed by a value,
 * the usage message is printed and the method returns.
 *
 * @param argv command-line arguments as described above
 * @throws IOException if the output directory already exists or cannot be
 *         created, or if inference fails
 */
public static void main(String argv[]) throws IOException {
    final String usage =
            "Usage: LearnStructure <input-datafile> <outdir> (-emitAvro (true)|false)";
    if (argv.length < 2) {
        System.err.println(usage);
        return;
    }
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    boolean emitAvro = true;
    int i = 0;
    Path f = new Path(new File(argv[i++]).getCanonicalPath());
    File outdir = new File(argv[i++]).getCanonicalFile();
    for (; i < argv.length; i++) {
        if ("-emitAvro".equals(argv[i])) {
            // The flag takes a value; without this check a trailing "-emitAvro"
            // would index past the end of argv.
            if (i + 1 >= argv.length) {
                System.err.println(usage);
                return;
            }
            i++;
            emitAvro = "true".equals(argv[i]);
        }
    }

    System.err.println("Input file: " + f.toString());
    System.err.println("Output directory: " + outdir.getCanonicalPath());
    if (outdir.exists()) {
        throw new IOException("Output directory already exists: " + outdir);
    }
    // Fail early with a clear message instead of a later, less obvious write error.
    if (!outdir.mkdirs()) {
        throw new IOException("Unable to create output directory: " + outdir);
    }

    Path schemaFile = new Path(outdir.getCanonicalPath(), SCHEMA_FILENAME);
    Path parseTreeFile = new Path(outdir.getCanonicalPath(), PARSER_FILENAME);
    Path jsonDataFile = null;
    Path avroDataFile = null;
    if (emitAvro) {
        jsonDataFile = new Path(outdir.getCanonicalPath(), JSONDATA_FILENAME);
        avroDataFile = new Path(outdir.getCanonicalPath(), DATA_FILENAME);
    }

    LearnStructure ls = new LearnStructure();
    ls.inferRecordFormat(localFS, f, localFS, schemaFile, parseTreeFile,
            jsonDataFile, avroDataFile, true, -1);
}
From source file:com.cloudera.recordbreaker.learnstructure.test.InferenceTest.java
License:Open Source License
/** * runSingletonTest() executes LearnStructure test for a single given input text file. * * @param inputData a <code>File</code> value * @return a <code>boolean</code> value; did the test succeed? *//*from ww w .j a v a2 s . c o m*/ boolean runSingletonTest(File workingDir, File inputData) { File tmpSingletonDir = new File(workingDir, "testinference-" + inputData.getName()); try { FileSystem localFS = FileSystem.getLocal(new Configuration()); tmpSingletonDir.mkdir(); Path schemaFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.SCHEMA_FILENAME); Path parseTreeFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.PARSER_FILENAME); Path jsonDataFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.JSONDATA_FILENAME); Path avroFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.DATA_FILENAME); LearnStructure ls = new LearnStructure(); // Check to see how many records exist in the original input int lineCount = 0; BufferedReader in2 = new BufferedReader(new FileReader(inputData)); try { while (in2.readLine() != null) { lineCount++; } } finally { in2.close(); } // Infer structure ls.inferRecordFormat(localFS, new Path(inputData.getCanonicalPath()), localFS, schemaFile, parseTreeFile, jsonDataFile, avroFile, false, lineCount); // Test the inferred structure // First, load in the avro file and see how many records there are. int avroCount = 0; DataFileReader in = new DataFileReader(new File(avroFile.toString()), new GenericDatumReader()); try { Iterator it = in.iterator(); while (it.hasNext()) { avroCount++; it.next(); } } finally { in.close(); } // Was the synthesized parser able to figure out the file? 
double parseRatio = avroCount / (1.0 * lineCount); return (parseRatio > MIN_PARSE_RATIO); } catch (IOException e) { try { System.err.println("File: " + inputData.getCanonicalPath()); } catch (IOException ex) { ex.printStackTrace(); } e.printStackTrace(); return false; } finally { // remove temp files tmpSingletonDir.delete(); } }