Example usage for org.apache.hadoop.fs FileSystem getLocal

Introduction

On this page you can find example usages of org.apache.hadoop.fs.FileSystem.getLocal.

Prototype

public static LocalFileSystem getLocal(Configuration conf) throws IOException

Source Link

Document

Get the local FileSystem.

Usage

From source file:com.cloudera.flume.handlers.hdfs.SeqfileEventSource.java

License:Apache License

/**
 * Opens a sequence-file reader over {@code fname} on the local file system,
 * using the shared Flume configuration, and stores it in {@code reader}.
 *
 * @throws IOException if the local file system or the reader cannot be created
 */
@Override
public void open() throws IOException {
    LOG.debug("opening SeqfileEventSource " + fname);

    final Configuration flumeConf = FlumeConfiguration.get();
    final FileSystem localFs = FileSystem.getLocal(flumeConf);
    final Path source = new Path(fname);

    reader = new SequenceFile.Reader(localFs, source, flumeConf);
}

From source file:com.cloudera.flume.handlers.seqfile.TestSequenceFileOutputFormat.java

License:Apache License

/**
 * Writes canned events to a temporary sequence file and verifies that they
 * can be read back unchanged.
 * <p>
 * This helper directly creates the "seqfile" output format so that it can be
 * configured with {@code args}, since this isn't possible via the
 * configuration language currently.
 *
 * @param args arguments handed to the "seqfile" output format
 * @throws IOException if writing or reading the temporary file fails
 * @throws FlumeSpecException declared for the output format lookup
 * @throws InterruptedException declared for the in-memory source operations
 */
public void sequenceFileWriteReadHelper(String... args)
        throws IOException, FlumeSpecException, InterruptedException {

    MemorySinkSource mem = MemorySinkSource.cannedData("test ", 5);

    // setup sink.
    File f = FileUtil.createTempFile("sequencefile", ".seq");
    f.deleteOnExit();
    FileOutputStream fos = new FileOutputStream(f);
    try {
        LOG.info("filename before escaping: " + f.getAbsolutePath());
        OutputFormat out = FormatFactory.get().getOutputFormat("seqfile", args);
        mem.open();
        Event e = mem.next();
        while (e != null) {
            out.format(fos, e);
            e = mem.next();
        }
    } finally {
        // Release the file handle before the file is read back.
        fos.close();
    }

    // Re-open the in-memory source; presumably this rewinds it so the same
    // events can be replayed for comparison -- TODO confirm.
    mem.open();

    FlumeConfiguration conf = FlumeConfiguration.get();
    FileSystem fs = FileSystem.getLocal(conf);
    SequenceFile.Reader r = new SequenceFile.Reader(fs, new Path(f.toURI()), conf);
    try {
        WriteableEventKey k = new WriteableEventKey();
        WriteableEvent evt = new WriteableEvent();
        // Each record read from disk must match the next event from the source.
        while (r.next(k, evt)) {
            Event expected = mem.next();
            assertEquals(evt.getTimestamp(), expected.getTimestamp());
            assertEquals(evt.getNanos(), expected.getNanos());
            assertEquals(evt.getPriority(), expected.getPriority());
            assertTrue(Arrays.equals(evt.getBody(), expected.getBody()));
        }
    } finally {
        // Close the reader even when an assertion fails.
        r.close();
    }

}

From source file:com.cloudera.knittingboar.records.TestTwentyNewsgroupsCustomRecordParseOLRRun.java

License:Apache License

/**
 * Prepares the fixture: a job configuration whose default file system is
 * {@code file:///}, the matching local file system, and a fresh temporary
 * directory holding the input shard.
 *
 * @throws Exception if any part of the fixture cannot be created
 */
@Before
public void setup() throws Exception {
    // Configuration and local file system.
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    // Input shard: the ".gz" resource name and the target file inside the
    // temp directory are handed to TestingUtils.copyDecompressed.
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    final String compressedName = inputFileName + ".gz";
    final File stagedShard = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(compressedName, stagedShard);

    // The temp directory doubles as the working directory.
    workDir = new Path(baseDir.getAbsolutePath());
}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndSingleWorker.java

License:Apache License

/**
 * Prepares the fixture: a job configuration whose default file system is
 * {@code file:///}, the matching local file system, a fresh temporary
 * directory holding the input shard, and the knittingboar configuration.
 *
 * @throws Exception if any part of the fixture cannot be created
 */
@Before
public void setup() throws Exception {
    // Configuration and local file system.
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);

    // Input shard: the ".gz" resource name and the target file inside the
    // temp directory are handed to TestingUtils.copyDecompressed.
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    final String compressedName = inputFileName + ".gz";
    final File stagedShard = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(compressedName, stagedShard);
    workDir = new Path(baseDir.getAbsolutePath());

    // Knittingboar settings.
    configuration = new Configuration();
    // feature vector size and number of categories
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    // record factory implementation
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");
    // Note: PredictorLabelNames, PredictorVariableTypes, TargetVariableName
    // and ColumnHeaderNames are intentionally not set in this fixture.
}

From source file:com.cloudera.knittingboar.sgd.TestRunPOLRMasterAndTwoWorkers.java

License:Apache License

@Before
public void setup() throws Exception {
    defaultConf = new JobConf();
    defaultConf.set("fs.defaultFS", "file:///");
    localFs = FileSystem.getLocal(defaultConf);
    inputFileName = "kboar-shard-0.txt";
    baseDir = Files.createTempDir();
    File inputFile = new File(baseDir, inputFileName);
    TestingUtils.copyDecompressed(inputFileName + ".gz", inputFile);
    workDir = new Path(baseDir.getAbsolutePath());
    configuration = new Configuration();
    // feature vector size
    configuration.setInt("com.cloudera.knittingboar.setup.FeatureVectorSize", 10000);
    configuration.setInt("com.cloudera.knittingboar.setup.numCategories", 20);
    configuration.setInt("com.cloudera.knittingboar.setup.BatchSize", 200);
    // local input split path
    configuration.set("com.cloudera.knittingboar.setup.LocalInputSplitPath", "hdfs://127.0.0.1/input/0");
    configuration.set("com.cloudera.knittingboar.setup.RecordFactoryClassname",
            "com.cloudera.knittingboar.records.TwentyNewsgroupsRecordFactory");
    /*    // predictor label names
        c.set( "com.cloudera.knittingboar.setup.PredictorLabelNames", "x,y" );
            /*from  ww  w.  ja  va  2s  . c o  m*/
        // predictor var types
        c.set( "com.cloudera.knittingboar.setup.PredictorVariableTypes", "numeric,numeric" );
                
        // target variables
        c.set( "com.cloudera.knittingboar.setup.TargetVariableName", "color" );
            
        // column header names
        c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "x,y,shape,color,k,k0,xx,xy,yy,a,b,c,bias" );
        //c.set( "com.cloudera.knittingboar.setup.ColumnHeaderNames", "\"x\",\"y\",\"shape\",\"color\",\"k\",\"k0\",\"xx\",\"xy\",\"yy\",\"a\",\"b\",\"c\",\"bias\"\n" );
     */
}

From source file:com.cloudera.oryx.common.servcomp.Store.java

License:Open Source License

private Store() {
    try {//from   w  w w.  j av a2 s . c  o  m
        Configuration conf = OryxConfiguration.get();
        if (Namespaces.isLocalData()) {
            fs = FileSystem.getLocal(conf);
        } else {
            UserGroupInformation.setConfiguration(conf);
            fs = FileSystem.get(URI.create(Namespaces.get().getPrefix()), conf);
        }
    } catch (IOException ioe) {
        log.error("Unable to configure Store", ioe);
        throw new IllegalStateException(ioe);
    }
}

From source file:com.cloudera.recordbreaker.analyzer.FormatAnalyzer.java

License:Open Source License

/**
 * Command-line entry point: describes the data file named by the first
 * argument, using a {@code FormatAnalyzer} built on the schema database
 * directory named by the second argument, and prints the result to stderr.
 *
 * @param argv command-line arguments: {@code <inputfile> <schemaDbDir>}
 * @exception IOException if an error occurs
 */
public static void main(String argv[]) throws IOException {
    // Both arguments are read below, so fewer than two is a usage error.
    if (argv.length < 2) {
        System.err.println("Usage: FormatAnalyzer <inputfile> <schemaDbDir>");
        return;
    }

    // NOTE(review): a null Configuration is passed here; this looks likely to
    // fail inside Hadoop with a NullPointerException -- confirm and consider
    // passing a real Configuration instance.
    FileSystem fs = FileSystem.getLocal(null);
    Path inputFile = new Path(new File(argv[0]).getCanonicalPath());
    File schemaDbDir = new File(argv[1]).getCanonicalFile();
    FormatAnalyzer fa = new FormatAnalyzer(schemaDbDir);

    DataDescriptor descriptor = fa.describeData(fs, inputFile);
    System.err.println("Filename: " + descriptor.getFilename());
    System.err.println("Filetype identifier: " + descriptor.getFileTypeIdentifier());
    List<SchemaDescriptor> schemas = descriptor.getSchemaDescriptor();
    if (schemas == null) {
        System.err.println("No schema found.");
    } else {
        System.err.println("Num schemas found: " + schemas.size());
        System.err.println();
        for (SchemaDescriptor sd : schemas) {
            Schema s = sd.getSchema();
            System.err.println("Schema src desc: " + sd.getSchemaSourceDescription());
            System.err.println();
            System.err.println("Schema identifier: " + sd.getSchemaIdentifier());
            System.err.println();
            // Print every record the descriptor yields, numbered from zero.
            int i = 0;
            for (Iterator it = sd.getIterator(); it.hasNext();) {
                GenericData.Record curRow = (GenericData.Record) it.next();
                System.err.println(i + ". Elt: " + curRow);
                i++;
            }
        }
    }
}

From source file:com.cloudera.recordbreaker.analyzer.UnknownTextSchemaDescriptor.java

License:Open Source License

/**
 * Infers the record format of {@code dd.getFilename()} and loads the result
 * into {@code schema} and {@code typeTree}.
 * <p>
 * The inference writes its schema and parser output to two temporary local
 * files; both are deleted again before this method returns.
 *
 * @throws IOException if inference or reading the intermediate files fails
 */
void computeSchema() throws IOException {
    this.randId = new Random().nextInt();
    LearnStructure ls = new LearnStructure();
    FileSystem fs = FSAnalyzer.getInstance().getFS();
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    Path inputPath = dd.getFilename();

    File workingParserFile = File.createTempFile("textdesc", "typetree", null);
    File workingSchemaFile = null;
    try {
        workingSchemaFile = File.createTempFile("textdesc", "schema", null);

        ls.inferRecordFormat(fs, inputPath, localFS, new Path(workingSchemaFile.getCanonicalPath()),
                new Path(workingParserFile.getCanonicalPath()), null, null, false, MAX_LINES);

        this.schema = Schema.parse(workingSchemaFile);
        DataInputStream in = new DataInputStream(localFS.open(new Path(workingParserFile.getCanonicalPath())));
        try {
            this.typeTree = InferredType.readType(in);
        } catch (IOException iex) {
            iex.printStackTrace();
            throw iex;
        } finally {
            in.close();
        }
    } finally {
        // Best-effort cleanup so repeated calls do not pile up temp files.
        workingParserFile.delete();
        if (workingSchemaFile != null) {
            workingSchemaFile.delete();
        }
    }
}

From source file:com.cloudera.recordbreaker.learnstructure.LearnStructure.java

License:Open Source License

/**
 * Command-line entry point: infers the record format of an input data file
 * and writes the schema, the parse tree and (unless disabled with
 * {@code -emitAvro false}) the JSON and Avro data files into a new output
 * directory.
 *
 * @param argv {@code <input-datafile> <outdir> [-emitAvro true|false]}
 * @throws IOException if the output directory already exists or cannot be
 *         created, or if inference fails
 */
public static void main(String argv[]) throws IOException {
    final String usage = "Usage: LearnStructure <input-datafile> <outdir> (-emitAvro (true)|false)";
    if (argv.length < 2) {
        System.err.println(usage);
        return;
    }
    FileSystem localFS = FileSystem.getLocal(new Configuration());
    boolean emitAvro = true;
    int i = 0;
    Path f = new Path(new File(argv[i++]).getCanonicalPath());
    File outdir = new File(argv[i++]).getCanonicalFile();
    for (; i < argv.length; i++) {
        if ("-emitAvro".equals(argv[i])) {
            // The flag needs a value; without one, report usage instead of
            // reading past the end of the argument array.
            if (i + 1 >= argv.length) {
                System.err.println(usage);
                return;
            }
            i++;
            emitAvro = "true".equals(argv[i]);
        }
    }

    System.err.println("Input file: " + f.toString());
    System.err.println("Output directory: " + outdir.getCanonicalPath());
    if (outdir.exists()) {
        throw new IOException("Output directory already exists: " + outdir);
    }
    // Fail early if the directory cannot be created.
    if (!outdir.mkdirs() && !outdir.isDirectory()) {
        throw new IOException("Unable to create output directory: " + outdir);
    }
    Path schemaFile = new Path(outdir.getCanonicalPath(), SCHEMA_FILENAME);
    Path parseTreeFile = new Path(outdir.getCanonicalPath(), PARSER_FILENAME);
    // The data file paths stay null when Avro output is disabled.
    Path jsonDataFile = null;
    Path avroDataFile = null;
    if (emitAvro) {
        jsonDataFile = new Path(outdir.getCanonicalPath(), JSONDATA_FILENAME);
        avroDataFile = new Path(outdir.getCanonicalPath(), DATA_FILENAME);
    }

    LearnStructure ls = new LearnStructure();
    ls.inferRecordFormat(localFS, f, localFS, schemaFile, parseTreeFile, jsonDataFile, avroDataFile, true, -1);
}

From source file:com.cloudera.recordbreaker.learnstructure.test.InferenceTest.java

License:Open Source License

/**
 * runSingletonTest() executes LearnStructure test for a single given input text file.
 * <p>
 * Output is written to a per-input scratch directory under {@code workingDir},
 * which is removed together with its contents before the method returns.
 *
 * @param workingDir directory in which the scratch directory is created
 * @param inputData a <code>File</code> value
 * @return a <code>boolean</code> value;  did the test succeed? True when the
 *         ratio of Avro records to input lines exceeds {@code MIN_PARSE_RATIO};
 *         false otherwise or when an {@link IOException} occurs.
 */
boolean runSingletonTest(File workingDir, File inputData) {
    File tmpSingletonDir = new File(workingDir, "testinference-" + inputData.getName());
    try {
        FileSystem localFS = FileSystem.getLocal(new Configuration());
        tmpSingletonDir.mkdir();
        Path schemaFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.SCHEMA_FILENAME);
        Path parseTreeFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.PARSER_FILENAME);
        Path jsonDataFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.JSONDATA_FILENAME);
        Path avroFile = new Path(tmpSingletonDir.getCanonicalPath(), LearnStructure.DATA_FILENAME);

        LearnStructure ls = new LearnStructure();
        // Check to see how many records exist in the original input
        int lineCount = 0;
        BufferedReader in2 = new BufferedReader(new FileReader(inputData));
        try {
            while (in2.readLine() != null) {
                lineCount++;
            }
        } finally {
            in2.close();
        }

        // Infer structure
        ls.inferRecordFormat(localFS, new Path(inputData.getCanonicalPath()), localFS, schemaFile,
                parseTreeFile, jsonDataFile, avroFile, false, lineCount);

        // Test the inferred structure
        // First, load in the avro file and see how many records there are.
        int avroCount = 0;
        DataFileReader in = new DataFileReader(new File(avroFile.toString()), new GenericDatumReader());
        try {
            Iterator it = in.iterator();
            while (it.hasNext()) {
                avroCount++;
                it.next();
            }
        } finally {
            in.close();
        }

        // Was the synthesized parser able to figure out the file?
        double parseRatio = avroCount / (1.0 * lineCount);
        return (parseRatio > MIN_PARSE_RATIO);
    } catch (IOException e) {
        try {
            System.err.println("File: " + inputData.getCanonicalPath());
        } catch (IOException ex) {
            ex.printStackTrace();
        }
        e.printStackTrace();
        return false;
    } finally {
        // remove temp files; File.delete() alone cannot remove a directory
        // that still contains the generated output files.
        deleteRecursively(tmpSingletonDir);
    }
}

/**
 * Best-effort removal of {@code target} and, if it is a directory,
 * everything beneath it. Failures to delete are ignored.
 */
private static void deleteRecursively(File target) {
    File[] children = target.listFiles();
    // listFiles() returns null for non-directories and on I/O errors.
    if (children != null) {
        for (File child : children) {
            deleteRecursively(child);
        }
    }
    target.delete();
}