Example usage for org.apache.hadoop.fs FileSystem open

Introduction

On this page you can find example usage for org.apache.hadoop.fs FileSystem open.

Prototype

public FSDataInputStream open(PathHandle fd) throws IOException 

Document

Open an FSDataInputStream matching the PathHandle instance.
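
Below is a minimal, self-contained sketch of calling FileSystem open, assuming Hadoop 3.x (where the PathHandle overload was added); the path /tmp/example.txt and the class name OpenExample are hypothetical, and note that most of the examples that follow use the more common open(Path) overload.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathHandle;

public class OpenExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        Path path = new Path("/tmp/example.txt"); // hypothetical path

        // Common overload: open by Path, then read the file line by line.
        try (FSDataInputStream in = fs.open(path);
                BufferedReader reader = new BufferedReader(
                        new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }

        // PathHandle overload from the prototype above; not every FileSystem
        // implementation supports path handles (unsupported ones throw
        // UnsupportedOperationException).
        FileStatus status = fs.getFileStatus(path);
        PathHandle handle = fs.getPathHandle(status);
        try (FSDataInputStream in = fs.open(handle)) {
            System.out.println("First byte: " + in.read());
        }
    }
}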

Usage

From source file:com.jkoolcloud.tnt4j.streams.inputs.HdfsFileLineStreamTest.java

License:Apache License

@Test()
public void test() throws Exception {
    FileSystem fs = mock(FileSystem.class);
    HdfsFileLineStream stream = new HdfsFileLineStream();

    TestFileList files = new TestFileList(false);

    final String fileName = ("file:////" + files.get(0).getParentFile() + File.separator + files.getPrefix() // NON-NLS
            + "*.TST").replace("\\", "/"); // NON-NLS

    Map<String, String> props = new HashMap<>(2);
    props.put(StreamProperties.PROP_FILENAME, fileName);
    props.put(StreamProperties.PROP_RESTORE_STATE, "false"); // NON-NLS

    when(fs.open(any(Path.class))).thenReturn(new FSDataInputStream(new TestInputStreamStub()));
    final FileStatus fileStatusMock = mock(FileStatus.class);
    final FileStatus[] array = new FileStatus[10];
    Arrays.fill(array, fileStatusMock);
    when(fs.listStatus(any(Path.class), any(PathFilter.class))).thenReturn(array);
    when(fileStatusMock.getModificationTime()).thenReturn(1L, 2L, 3L);
    when(fileStatusMock.getPath()).thenReturn(mock(Path.class));
    when(fs.getContentSummary(any(Path.class))).thenReturn(mock(ContentSummary.class));

    Method m = FileSystem.class.getDeclaredMethod("addFileSystemForTesting", URI.class, Configuration.class, // NON-NLS
            FileSystem.class);
    m.setAccessible(true);
    m.invoke(FileSystem.class, URI.create(fileName), new Configuration(), fs);

    StreamThread st = mock(StreamThread.class);
    st.setName("HdfsFileLineStreamTestThreadName"); // NON-NLS
    stream.setOwnerThread(st);

    stream.setProperties(props.entrySet());
    stream.startStream();

    verify(fileStatusMock, atLeastOnce()).getModificationTime();
    verify(fileStatusMock, atLeastOnce()).getPath();
    verify(fs, atLeastOnce()).listStatus(any(Path.class), any(PathFilter.class));

    stream.cleanup();
}

From source file:com.kadwa.hadoop.DistExec.java

License:Open Source License

private static List<Path> fetchFileList(Configuration conf, Path srcList) throws IOException {
    List<Path> result = new ArrayList<Path>();
    FileSystem fs = srcList.getFileSystem(conf);
    BufferedReader input = null;
    try {
        input = new BufferedReader(new InputStreamReader(fs.open(srcList)));
        String line = input.readLine();
        while (line != null) {
            result.add(new Path(line));
            line = input.readLine();
        }
    } finally {
        checkAndClose(input);
    }
    return result;
}

From source file:com.knewton.mapreduce.SSTableRecordReader.java

License:Apache License

/**
 * Copies a remote path to the local filesystem, while updating hadoop that we're making
 * progress. Doesn't support directories.
 */
@VisibleForTesting
void copyToLocalFile(FileSystem remoteFS, FileSystem localFS, Path remote, Path local) throws IOException {
    // don't support transferring from remote directories
    FileStatus remoteStat = remoteFS.getFileStatus(remote);
    Preconditions.checkArgument(!remoteStat.isDirectory(), String.format("Path %s is directory!", remote));
    // if local is a dir, copy to inside that dir, like 'cp /path/file /tmp/' would do
    if (localFS.exists(local)) {
        FileStatus localStat = localFS.getFileStatus(local);
        if (localStat.isDirectory()) {
            local = new Path(local, remote.getName());
        }
    }
    long remoteFileSize = remoteStat.getLen();
    // do actual copy
    InputStream in = null;
    OutputStream out = null;
    try {
        long startTime = System.currentTimeMillis();
        long lastLogTime = 0;
        long bytesCopied = 0;
        in = remoteFS.open(remote);
        out = localFS.create(local, true);
        int buffSize = this.conf.getInt(CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_KEY,
                CommonConfigurationKeys.IO_FILE_BUFFER_SIZE_DEFAULT);
        byte[] buf = new byte[buffSize];
        int bytesRead = in.read(buf);
        while (bytesRead >= 0) {
            long now = System.currentTimeMillis();
            // log transfer rate once per min, starting 1 min after transfer began
            if (now - lastLogTime > 60000L && now - startTime > 60000L) {
                double elapsedSec = (now - startTime) / 1000D;
                double bytesPerSec = bytesCopied / elapsedSec;
                LOG.info("Transferred {} of {} bytes at {} bytes per second", bytesCopied, remoteFileSize,
                        bytesPerSec);
                lastLogTime = now;
            }
            this.ctx.progress();
            out.write(buf, 0, bytesRead);
            bytesCopied += bytesRead;
            bytesRead = in.read(buf);
        }
        // try to close these outside of finally so we receive exception on failure
        out.close();
        out = null;
        in.close();
        in = null;
    } finally {
        // make sure everything's closed
        IOUtils.closeStream(out);
        IOUtils.closeStream(in);
    }
}

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

/**
 * Tests to see if tables can be correctly copied locally
 */
@Test
public void testCopyTablesToLocal() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(2, ssTableColumnRecordReader.getComponentSize());
}

From source file:com.knewton.mapreduce.SSTableRecordReaderTest.java

License:Apache License

/**
 * Tests to see if tables can be correctly copied locally including the compression info table
 */
@Test
public void testCopyTablesToLocalWithCompressionInfo() throws Exception {
    TaskAttemptContext context = getTaskAttemptContext(true, true, true);
    ssTableColumnRecordReader.initialize(inputSplit, context);

    doCallRealMethod().when(ssTableColumnRecordReader).copyTablesToLocal(any(FileSystem.class),
            any(FileSystem.class), any(Path.class), any(TaskAttemptContext.class));

    FileSystem remoteFS = mock(FileSystem.class);
    FileSystem localFS = mock(FileSystem.class);

    byte[] data = new byte[] { 0xA };
    FSDataInputStream fsIn = new FSDataInputStream(new MemoryDataInputStream(data));
    FSDataOutputStream fsOut = mock(FSDataOutputStream.class);

    when(remoteFS.open(any(Path.class))).thenReturn(fsIn);
    when(localFS.create(any(Path.class), anyBoolean())).thenReturn(fsOut);

    Path dataTablePath = inputSplit.getPath();
    FileStatus fileStatus = mock(FileStatus.class);
    when(fileStatus.getLen()).thenReturn(10L);
    when(fileStatus.isDirectory()).thenReturn(false);
    when(remoteFS.getFileStatus(any(Path.class))).thenReturn(fileStatus);

    String str = ssTableColumnRecordReader.getDescriptor().filenameFor(Component.COMPRESSION_INFO);
    when(remoteFS.exists(new Path(str))).thenReturn(true);

    ssTableColumnRecordReader.copyTablesToLocal(remoteFS, localFS, dataTablePath, context);
    verify(remoteFS).getFileStatus(dataTablePath);
    ssTableColumnRecordReader.close();
    verify(fsOut).write(any(byte[].class), eq(0), eq(data.length));
    assertEquals(3, ssTableColumnRecordReader.getComponentSize());
}

From source file:com.knewton.mrtool.io.JsonRecordReader.java

License:Apache License

/**
 * Get the line reader to be used for the file. A <code>LineReader</code> can read a file line
 * by line. This separate method helps with testing too.
 *
 * @param fileSplit the file split whose file should be opened
 * @param conf the job configuration
 * @return a LineReader for reading the split's file line by line
 * @throws IOException if the file cannot be opened
 */
 */
protected LineReader initLineReader(FileSplit fileSplit, Configuration conf) throws IOException {
    final Path file = fileSplit.getPath();
    final CompressionCodec codec = compressionCodecs.getCodec(file);
    FileSystem fs = file.getFileSystem(conf);
    FSDataInputStream fileIn = fs.open(fileSplit.getPath());
    seekableIn = fileIn;
    boolean skipFirstLine = false;
    LineReader lineReader;
    if (codec != null) {
        lineReader = new LineReader(codec.createInputStream(fileIn), conf);
    } else {
        // if the start is not the beginning of the file then skip the first line to get the
        // next complete json record. The previous json record will be read by the record reader
        // that got assigned the previous InputSplit.
        if (start != 0) {
            skipFirstLine = true;
            --start;
            fileIn.seek(start);
        }
        lineReader = new LineReader(fileIn, conf);
    }
    if (skipFirstLine) {
        start += lineReader.readLine(new Text(), 0, (int) Math.min((long) Integer.MAX_VALUE, end - start));
    }
    return lineReader;
}

From source file:com.kse.bigdata.file.SequenceSampler.java

License:Apache License

public LinkedList<Sequence> getRandomSample() {
    System.out.println("Sampling Start...");
    System.out.println("Sample Size is  " + SAMPLE_SIZE);

    try {
        FileSystem fs = FileSystem.get(new Configuration());
        BufferedReader fileReader = new BufferedReader(new InputStreamReader(fs.open(sampleFile)));
        LinkedList<Double> deque = new LinkedList<Double>();
        String line;
        int[] sampleIndexes = getRandomSampleIndexArray();
        int counter = -1;

        while ((line = fileReader.readLine()) != null) {
            counter++;

            deque.add(extractValidInformation(line));

            if (deque.size() == Sequence.SIZE_OF_SEQUENCE) {

                for (int sampleIndex : sampleIndexes)
                    if (sampleIndex == counter)
                        randomSamples.add(new Sequence(deque));

                deque.removeFirst();
            }

            if (randomSamples.size() == SAMPLE_SIZE)
                return randomSamples;

        }
    } catch (IOException e) {
        e.printStackTrace();
    }

    return this.randomSamples;
}

From source file:com.kse.bigdata.main.Driver.java

License:Apache License

public static void main(String[] args) throws Exception {
    /**********************************************************************************
     **    Merge the source files into one.                                          **
     **    Should change the directories of each file before executing the program   **
    ***********************************************************************************/
    //        String inputFileDirectory = "/media/bk/??/BigData_Term_Project/Debug";
    //        String resultFileDirectory = "/media/bk/??/BigData_Term_Project/debug.csv";
    //        File resultFile = new File(resultFileDirectory);
    //        if(!resultFile.exists())
    //            new SourceFileMerger(inputFileDirectory, resultFileDirectory).mergeFiles();

    /**********************************************************************************
     * Hadoop Operation.
     * Before starting, check the length of the sequence we want to predict.
     **********************************************************************************/

    Configuration conf = new Configuration();

    //Enable MapReduce intermediate compression as Snappy
    conf.setBoolean("mapred.compress.map.output", true);
    conf.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.SnappyCodec");

    //Enable Profiling
    //conf.setBoolean("mapred.task.profile", true);

    String testPath = null;
    String inputPath = null;
    String outputPath = null;

    int sampleSize = 1;
    ArrayList<String> results = new ArrayList<String>();

    for (int index = 0; index < args.length; index++) {

        /*
         * Mandatory command
         */
        //Extract input path string from command line.
        if (args[index].equals("-in"))
            inputPath = args[index + 1];

        //Extract output path string from command line.
        if (args[index].equals("-out"))
            outputPath = args[index + 1];

        //Extract test data path string from command line.
        if (args[index].equals("-test"))
            testPath = args[index + 1];

        /*
         * Optional command
         */
        //Extract the number of nearest neighbors.
        if (args[index].equals("-nn"))
            conf.setInt(Reduce.NUMBER_OF_NEAREAST_NEIGHBOR, Integer.parseInt(args[index + 1]));

        //Whether job uses normalization or not.
        if (args[index].equals("-norm"))
            conf.setBoolean(Map.NORMALIZATION, true);

        //Extract the sample size to test.
        if (args[index].equals("-s"))
            sampleSize = Integer.valueOf(args[index + 1]);

        //Whether job uses mean or median
        //[Default : mean]
        if (args[index].equals("-med"))
            conf.setBoolean(Reduce.MEDIAN, true);
    }

    String outputFileName = "part-r-00000";
    SequenceSampler sampler = new SequenceSampler(testPath, sampleSize);
    LinkedList<Sequence> testSequences = sampler.getRandomSample();

    //        Test Sequence
    //        String testSeqString = "13.591-13.674-13.778-13.892-13.958-14.049-14.153-14.185-14.169-14.092-13.905-13.702-13.438-13.187-13.0-12.914-12.868-12.766-12.62-12.433-12.279-12.142-12.063-12.025-100";
    //        Sequence testSeq = new Sequence(testSeqString);
    //        LinkedList<Sequence> testSequences = new LinkedList<>();
    //        testSequences.add(testSeq);

    for (Sequence seq : testSequences) {

        /*
         ********************  Hadoop Launch ***********************
         */

        System.out.println(seq.getTailString());

        conf.set(Map.INPUT_SEQUENCE, seq.toString());

        Job job = new Job(conf);
        job.setJarByClass(Driver.class);
        job.setJobName("term-project-driver");

        job.setMapperClass(Map.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Text.class);

        //          Should find another way to implement the combiner class.
        //          The current implementation is not helpful to the job.
        //          job.setCombinerClass(Combiner.class);

        //Use a single reduce task so the 100 nearest neighbors are kept in one sorted set.
        job.setNumReduceTasks(1);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.setInputPaths(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.waitForCompletion(true);

        /*
         * if job finishes, get result of the job and store it in results(list).
         */
        try {
            FileSystem hdfs = FileSystem.get(new Configuration());
            BufferedReader fileReader = new BufferedReader(
                    new InputStreamReader(hdfs.open(new Path(outputPath + "/" + outputFileName))));

            String line;
            while ((line = fileReader.readLine()) != null) {
                results.add(seq.getSeqString() + " " + line);
            }

            fileReader.close();

            hdfs.delete(new Path(outputPath), true);
            hdfs.close();

        } catch (IOException e) {
            e.printStackTrace();
            System.exit(1);
        }
    }

    /*
     * if all jobs finish, store results of jobs to output/result.txt file.
     */
    String finalOutputPath = "output/result.csv";
    try {
        FileSystem hdfs = FileSystem.get(new Configuration());
        Path file = new Path(finalOutputPath);
        if (hdfs.exists(file)) {
            hdfs.delete(file, true);
        }

        OutputStream os = hdfs.create(file);
        PrintWriter printWriter = new PrintWriter(new OutputStreamWriter(os, "UTF-8"));

        //CSV File Header
        printWriter.println("Actual,Predicted,MER,MAE");
        printWriter.flush();

        for (String result : results) {
            String[] tokens = result.split("\\s+");

            printWriter.println(tokens[0] + "," + tokens[1] + "," + tokens[2] + "," + tokens[3]);
            printWriter.flush();
        }

        printWriter.close();
        hdfs.close();
    } catch (IOException e) {
        e.printStackTrace();
        System.exit(1);
    }

}

From source file:com.kxen.han.projection.giraph.BspCase.java

License:Apache License

/**
 * Read all parts- files in the output and count their lines.
 * This works only for textual output!
 *
 * @param conf Configuration
 * @param outputPath Output path
 * @return Number of output lines
 * @throws IOException
 */
public int getNumResults(Configuration conf, Path outputPath) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    int numResults = 0;
    for (FileStatus status : fs.listStatus(outputPath, PARTS_FILTER)) {
        FSDataInputStream in = null;
        BufferedReader reader = null;
        try {
            in = fs.open(status.getPath());
            reader = new BufferedReader(new InputStreamReader(in, Charsets.UTF_8));
            while (reader.readLine() != null) {
                numResults++;
            }
        } finally {
            Closeables.closeQuietly(in);
            Closeables.closeQuietly(reader);
        }
    }
    return numResults;
}

From source file:com.kylinolap.common.persistence.HBaseResourceStore.java

License:Apache License

@Override
protected InputStream getResourceImpl(String resPath) throws IOException {
    Result r = getByScan(resPath, B_FAMILY, B_COLUMN);
    if (r == null)
        return null;

    byte[] value = r.getValue(B_FAMILY, B_COLUMN);
    if (value.length == 0) {
        Path redirectPath = bigCellHDFSPath(resPath);
        Configuration hconf = HadoopUtil.getCurrentConfiguration();
        FileSystem fileSystem = FileSystem.get(hconf);

        return fileSystem.open(redirectPath);
    } else {
        return new ByteArrayInputStream(value);
    }
}