List of usage examples for org.apache.hadoop.mapred InputSplit getLocations
String[] getLocations() throws IOException;
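getLocations() reports the hostnames on which a split's data is locally available, so schedulers and tools can place work near the input. Before the examples from real projects below, here is a minimal sketch of the typical call pattern; the input path and the use of TextInputFormat are hypothetical placeholders, not taken from any of the sources listed here.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.InputSplit;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;

public class SplitLocationsExample {
    public static void main(String[] args) throws IOException {
        // Hypothetical configuration; replace the path with a real input directory.
        JobConf job = new JobConf();
        job.setInputFormat(TextInputFormat.class);
        FileInputFormat.setInputPaths(job, new Path("/tmp/input"));

        // Ask the input format for splits, then print where each split's data lives.
        for (InputSplit split : job.getInputFormat().getSplits(job, 1)) {
            System.out.print("Length: " + split.getLength() + ", Locations: ");
            for (String location : split.getLocations()) {
                System.out.print(location + " ");
            }
            System.out.println();
        }
    }
}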
From source file:alluxio.hadoop.HadoopUtils.java
License:Apache License
/**
 * Returns a string representation of a {@link InputSplit}.
 *
 * @param is Hadoop {@link InputSplit}
 * @return its string representation
 */
public static String toStringHadoopInputSplit(InputSplit is) {
    StringBuilder sb = new StringBuilder("HadoopInputSplit: ");
    try {
        sb.append(" Length: ").append(is.getLength());
        sb.append(" , Locations: ");
        for (String loc : is.getLocations()) {
            sb.append(loc).append(" ; ");
        }
    } catch (IOException e) {
        LOG.error(e.getMessage());
    }
    return sb.toString();
}
From source file:com.ibm.jaql.lang.expr.io.InputSplitsFn.java
License:Apache License
@Override
public JsonIterator iter(Context context) throws Exception {
    JsonValue iod = exprs[0].eval(context);
    Adapter adapter = JaqlUtil.getAdapterStore().input.getAdapter(iod);
    if (!(adapter instanceof HadoopInputAdapter)) {
        throw new ClassCastException("i/o descriptor must be for an input format");
    }
    HadoopInputAdapter hia = (HadoopInputAdapter) adapter;
    JobConf conf = new JobConf(); // TODO: allow configuration
    hia.setParallel(conf); // right thing to do?
    hia.configure(conf); // right thing to do?
    int numSplits = conf.getNumMapTasks(); // TODO: allow override
    final InputSplit[] splits = hia.getSplits(conf, numSplits);
    final MutableJsonString className = new MutableJsonString();
    final MutableJsonBinary rawSplit = new MutableJsonBinary();
    final BufferedJsonRecord rec = new BufferedJsonRecord(3);
    final BufferedJsonArray locArr = new BufferedJsonArray();
    rec.add(CLASS_TAG, className);
    rec.add(SPLIT_TAG, rawSplit);
    rec.add(LOCATIONS_TAG, locArr);

    return new JsonIterator(rec) {
        DataOutputBuffer out = new DataOutputBuffer();
        int i = 0;

        @Override
        public boolean moveNext() throws Exception {
            if (i >= splits.length) {
                return false;
            }
            InputSplit split = splits[i++];
            className.setCopy(split.getClass().getName());
            out.reset();
            split.write(out);
            rawSplit.setCopy(out.getData(), out.getLength());
            locArr.clear();
            String[] locs = split.getLocations();
            if (locs != null) {
                for (String loc : locs) {
                    locArr.add(new JsonString(loc));
                }
            }
            return true;
        }
    };
}
From source file:com.moz.fiji.hive.FijiTableInputFormat.java
License:Apache License
/**
 * Returns an object responsible for generating records contained in a
 * given input split.
 *
 * @param split The input split to create a record reader for.
 * @param job The job configuration.
 * @param reporter A job info reporter (for counters, status, etc.).
 * @return The record reader.
 * @throws IOException If there is an error.
 */
@Override
public RecordReader<ImmutableBytesWritable, FijiRowDataWritable> getRecordReader(InputSplit split,
        JobConf job, Reporter reporter) throws IOException {
    LOG.info("Getting record reader {}", split.getLocations());
    return new FijiTableRecordReader((FijiTableInputSplit) split, job);
}
From source file:com.yahoo.druid.hadoop.HiveDatasourceInputFormat.java
License:Apache License
@Override
public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
    logger.info("checkPost #5");

    String overlordUrl = jobConf.get(CONF_DRUID_OVERLORD_HOSTPORT);
    Preconditions.checkArgument(overlordUrl != null && !overlordUrl.isEmpty(),
            CONF_DRUID_OVERLORD_HOSTPORT + " not defined");
    logger.info("druid overlord url = " + overlordUrl);

    String schemaStr = jobConf.get(CONF_DRUID_SCHEMA);
    Preconditions.checkArgument(schemaStr != null && !schemaStr.isEmpty(),
            "schema undefined, provide " + CONF_DRUID_SCHEMA);
    logger.info("schema = " + schemaStr);

    DatasourceIngestionSpec ingestionSpec = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(schemaStr,
            DatasourceIngestionSpec.class);

    String segmentsStr = getSegmentsToLoad(ingestionSpec.getDataSource(), ingestionSpec.getIntervals(),
            overlordUrl);
    logger.info("segments list received from overlord = " + segmentsStr);

    List<DataSegment> segmentsList = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(segmentsStr,
            new TypeReference<List<DataSegment>>() {
            });
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<>(
            Ordering.natural());
    for (DataSegment segment : segmentsList) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = timeline
            .lookup(ingestionSpec.getIntervals().get(0));
    final List<WindowedDataSegment> windowedSegments = new ArrayList<>();
    for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            windowedSegments.add(new WindowedDataSegment(chunk.getObject(), holder.getInterval()));
        }
    }
    jobConf.set(CONF_INPUT_SEGMENTS,
            HadoopDruidIndexerConfig.JSON_MAPPER.writeValueAsString(windowedSegments));

    segmentsStr = Preconditions.checkNotNull(jobConf.get(CONF_INPUT_SEGMENTS), "No segments found to read");
    List<WindowedDataSegment> segments = HadoopDruidIndexerConfig.JSON_MAPPER.readValue(segmentsStr,
            new TypeReference<List<WindowedDataSegment>>() {
            });
    if (segments == null || segments.size() == 0) {
        throw new ISE("No segments found to read");
    }
    logger.info("segments to read " + segmentsStr);

    long maxSize = numSplits;
    if (maxSize > 0) {
        // combining is to happen, let us sort the segments list by size so that
        // they are combined appropriately
        Collections.sort(segments, new Comparator<WindowedDataSegment>() {
            @Override
            public int compare(WindowedDataSegment s1, WindowedDataSegment s2) {
                return Long.compare(s1.getSegment().getSize(), s2.getSegment().getSize());
            }
        });
    }

    List<InputSplit> splits = Lists.newArrayList();
    List<WindowedDataSegment> list = new ArrayList<>();
    long size = 0;

    // JobConf dummyConf = new JobConf();
    Job job = new Job(jobConf);
    JobContext jobContext = ShimLoader.getHadoopShims().newJobContext(job);
    Path[] paths = org.apache.hadoop.mapreduce.lib.input.FileInputFormat.getInputPaths(jobContext);
    logger.info("dummyPath : " + paths);
    jobConf.set("druid.hive.dummyfilename", paths[0].toString());

    InputFormat fio = supplier.get();
    for (WindowedDataSegment segment : segments) {
        if (size + segment.getSegment().getSize() > maxSize && size > 0) {
            splits.add(toDataSourceSplit(list, fio, jobConf, paths[0]));
            list = Lists.newArrayList();
            size = 0;
        }
        list.add(segment);
        size += segment.getSegment().getSize();
    }
    if (list.size() > 0) {
        splits.add(toDataSourceSplit(list, fio, jobConf, paths[0]));
    }

    logger.info("Number of splits: " + splits.size());
    for (InputSplit split : splits) {
        logger.info(split.getClass().getName());
        for (String location : split.getLocations()) {
            logger.info(location);
        }
    }
    return Iterables.toArray(splits, InputSplit.class);
}
From source file:com.yahoo.druid.hadoop.HiveDatasourceInputFormat.java
License:Apache License
private String[] getFrequentLocations(List<WindowedDataSegment> segments, InputFormat fio, JobConf conf)
        throws IOException {
    Iterable<String> locations = Collections.emptyList();
    for (WindowedDataSegment segment : segments) {
        FileInputFormat.setInputPaths(conf, new Path(JobHelper.getURIFromSegment(segment.getSegment())));
        logger.info("CheckPost 4" + fio.getClass());
        for (InputSplit split : fio.getSplits(conf, 1)) {
            locations = Iterables.concat(locations, Arrays.asList(split.getLocations()));
        }
    }
    return getFrequentLocations(locations);
}
From source file:edu.uci.ics.asterix.external.indexing.dataflow.IndexingScheduler.java
License:Apache License
/**
 * Scan the splits once and build a popularity map
 *
 * @param splits
 *            the split array
 * @param locationToNumOfSplits
 *            the map to be built
 * @throws IOException
 */
private void buildPopularityMap(InputSplit[] splits, Map<String, IntWritable> locationToNumOfSplits)
        throws IOException {
    for (InputSplit split : splits) {
        String[] locations = split.getLocations();
        for (String loc : locations) {
            IntWritable locCount = locationToNumOfSplits.get(loc);
            if (locCount == null) {
                locCount = new IntWritable(0);
                locationToNumOfSplits.put(loc, locCount);
            }
            locCount.set(locCount.get() + 1);
        }
    }
}
From source file:edu.uci.ics.hyracks.hdfs.scheduler.IPProximityNcCollectionBuilder.java
License:Apache License
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
        final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
        final int[] workloads, final int slotLimit) {
    final TreeMap<BytesWritable, IntWritable> availableIpsToSlots = new TreeMap<BytesWritable, IntWritable>();
    for (int i = 0; i < workloads.length; i++) {
        if (workloads[i] < slotLimit) {
            byte[] rawip;
            try {
                rawip = ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress();
            } catch (UnknownHostException e) {
                // QQQ Should probably have a neater solution than this
                throw new RuntimeException(e);
            }
            BytesWritable ip = new BytesWritable(rawip);
            IntWritable availableSlot = availableIpsToSlots.get(ip);
            if (availableSlot == null) {
                availableSlot = new IntWritable(slotLimit - workloads[i]);
                availableIpsToSlots.put(ip, availableSlot);
            } else {
                availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
            }
        }
    }

    return new INcCollection() {

        @Override
        public String findNearestAvailableSlot(InputSplit split) {
            try {
                String[] locs = split.getLocations();
                int minDistance = Integer.MAX_VALUE;
                BytesWritable currentCandidateIp = null;
                if (locs != null && locs.length > 0) {
                    for (int j = 0; j < locs.length; j++) {
                        /**
                         * get all the IP addresses from the name
                         */
                        InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                        for (InetAddress ip : allIps) {
                            BytesWritable splitIp = new BytesWritable(ip.getAddress());
                            /**
                             * if the node controller exists
                             */
                            BytesWritable candidateNcIp = availableIpsToSlots.floorKey(splitIp);
                            if (candidateNcIp == null) {
                                candidateNcIp = availableIpsToSlots.ceilingKey(splitIp);
                            }
                            if (candidateNcIp != null) {
                                if (availableIpsToSlots.get(candidateNcIp).get() > 0) {
                                    byte[] candidateIP = candidateNcIp.getBytes();
                                    byte[] splitIP = splitIp.getBytes();
                                    int candidateInt = candidateIP[0] << 24 | (candidateIP[1] & 0xFF) << 16
                                            | (candidateIP[2] & 0xFF) << 8 | (candidateIP[3] & 0xFF);
                                    int splitInt = splitIP[0] << 24 | (splitIP[1] & 0xFF) << 16
                                            | (splitIP[2] & 0xFF) << 8 | (splitIP[3] & 0xFF);
                                    int distance = Math.abs(candidateInt - splitInt);
                                    if (minDistance > distance) {
                                        minDistance = distance;
                                        currentCandidateIp = candidateNcIp;
                                    }
                                }
                            }
                        }
                    }
                } else {
                    for (Entry<BytesWritable, IntWritable> entry : availableIpsToSlots.entrySet()) {
                        if (entry.getValue().get() > 0) {
                            currentCandidateIp = entry.getKey();
                            break;
                        }
                    }
                }

                if (currentCandidateIp != null) {
                    /**
                     * Update the entry of the selected IP
                     */
                    IntWritable availableSlot = availableIpsToSlots.get(currentCandidateIp);
                    availableSlot.set(availableSlot.get() - 1);
                    if (availableSlot.get() == 0) {
                        availableIpsToSlots.remove(currentCandidateIp);
                    }

                    /**
                     * Update the entry of the selected NC
                     */
                    List<String> dataLocations = ipToNcMapping
                            .get(InetAddress.getByAddress(currentCandidateIp.getBytes()).getHostAddress());
                    for (String nc : dataLocations) {
                        int ncIndex = ncNameToIndex.get(nc);
                        if (workloads[ncIndex] < slotLimit) {
                            return nc;
                        }
                    }
                }
                /** not scheduled */
                return null;
            } catch (Exception e) {
                throw new IllegalStateException(e);
            }
        }

        @Override
        public int numAvailableSlots() {
            return availableIpsToSlots.size();
        }
    };
}
From source file:edu.uci.ics.hyracks.hdfs.scheduler.RackAwareNcCollectionBuilder.java
License:Apache License
@Override
public INcCollection build(Map<String, NodeControllerInfo> ncNameToNcInfos,
        final Map<String, List<String>> ipToNcMapping, final Map<String, Integer> ncNameToIndex, String[] NCs,
        final int[] workloads, final int slotLimit) {
    try {
        final Map<List<Integer>, List<String>> pathToNCs = new HashMap<List<Integer>, List<String>>();
        for (int i = 0; i < NCs.length; i++) {
            List<Integer> path = new ArrayList<Integer>();
            String ipAddress = InetAddress
                    .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                    .getHostAddress();
            topology.lookupNetworkTerminal(ipAddress, path);
            if (path.size() <= 0) {
                // if the hyracks nc is not in the defined cluster
                path.add(Integer.MIN_VALUE);
                LOGGER.info(NCs[i] + "'s IP address is not in the cluster topology file!");
            }
            List<String> ncs = pathToNCs.get(path);
            if (ncs == null) {
                ncs = new ArrayList<String>();
                pathToNCs.put(path, ncs);
            }
            ncs.add(NCs[i]);
        }

        final TreeMap<List<Integer>, IntWritable> availableIpsToSlots = new TreeMap<List<Integer>, IntWritable>(
                new Comparator<List<Integer>>() {

                    @Override
                    public int compare(List<Integer> l1, List<Integer> l2) {
                        int commonLength = Math.min(l1.size(), l2.size());
                        for (int i = 0; i < commonLength; i++) {
                            Integer value1 = l1.get(i);
                            Integer value2 = l2.get(i);
                            int cmp = value1 > value2 ? 1 : (value1 < value2 ? -1 : 0);
                            if (cmp != 0) {
                                return cmp;
                            }
                        }
                        return l1.size() > l2.size() ? 1 : (l1.size() < l2.size() ? -1 : 0);
                    }
                });
        for (int i = 0; i < workloads.length; i++) {
            if (workloads[i] < slotLimit) {
                List<Integer> path = new ArrayList<Integer>();
                String ipAddress = InetAddress
                        .getByAddress(ncNameToNcInfos.get(NCs[i]).getNetworkAddress().lookupIpAddress())
                        .getHostAddress();
                topology.lookupNetworkTerminal(ipAddress, path);
                if (path.size() <= 0) {
                    // if the hyracks nc is not in the defined cluster
                    path.add(Integer.MIN_VALUE);
                }
                IntWritable availableSlot = availableIpsToSlots.get(path);
                if (availableSlot == null) {
                    availableSlot = new IntWritable(slotLimit - workloads[i]);
                    availableIpsToSlots.put(path, availableSlot);
                } else {
                    availableSlot.set(slotLimit - workloads[i] + availableSlot.get());
                }
            }
        }

        return new INcCollection() {

            @Override
            public String findNearestAvailableSlot(InputSplit split) {
                try {
                    String[] locs = split.getLocations();
                    int minDistance = Integer.MAX_VALUE;
                    List<Integer> currentCandidatePath = null;
                    if (locs != null && locs.length > 0) {
                        for (int j = 0; j < locs.length; j++) {
                            /**
                             * get all the IP addresses from the name
                             */
                            InetAddress[] allIps = InetAddress.getAllByName(locs[j]);
                            boolean inTopology = false;
                            for (InetAddress ip : allIps) {
                                List<Integer> splitPath = new ArrayList<Integer>();
                                boolean inCluster = topology.lookupNetworkTerminal(ip.getHostAddress(), splitPath);
                                if (!inCluster) {
                                    continue;
                                }
                                inTopology = true;
                                /**
                                 * if the node controller exists
                                 */
                                List<Integer> candidatePath = availableIpsToSlots.floorKey(splitPath);
                                if (candidatePath == null) {
                                    candidatePath = availableIpsToSlots.ceilingKey(splitPath);
                                }
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        int distance = distance(splitPath, candidatePath);
                                        if (minDistance > distance) {
                                            minDistance = distance;
                                            currentCandidatePath = candidatePath;
                                        }
                                    }
                                }
                            }
                            if (!inTopology) {
                                LOGGER.info(locs[j] + "'s IP address is not in the cluster topology file!");
                                /**
                                 * if the machine is not in the topology file
                                 */
                                List<Integer> candidatePath = null;
                                for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                                    if (entry.getValue().get() > 0) {
                                        candidatePath = entry.getKey();
                                        break;
                                    }
                                }
                                /** the split path is empty */
                                if (candidatePath != null) {
                                    if (availableIpsToSlots.get(candidatePath).get() > 0) {
                                        currentCandidatePath = candidatePath;
                                    }
                                }
                            }
                        }
                    } else {
                        for (Entry<List<Integer>, IntWritable> entry : availableIpsToSlots.entrySet()) {
                            if (entry.getValue().get() > 0) {
                                currentCandidatePath = entry.getKey();
                                break;
                            }
                        }
                    }

                    if (currentCandidatePath != null && currentCandidatePath.size() > 0) {
                        /**
                         * Update the entry of the selected IP
                         */
                        IntWritable availableSlot = availableIpsToSlots.get(currentCandidatePath);
                        availableSlot.set(availableSlot.get() - 1);
                        if (availableSlot.get() == 0) {
                            availableIpsToSlots.remove(currentCandidatePath);
                        }

                        /**
                         * Update the entry of the selected NC
                         */
                        List<String> candidateNcs = pathToNCs.get(currentCandidatePath);
                        for (String candidate : candidateNcs) {
                            int ncIndex = ncNameToIndex.get(candidate);
                            if (workloads[ncIndex] < slotLimit) {
                                return candidate;
                            }
                        }
                    }
                    /** not scheduled */
                    return null;
                } catch (Exception e) {
                    throw new IllegalStateException(e);
                }
            }

            @Override
            public int numAvailableSlots() {
                return availableIpsToSlots.size();
            }

            private int distance(List<Integer> splitPath, List<Integer> candidatePath) {
                int commonLength = Math.min(splitPath.size(), candidatePath.size());
                int distance = 0;
                for (int i = 0; i < commonLength; i++) {
                    distance = distance * 100 + Math.abs(splitPath.get(i) - candidatePath.get(i));
                }
                List<Integer> restElements = splitPath.size() > candidatePath.size() ? splitPath : candidatePath;
                for (int i = commonLength; i < restElements.size(); i++) {
                    distance = distance * 100 + Math.abs(restElements.get(i));
                }
                return distance;
            }
        };
    } catch (Exception e) {
        throw new IllegalStateException(e);
    }
}
From source file:org.apache.drill.exec.store.hive.HiveInputReader.java
License:Apache License
public static void main(String args[]) throws Exception {
    /*
    String[] columnNames = {"n_nationkey", "n_name", "n_regionkey", "n_comment"};
    String[] columnTypes = {"bigint", "string", "bigint", "string"};

    List<FieldSchema> cols = Lists.newArrayList();
    for (int i = 0; i < columnNames.length; i++) {
      cols.add(new FieldSchema(columnNames[i], columnTypes[i], null));
    }
    String location = "file:///tmp/nation_s";
    String inputFormat = TextInputFormat.class.getCanonicalName();
    String serdeLib = LazySimpleSerDe.class.getCanonicalName();
    // String inputFormat = HiveHBaseTableInputFormat.class.getCanonicalName();
    // String serdeLib = HBaseSerDe.class.getCanonicalName();
    Map<String, String> serdeParams = new HashMap();
    // serdeParams.put("serialization.format", "1");
    // serdeParams.put("hbase.columns.mapping", ":key,f:name,f:regionkey,f:comment");
    serdeParams.put("serialization.format", "|");
    serdeParams.put("field.delim", "|");

    Map<String, String> tableParams = new HashMap();
    tableParams.put("hbase.table.name", "nation");

    SerDeInfo serDeInfo = new SerDeInfo(null, serdeLib, serdeParams);
    StorageDescriptor storageDescriptor = new StorageDescriptor(cols, location, inputFormat, null, false, -1,
        serDeInfo, null, null, null);
    Table table = new Table("table", "default", "sphillips", 0, 0, 0, storageDescriptor,
        new ArrayList<FieldSchema>(), tableParams, null, null, "MANAGED_TABLE");
    Properties properties = MetaStoreUtils.getTableMetadata(table);
    */

    HiveConf conf = new HiveConf();
    conf.set("hive.metastore.uris", "thrift://10.10.31.51:9083");
    HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
    Table table = client.getTable("default", "nation");
    Properties properties = MetaStoreUtils.getTableMetadata(table);

    Path path = new Path(table.getSd().getLocation());
    JobConf job = new JobConf();
    for (Object obj : properties.keySet()) {
        job.set((String) obj, (String) properties.get(obj));
    }
    // job.set("hbase.zookeeper.quorum", "10.10.31.51");
    // job.set("hbase.zookeeper.property.clientPort", "5181");
    InputFormat f = (InputFormat) Class.forName(table.getSd().getInputFormat()).getConstructor().newInstance();
    job.setInputFormat(f.getClass());

    FileInputFormat.addInputPath(job, path);
    InputFormat format = job.getInputFormat();
    SerDe serde = (SerDe) Class.forName(table.getSd().getSerdeInfo().getSerializationLib()).getConstructor()
            .newInstance();
    serde.initialize(job, properties);
    ObjectInspector inspector = serde.getObjectInspector();
    ObjectInspector.Category cat = inspector.getCategory();

    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromObjectInspector(inspector);

    List<String> columns = null;
    List<TypeInfo> colTypes = null;
    List<ObjectInspector> fieldObjectInspectors = Lists.newArrayList();

    switch (typeInfo.getCategory()) {
    case STRUCT:
        columns = ((StructTypeInfo) typeInfo).getAllStructFieldNames();
        colTypes = ((StructTypeInfo) typeInfo).getAllStructFieldTypeInfos();
        for (int i = 0; i < columns.size(); i++) {
            System.out.print(columns.get(i));
            System.out.print(" ");
            System.out.print(colTypes.get(i));
        }
        System.out.println("");
        for (StructField field : ((StructObjectInspector) inspector).getAllStructFieldRefs()) {
            fieldObjectInspectors.add(field.getFieldObjectInspector());
        }
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        String encoded = serializeInputSplit(split);
        System.out.println(encoded);
        InputSplit newSplit = deserializeInputSplit(encoded, split.getClass().getCanonicalName());
        System.out.print("Length: " + newSplit.getLength() + " ");
        System.out.print("Locations: ");
        for (String loc : newSplit.getLocations()) {
            System.out.print(loc + " ");
        }
        System.out.println();
    }

    for (InputSplit split : format.getSplits(job, 1)) {
        RecordReader reader = format.getRecordReader(split, job, Reporter.NULL);
        Object key = reader.createKey();
        Object value = reader.createValue();
        int count = 0;
        while (reader.next(key, value)) {
            List<Object> values = ((StructObjectInspector) inspector)
                    .getStructFieldsDataAsList(serde.deserialize((Writable) value));
            StructObjectInspector sInsp = (StructObjectInspector) inspector;
            Object obj = sInsp.getStructFieldData(serde.deserialize((Writable) value),
                    sInsp.getStructFieldRef("n_name"));
            System.out.println(obj);
            /*
            for (Object obj : values) {
              PrimitiveObjectInspector.PrimitiveCategory pCat = ((PrimitiveObjectInspector) fieldObjectInspectors.get(count)).getPrimitiveCategory();
              Object pObj = ((PrimitiveObjectInspector) fieldObjectInspectors.get(count)).getPrimitiveJavaObject(obj);
              System.out.print(pObj + " ");
            }
            */
            System.out.println("");
        }
    }
}
From source file:org.apache.hive.jdbc.BaseJdbcWithMiniLlap.java
License:Apache License
private int processQuery(String currentDatabase, String query, int numSplits, RowProcessor rowProcessor)
        throws Exception {
    String url = miniHS2.getJdbcURL();
    String user = System.getProperty("user.name");
    String pwd = user;
    String handleId = UUID.randomUUID().toString();

    InputFormat<NullWritable, Row> inputFormat = getInputFormat();

    // Get splits
    JobConf job = new JobConf(conf);
    job.set(LlapBaseInputFormat.URL_KEY, url);
    job.set(LlapBaseInputFormat.USER_KEY, user);
    job.set(LlapBaseInputFormat.PWD_KEY, pwd);
    job.set(LlapBaseInputFormat.QUERY_KEY, query);
    job.set(LlapBaseInputFormat.HANDLE_ID, handleId);
    if (currentDatabase != null) {
        job.set(LlapBaseInputFormat.DB_KEY, currentDatabase);
    }

    InputSplit[] splits = inputFormat.getSplits(job, numSplits);
    assertTrue(splits.length > 0);

    // Fetch rows from splits
    boolean first = true;
    int rowCount = 0;
    for (InputSplit split : splits) {
        System.out.println("Processing split " + split.getLocations());
        int numColumns = 2;
        RecordReader<NullWritable, Row> reader = inputFormat.getRecordReader(split, job, null);
        Row row = reader.createValue();
        while (reader.next(NullWritable.get(), row)) {
            rowProcessor.process(row);
            ++rowCount;
        }
        // In arrow-mode this will throw exception unless all buffers have been released
        // See org.apache.hadoop.hive.llap.LlapArrowBatchRecordReader
        reader.close();
    }
    LlapBaseInputFormat.close(handleId);

    return rowCount;
}