Usage examples for the `json()` method of `org.apache.spark.sql.execution.streaming.Offset`
public abstract String json();
From source file: com.hurence.logisland.connect.source.KafkaConnectStreamSource.java
License: Apache License
@Override public Dataset<Row> getBatch(Option<Offset> start, Offset end) { Long startOff = start.isDefined() ? Long.parseLong(start.get().json()) : !bufferedRecords.isEmpty() ? bufferedRecords.firstKey() : 0L; Map<Integer, List<InternalRow>> current = new LinkedHashMap<>( bufferedRecords.subMap(startOff, Long.parseLong(end.json()) + 1)).keySet().stream() .flatMap(offset -> { List<Tuple2<SourceTask, SourceRecord>> srl = bufferedRecords.remove(offset); if (srl != null) { uncommittedRecords.put(offset, srl); return srl.stream(); }//from ww w. j a v a 2 s . c om return Stream.empty(); }).map(Tuple2::_2).map( sourceRecord -> InternalRow.fromSeq(JavaConversions .<Object>asScalaBuffer(Arrays.asList(toUTFString(sourceRecord.topic()), toUTFString(sourceRecord.sourcePartition()), toUTFString(sourceRecord.sourceOffset()), keyConverter.fromConnectData(sourceRecord.topic(), sourceRecord.keySchema(), sourceRecord.key()), valueConverter.fromConnectData(sourceRecord.topic(), sourceRecord.valueSchema(), sourceRecord.value()))) .toSeq())) .collect(Collectors.groupingBy(row -> Objects.hashCode((row.getString(1))))); return sparkPlatform.createStreamingDataFrame(sqlContext, new SimpleRDD(sqlContext.sparkContext(), current), DATA_SCHEMA); }
From source file: com.hurence.logisland.connect.source.KafkaConnectStreamSource.java
License: Apache License
@Override public void commit(Offset end) { if (uncommittedRecords.isEmpty()) { return;//from w ww .j a v a2s. c o m } //first commit all offsets already given List<Tuple2<SourceTask, SourceRecord>> recordsToCommit = new LinkedHashMap<>( uncommittedRecords.subMap(uncommittedRecords.firstKey(), Long.parseLong(end.json()) + 1)).keySet() .stream().flatMap(key -> uncommittedRecords.remove(key).stream()) .collect(Collectors.toList()); recordsToCommit.forEach(tuple -> { try { offsetWriterMap.get(tuple._1()).offset(tuple._2().sourcePartition(), tuple._2().sourceOffset()); tuple._1().commitRecord(tuple._2()); } catch (Exception e) { LOGGER.warn("Unable to commit record " + tuple._2(), e); } }); recordsToCommit.stream().map(Tuple2::_1).distinct().forEach(sourceTask -> { try { sourceTask.commit(); } catch (Exception e) { LOGGER.warn("Unable to bulk commit offset for connector " + connectorName, e); } }); //now flush offset writer offsetWriterMap.values().forEach(offsetStorageWriter -> { try { if (offsetStorageWriter.beginFlush()) { offsetStorageWriter.doFlush((error, result) -> { if (error == null) { LOGGER.debug("Flushing till offset {} with result {}", end, result); } else { LOGGER.error("Unable to commit records till source offset " + end, error); } }).get(30, TimeUnit.SECONDS); } } catch (Exception e) { LOGGER.error("Unable to commit records till source offset " + end, e); } }); }