Flink – metrics V1.2

WebRuntimeMonitor

.GET("/jobs/:jobid/vertices/:vertexid/metrics", handler(new JobVertexMetricsHandler(metricFetcher)))

.GET("/jobs/:jobid/metrics", handler(new JobMetricsHandler(metricFetcher)))

.GET("/taskmanagers/:" + TaskManagersHandler.TASK_MANAGER_ID_KEY + "/metrics", handler(new TaskManagerMetricsHandler(metricFetcher)))

.GET("/jobmanager/metrics", handler(new JobManagerMetricsHandler(metricFetcher)))

JobVertexMetricsHandler

AbstractMetricsHandler

MetricFetcher

核心就是fetchMetrics函数，会从JobManager获取数据，

/**
 * Refreshes the metric store from the JobManager and from every registered TaskManager.
 *
 * <p>Does nothing when the retriever currently has no JobManager gateway. Every request is
 * issued as a future; results are applied in the {@code onSuccess} callbacks run on
 * {@code ctx}, and each future is also handed to {@code logErrorOnFailure}. Any exception
 * thrown while issuing the requests is logged as a warning and not rethrown.
 */
private void fetchMetrics() {
    try {
        Option<scala.Tuple2<ActorGateway, Integer>> jobManagerGatewayAndWebPort = retriever.getJobManagerGatewayAndWebPort();
        if (jobManagerGatewayAndWebPort.isDefined()) {
            // Gateway of the JobManager actor (first element of the tuple).
            ActorGateway jobManager = jobManagerGatewayAndWebPort.get()._1();

            /*
             * Remove all metrics that belong to a job that is not running and no longer archived.
             */
            // Ask the JobManager for the current job details.
            Future<Object> jobDetailsFuture = jobManager.ask(new RequestJobDetails(true, true), timeout);
            jobDetailsFuture
                .onSuccess(new OnSuccess<Object>() {
                    @Override
                    public void onSuccess(Object result) throws Throwable {
                        MultipleJobsDetails details = (MultipleJobsDetails) result;
                        List<String> toRetain = new ArrayList<>();
                        for (JobDetails job : details.getRunningJobs()) {
                            toRetain.add(job.getJobId().toString());
                        }
                        for (JobDetails job : details.getFinishedJobs()) {
                            toRetain.add(job.getJobId().toString());
                        }
                        synchronized (metrics) {
                            // Keep only jobs reported as running or finished; every other job entry is dropped.
                            metrics.jobs.keySet().retainAll(toRetain);
                        }
                    }
                }, ctx);
            logErrorOnFailure(jobDetailsFuture, "Fetching of JobDetails failed.");

            // The query service path is the JobManager path with its last segment replaced
            // by the MetricQueryService actor name.
            String jobManagerPath = jobManager.path();
            String queryServicePath = jobManagerPath.substring(0, jobManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME;
            ActorRef jobManagerQueryService = actorSystem.actorFor(queryServicePath);

            // Query the JobManager's own metrics.
            queryMetrics(jobManagerQueryService);

            /*
             * We first request the list of all registered task managers from the job manager, and then
             * request the respective metric dump from each task manager.
             *
             * All stored metrics that do not belong to a registered task manager will be removed.
             */
            Future<Object> registeredTaskManagersFuture = jobManager.ask(JobManagerMessages.getRequestRegisteredTaskManagers(), timeout);
            registeredTaskManagersFuture
                .onSuccess(new OnSuccess<Object>() {
                    @Override
                    public void onSuccess(Object result) throws Throwable {
                        Iterable<Instance> taskManagers = ((JobManagerMessages.RegisteredTaskManagers) result).asJavaIterable();
                        List<String> activeTaskManagers = new ArrayList<>();
                        for (Instance taskManager : taskManagers) {
                            activeTaskManagers.add(taskManager.getId().toString());

                            // Per-TaskManager query service name: service name + "_" + resource ID string.
                            String taskManagerPath = taskManager.getTaskManagerGateway().getAddress();
                            String queryServicePath = taskManagerPath.substring(0, taskManagerPath.lastIndexOf('/') + 1) + MetricQueryService.METRIC_QUERY_SERVICE_NAME + "_" + taskManager.getTaskManagerID().getResourceIdString();
                            ActorRef taskManagerQueryService = actorSystem.actorFor(queryServicePath);

                            // Query the metrics of this TaskManager.
                            queryMetrics(taskManagerQueryService);
                        }
                        synchronized (metrics) { // remove all metrics belonging to unregistered task managers
                            metrics.taskManagers.keySet().retainAll(activeTaskManagers);
                        }
                    }
                }, ctx);
            // Log message typo fixed ("Fetchin" -> "Fetching").
            logErrorOnFailure(registeredTaskManagersFuture, "Fetching list of registered TaskManagers failed.");
        }
    } catch (Exception e) {
        LOG.warn("Exception while fetching metrics.", e);
    }
}

queryMetrics

/**
 * Asks the given actor for a metric dump and feeds the reply into the metric store.
 *
 * <p>The reply is handled by an {@code onSuccess} callback run on {@code ctx}; the future is
 * also passed to {@code logErrorOnFailure}.
 *
 * @param actor ActorRef to request the dump from
 */
private void queryMetrics(ActorRef actor) {
    final BasicGateway gateway = new BasicGateway(actor);

    // Request the serialized metric dump.
    final Future<Object> dumpFuture = gateway.ask(MetricQueryService.getCreateDump(), timeout);

    final OnSuccess<Object> storeDump = new OnSuccess<Object>() {
        @Override
        public void onSuccess(Object result) throws Throwable {
            addMetrics(result);
        }
    };
    dumpFuture.onSuccess(storeDump, ctx);

    logErrorOnFailure(dumpFuture, "Fetching metrics failed.");
}

/**
 * Deserializes a metric dump reply and adds every contained metric to the metric store.
 *
 * @param result reply object; cast to {@code byte[]} before deserialization
 * @throws IOException declared by this method; presumably raised by the deserializer — TODO confirm
 */
private void addMetrics(Object result) throws IOException {
    final byte[] serializedDump = (byte[]) result;
    for (MetricDump dumped : deserializer.deserialize(serializedDump)) {
        // Add each dumped metric to the metric store.
        metrics.add(dumped);
    }
}

MetricStore

用嵌套的hashmap来存储metrics，瞬时值

// Store holding the metrics of the JobManager itself.
final JobManagerMetricStore jobManager = new JobManagerMetricStore();

// TaskManager metric stores keyed by a string; presumably the TaskManager instance ID — TODO confirm.
final Map<String, TaskManagerMetricStore> taskManagers = new HashMap<>();

// Job metric stores keyed by a string; presumably the job ID — TODO confirm.
final Map<String, JobMetricStore> jobs = new HashMap<>();

/** Metric store for the JobManager; adds nothing beyond what {@code ComponentMetricStore} provides. */
public static class JobManagerMetricStore extends ComponentMetricStore {

}

/** Base store for one component: a plain map from metric name to its value as a string. */
private static abstract class ComponentMetricStore {
    /** The store itself: metric name to string value. */
    public final Map<String, String> metrics = new HashMap<>();

    /**
     * Looks up a metric by name.
     *
     * @param name metric name used as map key
     * @param defaultValue value returned when the map holds no non-null value for {@code name}
     * @return the stored value, or {@code defaultValue} when the lookup yields {@code null}
     */
    public String getMetric(String name, String defaultValue) {
        final String stored = this.metrics.get(name);
        if (stored == null) {
            return defaultValue;
        }
        return stored;
    }
}

MetricQueryService

public class MetricQueryService extends UntypedActor {

    private static final Logger LOG = LoggerFactory.getLogger(MetricQueryService.class);

    public static final String METRIC_QUERY_SERVICE_NAME = "MetricQueryService";

    private static final CharacterFilter FILTER = new CharacterFilter() {

        @Override

        public String filterCharacters(String input) {

            return replaceInvalidChars(input);

        }

    };

    private final MetricDumpSerializer serializer = new MetricDumpSerializer();

    private final Map<Gauge<?>, Tuple2<QueryScopeInfo, String>> gauges = new HashMap<>();

    private final Map<Counter, Tuple2<QueryScopeInfo, String>> counters = new HashMap<>();

    private final Map<Histogram, Tuple2<QueryScopeInfo, String>> histograms = new HashMap<>();

    private final Map<Meter, Tuple2<QueryScopeInfo, String>> meters = new HashMap<>();

收到CreateDump请求，

} else if (message instanceof CreateDump) {

    byte[] dump = serializer.serialize(counters, gauges, histograms, meters);

    getSender().tell(dump, getSelf());

Start

   /**
     * Creates the MetricQueryService actor inside the given actor system.
     *
     * <p>Without a resource ID the actor is named {@code METRIC_QUERY_SERVICE_NAME}; with one,
     * the name is that constant followed by {@code "_"} and the resource ID string.
     *
     * @param actorSystem The actor system running the MetricQueryService
     * @param resourceID resource ID to disambiguate the actor name, may be {@code null}
     * @return actor reference to the MetricQueryService
     */
    public static ActorRef startMetricQueryService(ActorSystem actorSystem, ResourceID resourceID) {
        final String actorName;
        if (resourceID != null) {
            actorName = METRIC_QUERY_SERVICE_NAME + "_" + resourceID.getResourceIdString();
        } else {
            actorName = METRIC_QUERY_SERVICE_NAME;
        }
        final Props serviceProps = Props.create(MetricQueryService.class);
        return actorSystem.actorOf(serviceProps, actorName);
    }

在MetricRegistry中把metrics注册到QueryService中，

if (queryService != null) {

    MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group);

}

采集点

numRecordsIn

StreamInputProcessor –> processInput

    @SuppressWarnings("SynchronizationOnLocalVariableOrMethodParameter")

    public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception {

        if (numRecordsIn == null) {

            numRecordsIn = ((OperatorMetricGroup) streamOperator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter();

        }

        //......

        // now we can do the actual processing

        StreamRecord<IN> record = recordOrMark.asRecord();

        synchronized (lock) {

            numRecordsIn.inc(); //执行processElement前加一

            streamOperator.setKeyContextElement1(record);

            streamOperator.processElement(record);

        }

        return true;

如果是chaining，

ChainingOutput

private static class ChainingOutput<T> implements Output<StreamRecord<T>> {

    protected final OneInputStreamOperator<T, ?> operator;

    protected final Counter numRecordsIn;

    public ChainingOutput(OneInputStreamOperator<T, ?> operator) {

        this.operator = operator;

        this.numRecordsIn = ((OperatorMetricGroup) operator.getMetricGroup()).getIOMetricGroup().getNumRecordsInCounter(); //初始化

    }

    /**
     * Counts the record as input of the chained operator and then processes it there.
     *
     * <p>Any exception is rethrown wrapped in an {@code ExceptionInChainedOperatorException}.
     */
    @Override
    public void collect(StreamRecord<T> record) {
        try {
            // For a chain, emitting to this output means calling processElement on the next
            // operator, so the input counter is incremented right before that.
            numRecordsIn.inc();
            operator.setKeyContextElement1(record);
            operator.processElement(record);
        } catch (Exception failure) {
            throw new ExceptionInChainedOperatorException(failure);
        }
    }

numRecordsOut

在AbstractStreamOperator初始化时，

生成CountingOutput

    @Override

    public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {

        this.container = containingTask;

        this.config = config;

        this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());

        this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter()); //生成CountingOutput

这个output，

在processWatermark，processElement中会用于emit数据

output.emitWatermark(mark);

    /**
     * Output decorator that increments a counter for every collected record and otherwise
     * forwards all calls unchanged to the wrapped output.
     */
    public class CountingOutput implements Output<StreamRecord<OUT>> {
        /** The wrapped output that receives every call. */
        private final Output<StreamRecord<OUT>> output;
        /** Counter incremented once per collected record. */
        private final Counter numRecordsOut;

        /**
         * @param output the output to forward to
         * @param counter the counter to increment on each {@link #collect}
         */
        public CountingOutput(Output<StreamRecord<OUT>> output, Counter counter) {
            this.output = output;
            this.numRecordsOut = counter;
        }

        /** Forwards the watermark; watermarks are not counted. */
        @Override
        public void emitWatermark(Watermark mark) {
            this.output.emitWatermark(mark);
        }

        /** Forwards the latency marker; latency markers are not counted. */
        @Override
        public void emitLatencyMarker(LatencyMarker latencyMarker) {
            this.output.emitLatencyMarker(latencyMarker);
        }

        /** Increments numRecordsOut, then forwards the record. */
        @Override
        public void collect(StreamRecord<OUT> record) {
            this.numRecordsOut.inc();
            this.output.collect(record);
        }

        /** Closes the wrapped output. */
        @Override
        public void close() {
            this.output.close();
        }
    }

注意numRecordsOut和numRecordsIn，除了会统计operator级别的，还会统计task级别的，逻辑在

AbstractStreamOperator

    public void setup(StreamTask<?, ?> containingTask, StreamConfig config, Output<StreamRecord<OUT>> output) {

        this.container = containingTask;

        this.config = config;

        this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());

        this.output = new CountingOutput(output, ((OperatorMetricGroup) this.metrics).getIOMetricGroup().getNumRecordsOutCounter());

        if (config.isChainStart()) {

            ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseInputMetricsForTask();

        }

        if (config.isChainEnd()) {

            ((OperatorMetricGroup) this.metrics).getIOMetricGroup().reuseOutputMetricsForTask();

        }

OperatorIOMetricGroup

    /** Hands this operator's numRecordsIn counter to the IO metric group of the parent's parent group. */
    public void reuseInputMetricsForTask() {
        parentMetricGroup
            .parent()
            .getIOMetricGroup()
            .reuseRecordsInputCounter(this.numRecordsIn);
    }

    /** Hands this operator's numRecordsOut counter to the IO metric group of the parent's parent group. */
    public void reuseOutputMetricsForTask() {
        parentMetricGroup
            .parent()
            .getIOMetricGroup()
            .reuseRecordsOutputCounter(this.numRecordsOut);
    }

可以看到，会将ChainHead的numRecordsIn，set到task的TaskIOMetricGroup

而将ChainEnd的numRecordsOut，set到task的TaskIOMetricGroup

看起来很合理

numRecordsInPerSecond，numRecordsOutPerSecond

在OperatorIOMetricGroup

public OperatorIOMetricGroup(OperatorMetricGroup parentMetricGroup) {

        super(parentMetricGroup);

        numRecordsIn = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_IN);

        numRecordsOut = parentMetricGroup.counter(MetricNames.IO_NUM_RECORDS_OUT);

        numRecordsInRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_IN_RATE, new MeterView(numRecordsIn, 60));

        numRecordsOutRate = parentMetricGroup.meter(MetricNames.IO_NUM_RECORDS_OUT_RATE, new MeterView(numRecordsOut, 60));

    }

可以看到numRecordsInRate和numRecordsOutRate，只是numRecordsIn和numRecordsOut的MeterView

/**
 * A {@link Meter} that derives its rate from an underlying {@link Counter}.
 *
 * <p>Each call to {@link #update()} stores the current count in a circular array. The rate is
 * the difference between the newest and the oldest stored count, divided by the time span.
 * {@code update()} is expected to be called once every {@code UPDATE_INTERVAL_SECONDS}.
 */
public class MeterView implements Meter, View {
    /** The underlying counter maintaining the count */
    private final Counter counter;
    /** The time-span over which the average is calculated */
    private final int timeSpanInSeconds;
    /** Circular array containing the history of values */
    private final long[] values;
    /** The index in the array for the current time */
    private int time = 0;
    /** The last rate we computed */
    private double currentRate = 0;

    /**
     * @param counter the counter whose count is sampled
     * @param timeSpanInSeconds requested averaging span; rounded down to a multiple of
     *                          {@code UPDATE_INTERVAL_SECONDS} and clamped to at least one interval
     */
    public MeterView(Counter counter, int timeSpanInSeconds) {
        this.counter = counter;
        // Round down to a multiple of the update interval, but never below one interval.
        // Without the clamp, a span shorter than one interval becomes 0, which leaves a
        // single-slot buffer and makes update() compute 0.0 / 0 = NaN. Negative spans
        // could even produce an empty array.
        this.timeSpanInSeconds = Math.max(
            timeSpanInSeconds - (timeSpanInSeconds % UPDATE_INTERVAL_SECONDS),
            UPDATE_INTERVAL_SECONDS);
        // One slot per interval plus one, so the oldest and newest samples are exactly one
        // full span apart. With a 5 second interval, a 60 second span needs 13 slots.
        this.values = new long[this.timeSpanInSeconds / UPDATE_INTERVAL_SECONDS + 1];
    }

    @Override
    public void markEvent() {
        this.counter.inc();
    }

    @Override
    public void markEvent(long n) {
        this.counter.inc(n);
    }

    @Override
    public long getCount() {
        return counter.getCount();
    }

    /** Returns the rate computed by the most recent {@link #update()}; 0 before the first update. */
    @Override
    public double getRate() {
        return currentRate;
    }

    /** Samples the counter into the circular array and recomputes the rate. */
    @Override
    public void update() {
        time = (time + 1) % values.length;
        values[time] = counter.getCount();
        // The slot after the current one is the oldest sample still in the buffer, so this
        // is (newest - oldest) / span.
        currentRate = ((double) (values[time] - values[(time + 1) % values.length]) / timeSpanInSeconds);
    }
}

这个实现真是tricky，不好的设计

在MetricRegistry中，会创建

ViewUpdater

    /**
     * Announces a newly added metric to the reporters, the query service and the view updater.
     *
     * <p>Null reporters are skipped. The view updater is created on first use and only
     * receives metrics that implement {@code View}. Any exception is logged and not rethrown.
     *
     * @param metric the metric that was added
     * @param metricName the name of the metric
     * @param group the group that contains the metric
     */
    public void register(Metric metric, String metricName, AbstractMetricGroup group) {
        try {
            // 1) Reporters: each one gets a front group carrying its own index.
            if (reporters != null) {
                for (int idx = 0; idx < reporters.size(); idx++) {
                    MetricReporter reporter = reporters.get(idx);
                    if (reporter == null) {
                        continue;
                    }
                    FrontMetricGroup front = new FrontMetricGroup<AbstractMetricGroup<?>>(idx, group);
                    reporter.notifyOfAddedMetric(metric, metricName, front);
                }
            }

            // 2) Query service, if one is set.
            if (queryService != null) {
                MetricQueryService.notifyOfAddedMetric(queryService, metric, metricName, group);
            }

            // 3) Views additionally need periodic updates.
            if (metric instanceof View) {
                if (viewUpdater == null) {
                    viewUpdater = new ViewUpdater(executor);
                }
                viewUpdater.notifyOfAddedView((View) metric);
            }
        } catch (Exception e) {
            LOG.error("Error while registering metric.", e);
        }
    }

并且在register metrics的时候，除了注册到reporter，MetricQueryService

如果是View的子类，还要注册到ViewUpdater

    public ViewUpdater(ScheduledExecutorService executor) {

        executor.scheduleWithFixedDelay(new ViewUpdaterTask(lock, toAdd, toRemove), 5, UPDATE_INTERVAL_SECONDS, TimeUnit.SECONDS);

    }

ViewUpdater会定期执行ViewUpdaterTask，task中就会调用view的update

numBytesInLocal, numBytesInRemote

在RemoteInputChannel和LocalInputChannel中，

    public LocalInputChannel(

        SingleInputGate inputGate,

        int channelIndex,

        ResultPartitionID partitionId,

        ResultPartitionManager partitionManager,

        TaskEventDispatcher taskEventDispatcher,

        int initialBackoff,

        int maxBackoff,

        TaskIOMetricGroup metrics) {

        super(inputGate, channelIndex, partitionId, initialBackoff, maxBackoff, metrics.getNumBytesInLocalCounter()); //metrics.getNumBytesInLocalCounter()

    public RemoteInputChannel(

        SingleInputGate inputGate,

        int channelIndex,

        ResultPartitionID partitionId,

        ConnectionID connectionId,

        ConnectionManager connectionManager,

        int initialBackOff,

        int maxBackoff,

        TaskIOMetricGroup metrics) {

        super(inputGate, channelIndex, partitionId, initialBackOff, maxBackoff, metrics.getNumBytesInRemoteCounter()); // metrics.getNumBytesInRemoteCounter()

并且都会在

BufferAndAvailability getNextBuffer()

会调用，

numBytesIn.inc(next.getSize());

numBytesOut

RecordWriter

public class RecordWriter<T extends IOReadableWritable> {

    private Counter numBytesOut = new SimpleCounter();

    /**
     * Sends the record to every channel chosen by the channel selector.
     *
     * @param record the record to emit
     */
    public void emit(T record) throws IOException, InterruptedException {

        // The selector may return several channels; the record is sent once per channel.
        for (int targetChannel : channelSelector.selectChannels(record, numChannels)) {

            sendToTarget(record, targetChannel);

        }

    }

    /**
     * Serializes the record for one target channel and writes out every buffer that fills up.
     *
     * <p>numBytesOut is incremented by the size of each buffer just before it is written.
     *
     * @param record the record to serialize
     * @param targetChannel index of the channel, also used to pick the serializer
     */
    private void sendToTarget(T record, int targetChannel) throws IOException, InterruptedException {

        RecordSerializer<T> serializer = serializers[targetChannel];

        // All work on one channel's serializer happens under that serializer's monitor.
        synchronized (serializer) {

            SerializationResult result = serializer.addRecord(record);

            while (result.isFullBuffer()) {

                Buffer buffer = serializer.getCurrentBuffer();

                if (buffer != null) {

                    numBytesOut.inc(buffer.getSize()); // count the outgoing bytes in numBytesOut

                    writeAndClearBuffer(buffer, targetChannel, serializer);

                    // If this was a full record, we are done. Not breaking

                    // out of the loop at this point will lead to another

                    // buffer request before breaking out (that would not be

                    // a problem per se, but it can lead to stalls in the

                    // pipeline).

                    if (result.isFullRecord()) {

                        break;

                    }

                } else {

                    // No current buffer: block for a new one and continue serializing into it.
                    buffer = targetPartition.getBufferProvider().requestBufferBlocking();

                    result = serializer.setNextBuffer(buffer);

                }

            }

        }

    }

RecordWriterOutput.collect –> StreamRecordWriter.emit –> RecordWriter.emit

inputQueueLength, outputQueueLength, inPoolUsage, outPoolUsage

TaskIOMetricGroup

   /**
     * Registers the four buffer gauges of a task under a "buffers" sub-group.
     *
     * @param task the task passed to each gauge
     */
    public void initializeBufferMetrics(Task task) {
        final MetricGroup bufferGroup = addGroup("buffers");

        // Queue lengths.
        bufferGroup.gauge("inputQueueLength", new InputBuffersGauge(task));
        bufferGroup.gauge("outputQueueLength", new OutputBuffersGauge(task));

        // Buffer pool usage.
        bufferGroup.gauge("inPoolUsage", new InputBufferPoolUsageGauge(task));
        bufferGroup.gauge("outPoolUsage", new OutputBufferPoolUsageGauge(task));
    }

inputQueueLength

for (SingleInputGate inputGate : task.getAllInputGates()) {

    totalBuffers += inputGate.getNumberOfQueuedBuffers();

}

inputGate.getNumberOfQueuedBuffers

for (InputChannel channel : inputChannels.values()) {

    if (channel instanceof RemoteInputChannel) { // 只统计RemoteInputChannel

        totalBuffers += ((RemoteInputChannel) channel).getNumberOfQueuedBuffers();

    }

}

getNumberOfQueuedBuffers

/**

     * The received buffers. Received buffers are enqueued by the network I/O thread and the queue

     * is consumed by the receiving task thread.

     */

    private final Queue<Buffer> receivedBuffers = new ArrayDeque<>();

    /**
     * Returns the current size of the received-buffer queue, read while holding the queue's monitor.
     *
     * @return number of buffers currently in {@code receivedBuffers}
     */
    public int getNumberOfQueuedBuffers() {
        final int queued;
        synchronized (receivedBuffers) {
            queued = receivedBuffers.size();
        }
        return queued;
    }

outputQueueLength

for (ResultPartition producedPartition : task.getProducedPartitions()) {

    totalBuffers += producedPartition.getNumberOfQueuedBuffers();

}

ResultPartition getNumberOfQueuedBuffers

for (ResultSubpartition subpartition : subpartitions) {

    totalBuffers += subpartition.getNumberOfQueuedBuffers();

}

SpillableSubpartition getNumberOfQueuedBuffers

class SpillableSubpartition extends ResultSubpartition {

    /** Buffers are kept in this queue as long as we weren't ask to release any. */

    private final ArrayDeque<Buffer> buffers = new ArrayDeque<>();

    /**
     * Returns the current size of the {@code buffers} queue.
     *
     * @return number of buffers currently held in the queue
     */
    @Override

    public int getNumberOfQueuedBuffers() {

        // NOTE(review): read without a lock here — confirm whether callers need an exact value.
        return buffers.size();

    }

inputQueueLength, outputQueueLength

指标的含义是，inputchannel和resultpartition持有的buffer个数，这些buffer被读完后会release，所以链路通畅的话，length应该会很小

inPoolUsage

int usedBuffers = 0;

int bufferPoolSize = 0;

for (SingleInputGate inputGate : task.getAllInputGates()) {

    usedBuffers += inputGate.getBufferPool().bestEffortGetNumOfUsedBuffers();

    bufferPoolSize += inputGate.getBufferPool().getNumBuffers();

}

if (bufferPoolSize != 0) {

    return ((float) usedBuffers) / bufferPoolSize;

} else {

    return 0.0f;

}

bestEffortGetNumOfUsedBuffers()

/**
 * Returns requested segments minus available segments, floored at zero.
 *
 * @return the non-negative difference between the two values
 */
@Override
public int bestEffortGetNumOfUsedBuffers() {
    final int inUse = numberOfRequestedMemorySegments - availableMemorySegments.size();
    // Floor at zero so the result is never negative.
    return inUse > 0 ? inUse : 0;
}

numberOfRequestedMemorySegments，从bufferpool申请多少

availableMemorySegments，可用的

所以相减就是使用多少

outPoolUsage

int usedBuffers = 0;

int bufferPoolSize = 0;

for (ResultPartition resultPartition : task.getProducedPartitions()) {

    usedBuffers += resultPartition.getBufferPool().bestEffortGetNumOfUsedBuffers();

    bufferPoolSize += resultPartition.getBufferPool().getNumBuffers();

}

if (bufferPoolSize != 0) {

    return ((float) usedBuffers) / bufferPoolSize;

} else {

    return 0.0f;

}

和inPoolUsage类似，也是看bufferPool的情况

所以inPoolUsage，outPoolUsage表示的是inputgate和resultpartition中bufferpool的使用情况

这个bufferpool是inputgate初始化的时候，注册到NetworkEnvironment创建的，

// Setup the buffer pool for each buffer reader

final SingleInputGate[] inputGates = task.getAllInputGates();

for (SingleInputGate gate : inputGates) {

    BufferPool bufferPool = null;

    try {

        bufferPool = networkBufferPool.createBufferPool(gate.getNumberOfInputChannels(), false);

        gate.setBufferPool(bufferPool);

    }

可以看到默认大小是inputchannels的size

如果pool用完了，那么inputGate和ResultPartition就无法继续读取新的数据

latency

在AbstractStreamOperator中，

setup，

protected LatencyGauge latencyGauge;

latencyGauge = this.metrics.gauge("latency", new LatencyGauge(historySize));

注意，这里metrics是OperatorMetricGroup

this.metrics = container.getEnvironment().getMetricGroup().addOperator(config.getOperatorName());

TaskMetricGroup

    /**
     * Returns the operator metric group registered under the given name, creating it if needed.
     *
     * <p>A new group is always constructed first. It is stored and returned only when no
     * group is registered under {@code name} yet; otherwise the existing group stays in
     * place and is returned.
     *
     * @param name the operator name used as key
     * @return the group now registered under {@code name}
     */
    public OperatorMetricGroup addOperator(String name) {
        final OperatorMetricGroup candidate = new OperatorMetricGroup(this.registry, this, name);

        synchronized (this) {
            final OperatorMetricGroup existing = operators.get(name);
            if (existing != null) {
                // Already had an operator group: keep it.
                return existing;
            }
            // No operator group so far: register the new one.
            operators.put(name, candidate);
            return candidate;
        }
    }

LatencyGauge的定义，

/**
     * Gauge that reports latency statistics per latency source.
     *
     * <p>The gauge uses a HashMap internally to avoid classloading issues when accessing
     * the values using JMX.
     */
    protected static class LatencyGauge implements Gauge<Map<String, HashMap<String, Double>>> {
        /**
         * Statistics per source descriptor. The descriptor is built from the marker's vertex ID
         * and subtask index.
         */
        private final Map<LatencySourceDescriptor, DescriptiveStatistics> latencyStats = new HashMap<>();
        /** Window size passed to every newly created statistics object. */
        private final int historySize;

        LatencyGauge(int historySize) {
            this.historySize = historySize;
        }

        /**
         * Records the latency of one marker: the current wall-clock time minus the marker's
         * marked time.
         *
         * @param marker the latency marker that arrived
         * @param isSink when {@code false}, the descriptor is built with the subtask index ignored
         */
        public void reportLatency(LatencyMarker marker, boolean isSink) {
            final LatencySourceDescriptor key = LatencySourceDescriptor.of(marker, !isSink);

            DescriptiveStatistics stats = latencyStats.get(key);
            if (stats == null) {
                // First marker from this source: create its statistics window.
                stats = new DescriptiveStatistics(this.historySize);
                latencyStats.put(key, stats);
            }

            stats.addValue(System.currentTimeMillis() - marker.getMarkedTime());
        }

        /**
         * Builds a snapshot of max, mean, min, p50, p95 and p99 for every known source.
         *
         * <p>Concurrent access onto the "latencyStats" map could cause
         * ConcurrentModificationExceptions. To avoid unnecessary blocking of the
         * reportLatency() method, the snapshot is retried until it succeeds.
         */
        @Override
        public Map<String, HashMap<String, Double>> getValue() {
            for (;;) {
                try {
                    return snapshot();
                } catch (ConcurrentModificationException cme) {
                    LOG.debug("Unable to report latency statistics", cme);
                }
            }
        }

        /** Single snapshot attempt; may throw ConcurrentModificationException. */
        private Map<String, HashMap<String, Double>> snapshot() {
            final Map<String, HashMap<String, Double>> result = new HashMap<>();
            for (Map.Entry<LatencySourceDescriptor, DescriptiveStatistics> entry : latencyStats.entrySet()) {
                final DescriptiveStatistics stats = entry.getValue();
                final HashMap<String, Double> summary = new HashMap<>(6);
                summary.put("max", stats.getMax());
                summary.put("mean", stats.getMean());
                summary.put("min", stats.getMin());
                summary.put("p50", stats.getPercentile(50));
                summary.put("p95", stats.getPercentile(95));
                summary.put("p99", stats.getPercentile(99));
                result.put(entry.getKey().toString(), summary);
            }
            return result;
        }
    }

这个Gauge.getValue返回的是个map，太奇葩

latencyStats里面有多少entry，取决于有多少source，以及每个source有几个并发

因为他要记录，每个source operator的某个subtask，到当前operator的该subtask的延迟

        /**
         * Builds the descriptor for the source of the given marker.
         *
         * @param marker the marker whose vertex ID (and optionally subtask index) is used
         * @param ignoreSubtaskIndex when {@code true}, the subtask index is replaced by -1
         * @return a descriptor made of the vertex ID and the chosen subtask index
         */
        public static LatencySourceDescriptor of(LatencyMarker marker, boolean ignoreSubtaskIndex) {
            return new LatencySourceDescriptor(
                marker.getVertexID(),
                ignoreSubtaskIndex ? -1 : marker.getSubtaskIndex());
        }

LatencySourceDescriptor构造函数，由vertexid，和subtaskIndex组成

如果忽略subtaskindex，置为-1

流程

StreamSource

定义LatencyMarksEmitter

private static class LatencyMarksEmitter<OUT> {

        private final ScheduledFuture<?> latencyMarkTimer;

        /**
         * Schedules the periodic emission of latency markers on the processing time service.
         *
         * @param processingTimeService the service on which the emission is scheduled
         * @param output the output that receives the markers
         * @param latencyTrackingInterval period passed to {@code scheduleAtFixedRate}
         * @param vertexID vertex ID written into every marker
         * @param subtaskIndex subtask index written into every marker
         */
        public LatencyMarksEmitter(
                final ProcessingTimeService processingTimeService,
                final Output<StreamRecord<OUT>> output,
                long latencyTrackingInterval,
                final int vertexID,
                final int subtaskIndex) {

            final ProcessingTimeCallback emitMarker = new ProcessingTimeCallback() {
                @Override
                public void onProcessingTime(long timestamp) throws Exception {
                    try {
                        // ProcessingTimeService callbacks are executed under the checkpointing lock.
                        // The callback's processing-time timestamp becomes the marker's start time.
                        output.emitLatencyMarker(new LatencyMarker(timestamp, vertexID, subtaskIndex));
                    } catch (Throwable t) {
                        // we catch the Throwables here so that we don't trigger the processing
                        // timer services async exception handler
                        LOG.warn("Error while emitting latency marker.", t);
                    }
                }
            };

            // Emit markers periodically, starting with an initial delay of 0.
            latencyMarkTimer = processingTimeService.scheduleAtFixedRate(
                emitMarker,
                0L,
                latencyTrackingInterval);
        }

source.run，当isLatencyTrackingEnabled，schedule latency marker

public void run(final Object lockingObject, final Output<StreamRecord<OUT>> collector) throws Exception {

        final TimeCharacteristic timeCharacteristic = getOperatorConfig().getTimeCharacteristic();

        LatencyMarksEmitter latencyEmitter = null;

        if(getExecutionConfig().isLatencyTrackingEnabled()) {

            latencyEmitter = new LatencyMarksEmitter<>(

                getProcessingTimeService(),

                collector,

                getExecutionConfig().getLatencyTrackingInterval(),

                getOperatorConfig().getVertexID(),

                getRuntimeContext().getIndexOfThisSubtask());

        }

StreamInputProcessor –> processInput

如果是isLatencyMarker

else if(recordOrMark.isLatencyMarker()) {

    // handle latency marker

    synchronized (lock) {

        streamOperator.processLatencyMarker(recordOrMark.asLatencyMarker());

    }

    continue;

}

对于，chaining， ChainingOutput

private static class ChainingOutput<T> implements Output<StreamRecord<T>> {

    protected final OneInputStreamOperator<T, ?> operator;

    protected final Counter numRecordsIn;

    /**
     * Passes the latency marker straight to the chained operator.
     *
     * <p>Any exception is rethrown wrapped in an {@code ExceptionInChainedOperatorException}.
     */
    @Override
    public void emitLatencyMarker(LatencyMarker latencyMarker) {
        try {
            operator.processLatencyMarker(latencyMarker);
        } catch (Exception failure) {
            throw new ExceptionInChainedOperatorException(failure);
        }
    }

AbstractStreamOperator

/**
 * Handles an incoming latency marker by delegating to reportOrForwardLatencyMarker.
 *
 * @param latencyMarker the marker that arrived at this operator
 */
public void processLatencyMarker(LatencyMarker latencyMarker) throws Exception {

        reportOrForwardLatencyMarker(latencyMarker);

    }

/**
 * Reports the marker's latency to this operator's latency gauge and then forwards the marker
 * on the operator's output.
 *
 * @param marker the latency marker to report and forward
 */
protected void reportOrForwardLatencyMarker(LatencyMarker marker) {

        // all operators are tracking latencies

        // isSink is passed as false here.
        this.latencyGauge.reportLatency(marker, false);

        // everything except sinks forwards latency markers

        // NOTE(review): this version always forwards; presumably sinks override it — confirm.
        this.output.emitLatencyMarker(marker);

    }

调用到latencyGauge.reportLatency，逻辑如上

后续继续emitLatencyMarker

currentLowWatermark, checkpointAlignmentTime

OneInputStreamTask

/**
 * Creates the stream input processor when the task has at least one input, and wires the
 * task's IO metric group into it.
 */
@Override
    public void init() throws Exception {
        if (numberOfInputs <= 0) {
            // No inputs: nothing to set up.
            return;
        }

        final InputGate[] inputGates = getEnvironment().getAllInputGates();

        inputProcessor = new StreamInputProcessor<IN>(
                inputGates,
                inSerializer,
                this,
                configuration.getCheckpointMode(),
                getEnvironment().getIOManager(),
                getEnvironment().getTaskManagerInfo().getConfiguration());

        // make sure that stream tasks report their I/O statistics
        inputProcessor.setMetricGroup(getEnvironment().getMetricGroup().getIOMetricGroup());
    }

StreamInputProcessor

    /**
     * Registers the "currentLowWatermark" and "checkpointAlignmentTime" gauges on the given group.
     *
     * @param metrics the task IO metric group that receives both gauges
     */
    public void setMetricGroup(TaskIOMetricGroup metrics) {
        // Reports the last emitted watermark.
        final Gauge<Long> lowWatermarkGauge = new Gauge<Long>() {
            @Override
            public Long getValue() {
                return lastEmittedWatermark;
            }
        };
        metrics.gauge("currentLowWatermark", lowWatermarkGauge);

        // Reports the barrier handler's alignment duration in nanoseconds.
        final Gauge<Long> alignmentTimeGauge = new Gauge<Long>() {
            @Override
            public Long getValue() {
                return barrierHandler.getAlignmentDurationNanos();
            }
        };
        metrics.gauge("checkpointAlignmentTime", alignmentTimeGauge);
    }

currentLowWatermark，即lastEmittedWatermark

默认值是，

lastEmittedWatermark = Long.MIN_VALUE;

所以如果没有assignTimestampsAndWatermarks，那么currentLowWatermark会是一个极大的负数

    public boolean processInput(OneInputStreamOperator<IN, ?> streamOperator, final Object lock) throws Exception {

        while (true) {

            if (currentRecordDeserializer != null) {

                if (result.isFullRecord()) {

                    StreamElement recordOrMark = deserializationDelegate.getInstance();

                    if (recordOrMark.isWatermark()) {

                        long watermarkMillis = recordOrMark.asWatermark().getTimestamp();

                        if (watermarkMillis > watermarks[currentChannel]) { // 更新每个channel对应的waterMark

                            watermarks[currentChannel] = watermarkMillis;

                            long newMinWatermark = Long.MAX_VALUE;

                            for (long watermark: watermarks) { // 找出所有channel最小的watermark，以最小的为准

                                newMinWatermark = Math.min(watermark, newMinWatermark);

                            }

                            if (newMinWatermark > lastEmittedWatermark) {

                                lastEmittedWatermark = newMinWatermark; // 将最小的watermark设为lastEmittedWatermark

                                synchronized (lock) {

                                    streamOperator.processWatermark(new Watermark(lastEmittedWatermark));

                                }

                            }

                        }

                        continue;

                    }

checkpointAlignmentTime

barrierHandler.getAlignmentDurationNanos

    /**
     * Returns the alignment duration in nanoseconds.
     *
     * <p>When an alignment start timestamp is set (greater than zero), the result is the time
     * elapsed since that timestamp. Otherwise it is the latest recorded alignment duration.
     */
    @Override
    public long getAlignmentDurationNanos() {
        // Read the field once so the check and the subtraction use the same value.
        final long alignmentStart = this.startOfAlignmentTimestamp;
        return alignmentStart > 0
            ? System.nanoTime() - alignmentStart
            : latestAlignmentDurationNanos;
    }

startOfAlignmentTimestamp是在这次checkpoint开始的时候打的时间戳，即beginNewAlignment

    /**
     * Starts the alignment of a new checkpoint: stores the checkpoint ID, handles the first
     * barrier through onBarrier, and then records the alignment start timestamp.
     *
     * @param checkpointId ID stored as the current checkpoint ID
     * @param channelIndex channel index passed on to onBarrier
     * @throws IOException declared by this method; presumably raised by onBarrier — TODO confirm
     */
    private void beginNewAlignment(long checkpointId, int channelIndex) throws IOException {

        currentCheckpointId = checkpointId;

        onBarrier(channelIndex);

        // Start timestamp of this alignment, taken from System.nanoTime().
        startOfAlignmentTimestamp = System.nanoTime();

    }

beginNewAlignment在

processBarrier中被调用，

        if (numBarriersReceived > 0) {

            // this is only true if some alignment is already progress and was not canceled

            if (barrierId == currentCheckpointId) {

                // regular case

                onBarrier(channelIndex);

            }

            else if (barrierId > currentCheckpointId) {// 当收到新的checkpointid，所以老的id已经过期，需要产生新的checkpoint

                // we did not complete the current checkpoint, another started before

                LOG.warn("Received checkpoint barrier for checkpoint {} before completing current checkpoint {}. " +

                        "Skipping current checkpoint.", barrierId, currentCheckpointId);

                // let the task know we are not completing this

                notifyAbort(currentCheckpointId, new CheckpointDeclineSubsumedException(barrierId));

                // abort the current checkpoint

                releaseBlocksAndResetBarriers();

                // begin a the new checkpoint

                beginNewAlignment(barrierId, channelIndex); //标识checkpoint开始

            }

            else {

                // ignore trailing barrier from an earlier checkpoint (obsolete now)

                return;

            }

        }

        else if (barrierId > currentCheckpointId) { //新的checkpoint开始

            // first barrier of a new checkpoint

            beginNewAlignment(barrierId, channelIndex); //标识checkpoint开始

        }

所以checkpointAlignmentTime的意思是，当前的checkpoint已经等待多久，因为要等到所有input channel的barrier，checkpoint才会触发

单位是纳秒，所以billion级别代表秒

如果比较大，说明各个并发之间的延迟差异较大，或延迟较高