监控 Spark 应用的方式比较多,比如 Spark on YARN 可以通过 YarnClient API 监控。这里介绍的是 Spark 内置的一种监控方式:SparkListener。

如果是 Spark Streaming,对应的则是 StreamingListener。

package cn.com.kong;

import org.apache.spark.SparkConf;
import org.apache.spark.scheduler.*;
import org.apache.spark.sql.SaveMode;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.status.AppStatusStore;
import org.apache.spark.status.api.v1.ApplicationInfo;

/**
 * Demo of Spark's built-in listener mechanism: registers a custom listener on the
 * {@code SparkContext} and prints application/job lifecycle events to {@code System.err}
 * while a small aggregation query is written to a Hive table.
 */
public class CustomSparkListener {

    /**
     * Listener that logs application and job lifecycle events.
     *
     * <p>Extends {@link SparkListener} (which supplies no-op defaults for every callback)
     * instead of implementing {@code SparkListenerInterface} directly, so only the events of
     * interest are overridden and the class keeps compiling when a newer Spark release adds
     * further callbacks to the interface.
     */
    private static final class JobLoggingListener extends SparkListener {

        /** Called when the application starts. */
        @Override
        public void onApplicationStart(SparkListenerApplicationStart applicationStart) {
            System.err.println("Application启动,appName:" + applicationStart.appName() + ",appID"
                    + applicationStart.appId());
        }

        /** Called when the application ends. */
        @Override
        public void onApplicationEnd(SparkListenerApplicationEnd applicationEnd) {
            System.err.println("Application结束,时间:" + applicationEnd.time());
        }

        /** Called when a job starts. */
        @Override
        public void onJobStart(SparkListenerJobStart jobStart) {
            System.err.println("自定义监听器jobStart,jobId:" + jobStart.jobId());
            System.err.println("自定义监听器jobStart,该job下stage数量:" + jobStart.stageInfos().size());
        }

        /** Called when a job ends. */
        @Override
        public void onJobEnd(SparkListenerJobEnd jobEnd) {
            System.err.println("自定义监听器jobEnd jobResult:" + jobEnd.jobResult());
        }
    }

    /**
     * Builds a local Hive-enabled session, attaches the listener, runs the sample query and
     * overwrites {@code test_listener_table} with the result.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.setProperty("HADOOP_USER_NAME", "etluser");

        SparkConf conf = new SparkConf();
        conf.set("spark.hadoopRDD.ignoreEmptySplits", "true");
        conf.set("spark.sql.adaptive.enabled", "true");
        conf.set("spark.sql.adaptive.join.enabled", "true");
        conf.set("spark.executor.memoryOverhead", "1024");
        conf.set("spark.driver.memoryOverhead", "1024");
        conf.set("spark.kryoserializer.buffer.max", "256m");
        conf.set("spark.kryoserializer.buffer", "64m");
        conf.set("spark.executor.extraJavaOptions", "-XX:+UseG1GC -Dlog4j.configuration=log4j.properties");
        conf.set("spark.driver.extraJavaOptions", "-XX:+UseG1GC -Dlog4j.configuration=log4j.properties");
        conf.set("spark.sql.parquet.writeLegacyFormat", "true");

        SparkSession spark = SparkSession
                .builder()
                .appName("testSparkListener")
                .master("local")
                .config(conf)
                .enableHiveSupport()
                .getOrCreate();

        // Always stop the session, even when the query or the write fails.
        try {
            spark.sql("use coveroptimize");

            // Alternative source of application-level information (not used here):
            // spark.sparkContext().statusStore().applicationInfo()

            // NOTE(review): the listener is attached after the context has been created, so it
            // presumably cannot observe onApplicationStart; registering the class through the
            // "spark.extraListeners" configuration should attach it earlier - TODO confirm.
            spark.sparkContext().addSparkListener(new JobLoggingListener());

            String sql1 = "select roadid,count(1) cn from gridmappingroad group by roadid";
            spark.sql(sql1)
                    .repartition(2)
                    .write()
                    .mode(SaveMode.Overwrite)
                    .saveAsTable("test_listener_table");
        } finally {
            spark.stop();
        }
    }
}

运行日志:

// :: INFO spark.SparkContext: Running Spark version 2.3.
// :: INFO spark.SparkContext: Submitted application: testSparkListener
// :: INFO spark.SecurityManager: Changing view acls to: kongshuaiwei,etluser
// :: INFO spark.SecurityManager: Changing modify acls to: kongshuaiwei,etluser
// :: INFO spark.SecurityManager: Changing view acls groups to:
// :: INFO spark.SecurityManager: Changing modify acls groups to:
// :: INFO spark.SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(kongshuaiwei, etluser); groups with view permissions: Set(); users with modify permissions: Set(kongshuaiwei, etluser); groups with modify permissions: Set()
// :: INFO util.Utils: Successfully started service 'sparkDriver' on port .
// :: INFO spark.SparkEnv: Registering MapOutputTracker
// :: INFO spark.SparkEnv: Registering BlockManagerMaster
// :: INFO storage.BlockManagerMasterEndpoint: Using org.apache.spark.storage.DefaultTopologyMapper for getting topology information
// :: INFO storage.BlockManagerMasterEndpoint: BlockManagerMasterEndpoint up
// :: INFO storage.DiskBlockManager: Created local directory at C:\Users\kongshuaiwei\AppData\Local\Temp\blockmgr-b8c578de--4cf3-9e8d-928159f3aecd
// :: INFO memory.MemoryStore: MemoryStore started with capacity 898.5 MB
// :: INFO spark.SparkEnv: Registering OutputCommitCoordinator
// :: INFO util.log: Logging initialized @1729ms
// :: INFO server.Server: jetty-9.3.z-SNAPSHOT
// :: INFO server.Server: Started @1788ms
// :: INFO server.AbstractConnector: Started ServerConnector@5cc5b667{HTTP/1.1,[http/1.1]}{0.0.0.0:}
// :: INFO util.Utils: Successfully started service 'SparkUI' on port .
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@410954b{/jobs,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@10b892d5{/jobs/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3d3f761a{/jobs/job,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@579d011c{/jobs/job/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@3670f00{/stages,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@452e26d0{/stages/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@46ab18da{/stages/stage,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@7689ddef{/stages/stage/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@687a762c{/stages/pool,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1a2e2935{/stages/pool/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@733c423e{/storage,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4b629f13{/storage/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@70925b45{/storage/rdd,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@1b9ea3e3{/storage/rdd/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@aa22f1c{/environment,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@55e7a35c{/environment/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@37cd92d6{/executors,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@5922ae77{/executors/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4263b080{/executors/threadDump,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@2af616d3{/executors/threadDump/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@71f67a79{/static,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@34abdee4{/,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@71a9b4c7{/api,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@21ca139c{/jobs/job/kill,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@226f885f{/stages/stage/kill,null,AVAILABLE,@Spark}
// :: INFO ui.SparkUI: Bound SparkUI to 0.0.0.0, and started at http://sl1-43087-b01.BJ.DATANGMOBILE.com:4040
// :: INFO executor.Executor: Starting executor ID driver on host localhost
// :: INFO util.Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port .
// :: INFO netty.NettyBlockTransferService: Server created on sl1--b01.BJ.DATANGMOBILE.com:
// :: INFO storage.BlockManager: Using org.apache.spark.storage.RandomBlockReplicationPolicy for block replication policy
// :: INFO storage.BlockManagerMaster: Registering BlockManager BlockManagerId(driver, sl1--b01.BJ.DATANGMOBILE.com, , None)
// :: INFO storage.BlockManagerMasterEndpoint: Registering block manager sl1--b01.BJ.DATANGMOBILE.com: with 898.5 MB RAM, BlockManagerId(driver, sl1--b01.BJ.DATANGMOBILE.com, , None)
// :: INFO storage.BlockManagerMaster: Registered BlockManager BlockManagerId(driver, sl1--b01.BJ.DATANGMOBILE.com, , None)
// :: INFO storage.BlockManager: Initialized BlockManager: BlockManagerId(driver, sl1--b01.BJ.DATANGMOBILE.com, , None)
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@33a630fa{/metrics/json,null,AVAILABLE,@Spark}
// :: INFO internal.SharedState: loading hive config file: file:/D:/ideaIC/workspace/spark-project/coverOptimize/target/classes/hive-site.xml
// :: INFO internal.SharedState: spark.sql.warehouse.dir is not set, but hive.metastore.warehouse.dir is set. Setting spark.sql.warehouse.dir to the value of hive.metastore.warehouse.dir ('/user/hive/warehouse').
// :: INFO internal.SharedState: Warehouse path is '/user/hive/warehouse'.
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@2fb5fe30{/SQL,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@456be73c{/SQL/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@41a6d121{/SQL/execution,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@4f449e8f{/SQL/execution/json,null,AVAILABLE,@Spark}
// :: INFO handler.ContextHandler: Started o.s.j.s.ServletContextHandler@27e32fe4{/static/sql,null,AVAILABLE,@Spark}
// :: INFO state.StateStoreCoordinatorRef: Registered StateStoreCoordinator endpoint
// :: INFO hive.HiveUtils: Initializing HiveMetastoreConnection version 1.2. using Spark classes.
// :: INFO hive.metastore: Trying to connect to metastore with URI thrift://worker03.xxx.xxx.cn:9083
// :: INFO hive.metastore: Connected to metastore.
// :: INFO session.SessionState: Created local directory: C:/Users/KONGSH~/AppData/Local/Temp/f35b7531-c964-4d2e-8ba5-b5ade205d12a_resources
// :: INFO session.SessionState: Created HDFS directory: /tmp/hive/etluser/f35b7531-c964-4d2e-8ba5-b5ade205d12a
// :: INFO session.SessionState: Created local directory: C:/Users/KONGSH~/AppData/Local/Temp/kongshuaiwei/f35b7531-c964-4d2e-8ba5-b5ade205d12a
// :: INFO session.SessionState: Created HDFS directory: /tmp/hive/etluser/f35b7531-c964-4d2e-8ba5-b5ade205d12a/_tmp_space.db
// :: INFO client.HiveClientImpl: Warehouse location for Hive client (version 1.2.) is /user/hive/warehouse
// :: INFO parquet.ParquetFileFormat: Using default output committer for Parquet: org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using user defined output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO codegen.CodeGenerator: Code generated in 194.663548 ms
// :: INFO codegen.CodeGenerator: Code generated in 42.50705 ms
// :: INFO memory.MemoryStore: Block broadcast_0 stored as values in memory (estimated size 249.3 KB, free 898.3 MB)
// :: INFO memory.MemoryStore: Block broadcast_0_piece0 stored as bytes in memory (estimated size 24.6 KB, free 898.2 MB)
// :: INFO storage.BlockManagerInfo: Added broadcast_0_piece0 in memory on sl1--b01.BJ.DATANGMOBILE.com: (size: 24.6 KB, free: 898.5 MB)
// :: INFO spark.ContextCleaner: Cleaned accumulator
// :: INFO spark.ContextCleaner: Cleaned accumulator
// :: INFO spark.SparkContext: Created broadcast from
// :: INFO spark.ContextCleaner: Cleaned accumulator
// :: INFO spark.ContextCleaner: Cleaned accumulator
// :: WARN security.UserGroupInformation: No groups available for user etluser
// :: WARN security.UserGroupInformation: No groups available for user etluser
// :: INFO mapred.FileInputFormat: Total input paths to process :
.....
// :: INFO scheduler.DAGScheduler: Registering RDD (saveAsTable at SparkTest.java:)
// :: INFO scheduler.DAGScheduler: Got map stage job (saveAsTable at SparkTest.java:) with output partitions
// :: INFO scheduler.DAGScheduler: Final stage: ShuffleMapStage (saveAsTable at SparkTest.java:)
// :: INFO scheduler.DAGScheduler: Parents of final stage: List()
// :: INFO scheduler.DAGScheduler: Missing parents: List()
自定义监听器jobStart,jobId:
自定义监听器jobStart,该job下stage数量:
// :: INFO scheduler.DAGScheduler: Submitting ShuffleMapStage (MapPartitionsRDD[] at saveAsTable at SparkTest.java:), which has no missing parents
// :: INFO memory.MemoryStore: Block broadcast_1 stored as values in memory (estimated size 30.1 KB, free 898.2 MB)
// :: INFO memory.MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 13.7 KB, free 898.2 MB)
// :: INFO storage.BlockManagerInfo: Added broadcast_1_piece0 in memory on sl1--b01.BJ.DATANGMOBILE.com: (size: 13.7 KB, free: 898.5 MB)
// :: INFO spark.SparkContext: Created broadcast from broadcast at DAGScheduler.scala:
...
// :: INFO scheduler.TaskSetManager: Finished task 30.0 in stage 0.0 (TID ) in ms on localhost (executor driver) (/)
// :: INFO scheduler.TaskSchedulerImpl: Removed TaskSet 0.0, whose tasks have all completed, from pool
// :: INFO scheduler.DAGScheduler: ShuffleMapStage (saveAsTable at SparkTest.java:) finished in 359.468 s
// :: INFO scheduler.DAGScheduler: looking for newly runnable stages
// :: INFO scheduler.DAGScheduler: running: Set()
// :: INFO scheduler.DAGScheduler: waiting: Set()
// :: INFO scheduler.DAGScheduler: failed: Set()
// :: INFO exchange.ExchangeCoordinator: advisoryTargetPostShuffleInputSize: , targetPostShuffleInputSize .
自定义监听器jobEnd jobResult:JobSucceeded
// :: INFO spark.SparkContext: Starting job: saveAsTable at SparkTest.java:
// :: INFO scheduler.DAGScheduler: Registering RDD (saveAsTable at SparkTest.java:)
// :: INFO scheduler.DAGScheduler: Got job (saveAsTable at SparkTest.java:) with output partitions
// :: INFO scheduler.DAGScheduler: Final stage: ResultStage (saveAsTable at SparkTest.java:)
// :: INFO scheduler.DAGScheduler: Parents of final stage: List(ShuffleMapStage )
// :: INFO scheduler.DAGScheduler: Missing parents: List(ShuffleMapStage )
自定义监听器jobStart,jobId:
自定义监听器jobStart,该job下stage数量:
// :: INFO scheduler.DAGScheduler: Submitting ShuffleMapStage (MapPartitionsRDD[] at saveAsTable at SparkTest.java:), which has no missing parents
// :: INFO memory.MemoryStore: Block broadcast_2 stored as values in memory (estimated size 22.2 KB, free 898.2 MB)
// :: INFO memory.MemoryStore: Block broadcast_2_piece0 stored as bytes in memory (estimated size 10.5 KB, free 898.2 MB)
// :: INFO storage.BlockManagerInfo: Added broadcast_2_piece0 in memory on sl1--b01.BJ.DATANGMOBILE.com: (size: 10.5 KB, free: 898.5 MB)
// :: INFO spark.SparkContext: Created broadcast from broadcast at DAGScheduler.scala:
// :: INFO scheduler.DAGScheduler: Submitting missing tasks from ShuffleMapStage (MapPartitionsRDD[] at saveAsTable at SparkTest.java:) (first tasks are for partitions Vector())
// :: INFO scheduler.TaskSchedulerImpl: Adding task set 2.0 with tasks
// :: INFO scheduler.TaskSetManager: Starting task 0.0 in stage 2.0 (TID , localhost, executor driver, partition , PROCESS_LOCAL, bytes)
// :: INFO executor.Executor: Running task 0.0 in stage 2.0 (TID )
// :: INFO storage.ShuffleBlockFetcherIterator: Getting non-empty blocks out of blocks
// :: INFO storage.ShuffleBlockFetcherIterator: Started remote fetches in ms
// :: INFO executor.Executor: Finished task 0.0 in stage 2.0 (TID ). bytes result sent to driver
// :: INFO scheduler.TaskSetManager: Finished task 0.0 in stage 2.0 (TID ) in ms on localhost (executor driver) (/)
// :: INFO scheduler.TaskSchedulerImpl: Removed TaskSet 2.0, whose tasks have all completed, from pool
// :: INFO scheduler.DAGScheduler: ShuffleMapStage (saveAsTable at SparkTest.java:) finished in 0.135 s
// :: INFO scheduler.DAGScheduler: looking for newly runnable stages
// :: INFO scheduler.DAGScheduler: running: Set()
// :: INFO scheduler.DAGScheduler: waiting: Set(ResultStage )
// :: INFO scheduler.DAGScheduler: failed: Set()
// :: INFO scheduler.DAGScheduler: Submitting ResultStage (ShuffledRowRDD[] at saveAsTable at SparkTest.java:), which has no missing parents
// :: INFO memory.MemoryStore: Block broadcast_3 stored as values in memory (estimated size 148.9 KB, free 898.0 MB)
// :: INFO memory.MemoryStore: Block broadcast_3_piece0 stored as bytes in memory (estimated size 52.1 KB, free 898.0 MB)
// :: INFO storage.BlockManagerInfo: Added broadcast_3_piece0 in memory on sl1--b01.BJ.DATANGMOBILE.com: (size: 52.1 KB, free: 898.4 MB)
// :: INFO spark.SparkContext: Created broadcast from broadcast at DAGScheduler.scala:
// :: INFO scheduler.DAGScheduler: Submitting missing tasks from ResultStage (ShuffledRowRDD[] at saveAsTable at SparkTest.java:) (first tasks are for partitions Vector(, ))
// :: INFO scheduler.TaskSchedulerImpl: Adding task set 3.0 with tasks
// :: INFO scheduler.TaskSetManager: Starting task 0.0 in stage 3.0 (TID , localhost, executor driver, partition , ANY, bytes)
// :: INFO executor.Executor: Running task 0.0 in stage 3.0 (TID )
// :: INFO storage.ShuffleBlockFetcherIterator: Getting non-empty blocks out of blocks
// :: INFO storage.ShuffleBlockFetcherIterator: Started remote fetches in ms
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using user defined output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO parquet.ParquetWriteSupport: Initialized Parquet WriteSupport with Catalyst schema:
{
"type" : "struct",
"fields" : [ {
"name" : "roadid",
"type" : "string",
"nullable" : true,
"metadata" : {
"comment" : "??id"
}
}, {
"name" : "cn",
"type" : "long",
"nullable" : false,
"metadata" : { }
} ]
}
and corresponding Parquet message type:
message spark_schema {
optional binary roadid (UTF8);
required int64 cn;
}
// :: INFO compress.CodecPool: Got brand-new compressor [.snappy]
// :: INFO output.FileOutputCommitter: Saved output of task 'attempt_20200117135657_0003_m_000000_0' to hdfs://master01.xxx.xxx.cn:8020/user/hive/warehouse/coveroptimize.db/test_listener_table/_temporary/0/task_20200117135657_0003_m_000000
// :: INFO mapred.SparkHadoopMapRedUtil: attempt_20200117135657_0003_m_000000_0: Committed
// :: INFO executor.Executor: Finished task 0.0 in stage 3.0 (TID ). bytes result sent to driver
// :: INFO scheduler.TaskSetManager: Starting task 1.0 in stage 3.0 (TID , localhost, executor driver, partition , ANY, bytes)
// :: INFO executor.Executor: Running task 1.0 in stage 3.0 (TID )
// :: INFO scheduler.TaskSetManager: Finished task 0.0 in stage 3.0 (TID ) in ms on localhost (executor driver) (/)
// :: INFO storage.ShuffleBlockFetcherIterator: Getting non-empty blocks out of blocks
// :: INFO storage.ShuffleBlockFetcherIterator: Started remote fetches in ms
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using user defined output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO datasources.SQLHadoopMapReduceCommitProtocol: Using output committer class org.apache.parquet.hadoop.ParquetOutputCommitter
// :: INFO parquet.ParquetWriteSupport: Initialized Parquet WriteSupport with Catalyst schema:
{
"type" : "struct",
"fields" : [ {
"name" : "roadid",
"type" : "string",
"nullable" : true,
"metadata" : {
"comment" : "??id"
}
}, {
"name" : "cn",
"type" : "long",
"nullable" : false,
"metadata" : { }
} ]
}
and corresponding Parquet message type:
message spark_schema {
optional binary roadid (UTF8);
required int64 cn;
}
// :: INFO output.FileOutputCommitter: Saved output of task 'attempt_20200117135657_0003_m_000001_0' to hdfs://master01.xxx.xxx.cn:8020/user/hive/warehouse/coveroptimize.db/test_listener_table/_temporary/0/task_20200117135657_0003_m_000001
// :: INFO mapred.SparkHadoopMapRedUtil: attempt_20200117135657_0003_m_000001_0: Committed
// :: INFO executor.Executor: Finished task 1.0 in stage 3.0 (TID ). bytes result sent to driver
// :: INFO scheduler.TaskSetManager: Finished task 1.0 in stage 3.0 (TID ) in ms on localhost (executor driver) (/)
// :: INFO scheduler.TaskSchedulerImpl: Removed TaskSet 3.0, whose tasks have all completed, from pool
// :: INFO scheduler.DAGScheduler: ResultStage (saveAsTable at SparkTest.java:) finished in 0.466 s
自定义监听器jobEnd jobResult:JobSucceeded
// :: INFO scheduler.DAGScheduler: Job finished: saveAsTable at SparkTest.java:, took 0.616115 s
// :: INFO datasources.FileFormatWriter: Job null committed.
// :: INFO datasources.FileFormatWriter: Finished processing stats for job null.
// :: INFO hive.HiveExternalCatalog: Persisting file based data source table `coveroptimize`.`test_listener_table` into Hive metastore in Hive compatible format.
Application结束,时间:
// :: INFO server.AbstractConnector: Stopped Spark@5cc5b667{HTTP/1.1,[http/1.1]}{0.0.0.0:}
// :: INFO ui.SparkUI: Stopped Spark web UI at http://sl1-43087-b01.BJ.DATANGMOBILE.com:4040
// :: INFO spark.MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
// :: INFO memory.MemoryStore: MemoryStore cleared
// :: INFO storage.BlockManager: BlockManager stopped
// :: INFO storage.BlockManagerMaster: BlockManagerMaster stopped
// :: INFO scheduler.OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
// :: INFO spark.SparkContext: Successfully stopped SparkContext
// :: INFO util.ShutdownHookManager: Shutdown hook called
// :: INFO util.ShutdownHookManager: Deleting directory C:\Users\kongshuaiwei\AppData\Local\Temp\spark-547cf37e-2d1e-433c-a584-6c5b7365909f

Process finished with exit code

通过SparkListener监控spark应用的更多相关文章

  1. 监控Spark应用方法简介

    监控Spark应用有很多种方法. Web接口每一个SparkContext启动一个web UI用来展示应用相关的一些非常有用的信息,默认在4040端口.这些信息包括: 任务和调度状态的列表RDD大小和 ...

  2. Ubuntu 14.10 下Ganglia监控Spark集群

    由于Licene的限制,没有放到默认的build里面,所以在官方网站下载的二进制文件中并不包含Gangla模块,如果需要使用,需要自己编译.在使用Maven编译Spark的时候,我们可以加上-Pspa ...

  3. Spark2.2(三十九):如何根据appName监控spark任务,当任务不存在则启动(任务存在当超过多久没有活动状态则kill,等待下次启动)

    业务需求 实现一个根据spark任务的appName来监控任务是否存在,及任务是否卡死的监控. 1)给定一个appName,根据appName从yarn application -list中验证任务是 ...

  4. Spark应用监控解决方案--使用Prometheus和Grafana监控Spark应用

    Spark任务启动后,我们通常都是通过跳板机去Spark UI界面查看对应任务的信息,一旦任务多了之后,这将会是让人头疼的问题.如果能将所有任务信息集中起来监控,那将会是很完美的事情. 通过Spark ...

  5. Spark(五十):使用JvisualVM监控Spark Executor JVM

    引导 Windows环境下JvisulaVM一般存在于安装了JDK的目录${JAVA_HOME}/bin/JvisualVM.exe,它支持(本地和远程)jstatd和JMX两种方式连接远程JVM. ...

  6. 使用 JvisualVM 监控 spark executor

    使用 JvisualVM,需要先配置 java 的启动参数 jmx 正常情况下,如下配置 -Dcom.sun.management.jmxremote -Dcom.sun.management.jmx ...

  7. spark的运行指标监控

    sparkUi的4040界面已经有了运行监控指标,为什么我们还要自定义存入redis? 1.结合自己的业务,可以将监控页面集成到自己的数据平台内,方便问题查找,邮件告警 2.可以在sparkUi的基础 ...

  8. Spark的Straggler深入学习(1):如何在本地图形监控远程Spark的GC情况——使用java自带的jvisualvm

    一.本文的目的       Straggler是目前研究的热点,Spark中也存在Straggler的问题.GC问题是总所周知的导致Straggler的重要因素之一,为了了解GC导致的Straggle ...

  9. Spark监控官方文档学习笔记

    任务的监控和使用 有几种方式监控spark应用:Web UI,指标和外部方法 Web接口 每个SparkContext都会启动一个web UI,默认是4040端口,用来展示一些信息: 一系列调度的st ...

随机推荐

  1. 中国电信与小米成立5G联合创新实验室

    导读 中国电信与小米成立5G联合创新实验室 近日,在中国电信战略与创新研究院,小米与中国电信共同发起的5G联合创新实验室正式揭牌成立.双方将充分发挥技术.网络.产品和生态的优势,围绕“5G+AIoT” ...

  2. 树莓派4B踩坑指南 - (12)谷歌浏览器书签同步

    书签和插件不能同步真的是不方便..使用时删掉※符号 过程比较复杂,坑很多,但确认有效 免费访问说明: https://github.com/max2max/fre※es※s 软件安装 https:// ...

  3. case语句!

    1.case 语句概述(1)case 语句的作用使用 case 语句改写 if 多分支可以使脚本结构更加清晰.层次分明.针对变量的不同取值,执行不同的命令序列.2.case 语句的结构:case 变量 ...

  4. RPC远程服务调用

    RPC远程服务调用: RPC 的全称是 Remote Procedure Call 是一种进程间通信方式. 它允许程序调用另一个地址空间(通常是共享网络的另一台机器上)的过程或函数,而不用程序员显式编 ...

  5. 【C++初学者自学笔记三】哑元函数、缺省参数、内联函数(模块二,PS:需要用到重载函数)

    一,哑元函数:一个函数的参数只有类型没有名字的则这个参数称之为哑元.类似于void fun(int); 功能:1保持向前的兼容性,比方说我们需要做成一个成品,然后成品是会不断的更新第一代第二代,当我们 ...

  6. PTA的Python练习题(二)

    继续在PTA上练习Python (从 第2章-5 求奇数分之一序列前N项和  开始) 1. x=int(input()) a=i=1 s=0 while(i<=x): s=s+1/a a=a+2 ...

  7. C++结构体struct与C语⾔结构体和C++引⽤&与传值的区别

    写再最前面:摘录于柳神的笔记: (1)定义好结构体 stu 之后,使⽤这个结构体类型的时候,C语⾔需要写关键字 struct ,⽽C++⾥⾯可以省 略不写: (2)这个引⽤符号 & 要和C语⾔ ...

  8. 树莓派4B踩坑指南 - (2)安装系统及初始化

    安装系统及初始化 格式化TF卡:SDFormatter 4.0.如果需要换系统,则必须先烧录进一个空img,然后再格式化! 烧录系统:Win32DiskImager-0.9.5 更改默认密码:账号pi ...

  9. mac下安装并启动RabbitMQ

    前言   RabbitMQ是实现了高级消息队列协议(AMQP)的开源消息代理软件(亦称面向消息的中间件).RabbitMQ服务器是用Erlang语言编写的,而群集和故障转移是构建在开放电信平台框架上的 ...

  10. 大数据萌新的Python学习之路(一)

    笔记开始简介 从2018年9月份正式进入大学的时代,大数据和人工智能的崛起让我选择了计算机专业学习数据科学与大数据技术专业,接触的第一门语言就是C语言,后来因为同学推荐的原因进入了学校的人工智能研究协 ...