Preface
  
This article takes a closer look at Flink's CsvTableSource.
  
  TableSource
  
  flink-table_2.11-1.7.1-sources.jar!/org/apache/flink/table/sources/TableSource.scala
  
trait TableSource[T] {

  /** Returns the [[TypeInformation]] for the return type of the [[TableSource]].
    * The fields of the return type are mapped to the table schema based on their name.
    *
    * @return The type of the returned [[DataSet]] or [[DataStream]].
    */
  def getReturnType: TypeInformation[T]

  /**
    * Returns the schema of the produced table.
    *
    * @return The [[TableSchema]] of the produced table.
    */
  def getTableSchema: TableSchema

  /**
    * Describes the table source.
    *
    * @return A String explaining the [[TableSource]].
    */
  def explainSource(): String =
    TableConnectorUtil.generateRuntimeName(getClass, getTableSchema.getFieldNames)
}
  
TableSource defines three methods: getReturnType, getTableSchema, and explainSource.
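To make the contract concrete, here is a minimal sketch of a custom TableSource against the Flink 1.7 API; the class name MyTableSource and its two columns are hypothetical, not something provided by Flink:

import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
import org.apache.flink.api.java.typeutils.RowTypeInfo
import org.apache.flink.table.api.TableSchema
import org.apache.flink.table.sources.TableSource
import org.apache.flink.types.Row

// hypothetical table source with a LONG id column and a STRING name column
class MyTableSource extends TableSource[Row] {

  protected val fieldNames: Array[String] = Array("id", "name")
  protected val fieldTypes: Array[TypeInformation[_]] = Array(Types.LONG, Types.STRING)

  // the fields of the return type are matched to the table schema by name
  override def getReturnType: TypeInformation[Row] = new RowTypeInfo(fieldTypes, fieldNames)

  override def getTableSchema: TableSchema = new TableSchema(fieldNames, fieldTypes)

  // explainSource() is left to the default shown above, which
  // generates a runtime name from the class and the field names
}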
  
  BatchTableSource
  
  flink-table_2.11-1.7.1-sources.jar!/org/apache/flink/table/sources/BatchTableSource.scala
  
trait BatchTableSource[T] extends TableSource[T] {

  /**
    * Returns the data of the table as a [[DataSet]].
    *
    * NOTE: This method is for internal use only for defining a [[TableSource]].
    *       Do not use it in Table API programs.
    */
  def getDataSet(execEnv: ExecutionEnvironment): DataSet[T]
}
  
BatchTableSource extends TableSource and adds the getDataSet method.
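Continuing the hypothetical MyTableSource sketch above, a minimal BatchTableSource only has to add getDataSet; the two rows are hard-coded purely for illustration:

import org.apache.flink.api.java.{DataSet, ExecutionEnvironment}
import org.apache.flink.table.sources.BatchTableSource
import org.apache.flink.types.Row

class MyBatchTableSource extends MyTableSource with BatchTableSource[Row] {

  // serve two hard-coded rows as the table's data, typed via getReturnType
  override def getDataSet(execEnv: ExecutionEnvironment): DataSet[Row] =
    execEnv.fromCollection(
      java.util.Arrays.asList(Row.of(Long.box(1L), "a"), Row.of(Long.box(2L), "b")),
      getReturnType)
}

Note that the trait takes the Java ExecutionEnvironment; per the NOTE in the Scaladoc, getDataSet is meant to be called internally rather than from Table API programs.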
  
  StreamTableSource
  
  flink-table_2.11-1.7.1-sources.jar!/org/apache/flink/table/sources/StreamTableSource.scala
  
trait StreamTableSource[T] extends TableSource[T] {

  /**
    * Returns the data of the table as a [[DataStream]].
    *
    * NOTE: This method is for internal use only for defining a [[TableSource]].
    *       Do not use it in Table API programs.
    */
  def getDataStream(execEnv: StreamExecutionEnvironment): DataStream[T]
}
  
StreamTableSource extends TableSource and adds the getDataStream method.
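The streaming counterpart of the sketch is symmetric; again the rows are hard-coded and the class name is made up:

import org.apache.flink.streaming.api.datastream.DataStream
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment
import org.apache.flink.table.sources.StreamTableSource
import org.apache.flink.types.Row

class MyStreamTableSource extends MyTableSource with StreamTableSource[Row] {

  // emit the same two hard-coded rows as a (bounded) stream
  override def getDataStream(execEnv: StreamExecutionEnvironment): DataStream[Row] =
    execEnv.fromCollection(
      java.util.Arrays.asList(Row.of(Long.box(1L), "a"), Row.of(Long.box(2L), "b")),
      getReturnType)
}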
  
  CsvTableSource
  
  flink-table_2.11-1.7.1-sources.jar!/org/apache/flink/table/sources/CsvTableSource.scala
  
class CsvTableSource private (
    private val path: String,
    private val fieldNames: Array[String],
    private val fieldTypes: Array[TypeInformation[_]],
    private val selectedFields: Array[Int],
    private val fieldDelim: String,
    private val rowDelim: String,
    private val quoteCharacter: Character,
    private val ignoreFirstLine: Boolean,
    private val ignoreComments: String,
    private val lenient: Boolean)
  extends BatchTableSource[Row]
  with StreamTableSource[Row]
  with ProjectableTableSource[Row] {

  def this(
      path: String,
      fieldNames: Array[String],
      fieldTypes: Array[TypeInformation[_]],
      fieldDelim: String = CsvInputFormat.DEFAULT_FIELD_DELIMITER,
      rowDelim: String = CsvInputFormat.DEFAULT_LINE_DELIMITER,
      quoteCharacter: Character = null,
      ignoreFirstLine: Boolean = false,
      ignoreComments: String = null,
      lenient: Boolean = false) = {
    this(
      path,
      fieldNames,
      fieldTypes,
      fieldTypes.indices.toArray, // initially, all fields are returned
      fieldDelim,
      rowDelim,
      quoteCharacter,
      ignoreFirstLine,
      ignoreComments,
      lenient)
  }

  def this(path: String, fieldNames: Array[String], fieldTypes: Array[TypeInformation[_]]) = {
    this(path, fieldNames, fieldTypes, CsvInputFormat.DEFAULT_FIELD_DELIMITER,
      CsvInputFormat.DEFAULT_LINE_DELIMITER, null, false, null, false)
  }

  if (fieldNames.length != fieldTypes.length) {
    throw new TableException("Number of field names and field types must be equal.")
  }

  private val selectedFieldTypes = selectedFields.map(fieldTypes(_))
  private val selectedFieldNames = selectedFields.map(fieldNames(_))

  private val returnType: RowTypeInfo = new RowTypeInfo(selectedFieldTypes, selectedFieldNames)

  override def getDataSet(execEnv: ExecutionEnvironment): DataSet[Row] = {
    execEnv.createInput(createCsvInput(), returnType).name(explainSource())
  }

  /** Returns the [[RowTypeInfo]] for the return type of the [[CsvTableSource]]. */
  override def getReturnType: RowTypeInfo = returnType

  override def getDataStream(streamExecEnv: StreamExecutionEnvironment): DataStream[Row] = {
    streamExecEnv.createInput(createCsvInput(), returnType).name(explainSource())
  }

  /** Returns the schema of the produced table. */
  override def getTableSchema = new TableSchema(fieldNames, fieldTypes)

  /** Returns a copy of [[TableSource]] with ability to project fields */
  override def projectFields(fields: Array[Int]): CsvTableSource = {
    val selectedFields = if (fields.isEmpty) Array(0) else fields

    new CsvTableSource(
      path,
      fieldNames,
      fieldTypes,
      selectedFields,
      fieldDelim,
      rowDelim,
      quoteCharacter,
      ignoreFirstLine,
      ignoreComments,
      lenient)
  }

  private def createCsvInput(): RowCsvInputFormat = {
    val inputFormat = new RowCsvInputFormat(
      new Path(path),
      selectedFieldTypes,
      rowDelim,
      fieldDelim,
      selectedFields)

    inputFormat.setSkipFirstLineAsHeader(ignoreFirstLine)
    inputFormat.setLenient(lenient)
    if (quoteCharacter != null) {
      inputFormat.enableQuotedStringParsing(quoteCharacter)
    }
    if (ignoreComments != null) {
      inputFormat.setCommentPrefix(ignoreComments)
    }

    inputFormat
  }

  override def equals(other: Any): Boolean = other match {
    case that: CsvTableSource => returnType == that.returnType &&
      path == that.path &&
      fieldDelim == that.fieldDelim &&
      rowDelim == that.rowDelim &&
      quoteCharacter == that.quoteCharacter &&
      ignoreFirstLine == that.ignoreFirstLine &&
      ignoreComments == that.ignoreComments &&
      lenient == that.lenient
    case _ => false
  }

  override def hashCode(): Int = {
    returnType.hashCode()
  }

  override def explainSource(): String = {
    s"CsvTableSource(" +
      s"read fields: ${getReturnType.getFieldNames.mkString(", ")})"
  }
}
  
CsvTableSource implements both the BatchTableSource and the StreamTableSource interface. Its getDataSet method uses ExecutionEnvironment.createInput to create the DataSet, while getDataStream uses StreamExecutionEnvironment.createInput to create the DataStream.

The InputFormat that ExecutionEnvironment.createInput and StreamExecutionEnvironment.createInput receive is a RowCsvInputFormat, built by createCsvInput.

The TableSchema returned by getTableSchema is created from fieldNames and fieldTypes; the RowTypeInfo returned by getReturnType is created from selectedFieldTypes and selectedFieldNames; and explainSource here returns a string that starts with "CsvTableSource".
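Putting it together, here is a minimal usage sketch using the three-argument constructor shown above; the file /tmp/people.csv, its two columns, and the registered table name are all assumptions made for illustration:

import org.apache.flink.api.common.typeinfo.{TypeInformation, Types}
import org.apache.flink.api.java.ExecutionEnvironment
import org.apache.flink.table.api.TableEnvironment
import org.apache.flink.table.sources.CsvTableSource

object CsvTableSourceDemo {
  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment
    val tableEnv = TableEnvironment.getTableEnvironment(env)

    // three-argument constructor: default delimiters, no quoting,
    // first line not skipped, comments not ignored, lenient = false
    val csvSource = new CsvTableSource(
      "/tmp/people.csv", // hypothetical file
      Array("id", "name"),
      Array[TypeInformation[_]](Types.LONG, Types.STRING))

    tableEnv.registerTableSource("people", csvSource)
    val result = tableEnv.sqlQuery("SELECT name FROM people")
    result.printSchema()

    // projectFields returns a copy that only reads the name column (index 1)
    val nameOnly = csvSource.projectFields(Array(1))
  }
}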
  
Summary
  
TableSource defines three methods: getReturnType, getTableSchema, and explainSource. BatchTableSource extends TableSource and adds the getDataSet method; StreamTableSource extends TableSource and adds the getDataStream method.

CsvTableSource implements both the BatchTableSource and the StreamTableSource interface. Its getDataSet method uses ExecutionEnvironment.createInput to create the DataSet, while getDataStream uses StreamExecutionEnvironment.createInput to create the DataStream.

The InputFormat that ExecutionEnvironment.createInput and StreamExecutionEnvironment.createInput receive is a RowCsvInputFormat, built by createCsvInput. The TableSchema returned by getTableSchema is created from fieldNames and fieldTypes; the RowTypeInfo returned by getReturnType is created from selectedFieldTypes and selectedFieldNames; and explainSource here returns a string that starts with "CsvTableSource".
