wordCount

POM文件需要导入的依赖：

<!-- All Flink artifacts share ${flink.version} (defined in the POM's <properties>)
     so that the Java and Scala modules can never end up on different Flink releases. -->
<dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-java_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-java</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-clients_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-scala -->
        <!-- Previously pinned to 1.7.1; now follows ${flink.version} like the artifacts above. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-scala_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>
        <!-- https://mvnrepository.com/artifact/org.apache.flink/flink-streaming-scala -->
        <!-- Previously pinned to 1.7.1; now follows ${flink.version} like the artifacts above. -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-streaming-scala_2.12</artifactId>
            <version>${flink.version}</version>
        </dependency>

离线代码：

java版本:

package flink;

import org.apache.flink.api.java.DataSet;

import org.apache.flink.api.java.ExecutionEnvironment;

import org.apache.flink.api.java.tuple.Tuple2;

/**
 * Batch word count over two hard-coded sample lines.
 *
 * <p>Each line is tokenized by {@code LineSplitter} into (word, 1) pairs, the pairs are
 * grouped on the word (tuple field 0) and their counts (tuple field 1) are summed.
 */
public class WordExample {

    /** First sample line fed into the job. */
    private static final String FIRST_LINE = "flink test";

    /** Second sample line fed into the job. */
    private static final String SECOND_LINE = "I think I hear them. Stand, ho! Who's there?";

    /**
     * Builds the sample data set, counts the words and prints the result.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job cannot be built or run
     */
    public static void main(String[] args) throws Exception {
        final ExecutionEnvironment environment = ExecutionEnvironment.getExecutionEnvironment();

        // Build the data set of input strings.
        DataSet<String> lines = environment.fromElements(FIRST_LINE, SECOND_LINE);

        // print() is the only sink; there is no separate env.execute() call in this program.
        countWords(lines).print();
    }

    /** Splits every line into words, groups by the word and sums the per-word counts. */
    private static DataSet<Tuple2<String, Integer>> countWords(DataSet<String> lines) {
        return lines
                .flatMap(new LineSplitter())
                .groupBy(0)
                .sum(1);
    }
}

package flink;

import org.apache.flink.api.common.functions.FlatMapFunction;

import org.apache.flink.api.java.tuple.Tuple2;

import org.apache.flink.util.Collector;

/**
 * Tokenizes a line of text and emits one {@code (word, 1)} pair per word.
 *
 * <p>Words are separated by runs of whitespace. Empty tokens (which {@code String.split}
 * produces for an empty line or for leading whitespace) are skipped so that the empty
 * string is never counted as a word. Punctuation is left attached to the word.
 */
public class LineSplitter implements FlatMapFunction<String, Tuple2<String,Integer>> {

    /**
     * Emits {@code (word, 1)} for every non-empty whitespace-separated token of the line.
     *
     * @param o         the input line
     * @param collector receives one tuple per word
     * @throws Exception declared by the {@code FlatMapFunction} contract
     */
    @Override
    public void flatMap(String o, Collector<Tuple2<String, Integer>> collector) throws Exception {
        for (String word : o.split("\\s+")) {
            // split() keeps a leading empty token and returns [""] for an empty line.
            if (word.isEmpty()) {
                continue;
            }
            collector.collect(new Tuple2<>(word, 1));
        }
    }
}

scala版本：

package flink

import org.apache.flink.api.scala._

/**
 * Batch word count over two hard-coded sample lines.
 *
 * Lines are lower-cased and split on runs of non-word characters; empty tokens are
 * dropped, each remaining word becomes a (word, 1) pair, and the pairs are grouped
 * on the word and summed.
 */
object WordCountExample {

  def main(args: Array[String]): Unit = {
    val env = ExecutionEnvironment.getExecutionEnvironment

    val lines = env.fromElements(
      "Who's there?",
      "I think I hear them. Stand, ho! Who's there?")

    val counts = lines
      .flatMap { line => line.toLowerCase().split("\\W+").filter(token => token.nonEmpty) }
      .map { word => (word, 1) }
      .groupBy(0) // tuple field 0: the word
      .sum(1)     // tuple field 1: the count

    counts.print()
  }
}

流式：

　java版本：

package flink;

import org.apache.flink.api.common.functions.FlatMapFunction;

import org.apache.flink.api.common.functions.ReduceFunction;

import org.apache.flink.api.java.utils.ParameterTool;

import org.apache.flink.streaming.api.datastream.DataStream;

import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

import org.apache.flink.streaming.api.windowing.time.Time;

import org.apache.flink.util.Collector;

/**
 * Streaming word count over a text socket.
 *
 * <p>Connects to {@code localhost:<port>}, splits every received line into words and
 * prints, per word, the number of occurrences in a sliding window of 10 seconds that
 * advances every 5 seconds.
 *
 * <p>Usage: {@code WordCount --port <port>}
 */
public class WordCount {

    /**
     * Parses the {@code --port} argument, builds the windowed word-count pipeline and runs it.
     *
     * @param args command-line arguments; must contain {@code --port <port>}
     * @throws Exception if the job fails while executing
     */
    public static void main(String[] args) throws Exception {
        final int port;
        try {
            final ParameterTool params = ParameterTool.fromArgs(args);
            port = params.getInt("port");
        } catch (Exception e) {
            // Usage errors go to stderr (as in the Scala variant) and include the reason,
            // so a missing port can be told apart from a malformed one.
            System.err.println("No port specified. Please run 'WordCount --port <port>' ("
                    + e.getMessage() + ")");
            return;
        }

        // Get the execution environment.
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Get input data by connecting to the socket; records are delimited by '\n'.
        DataStream<String> text = env.socketTextStream("localhost", port, '\n');

        // Parse the data, group it, window it, and aggregate the counts.
        // NOTE(review): keyBy(String) and timeWindow(...) are deprecated in later Flink
        // releases - confirm against the ${flink.version} actually in use.
        DataStream<WordWithCount> windowCounts = text
                .flatMap(new FlatMapFunction<String, WordWithCount>() {
                    @Override
                    public void flatMap(String s, Collector<WordWithCount> collector) {
                        for (String word : s.split("\\s")) {
                            // Consecutive whitespace and empty lines yield empty tokens;
                            // skip them so "" is never counted as a word.
                            if (word.isEmpty()) {
                                continue;
                            }
                            collector.collect(new WordWithCount(word, 1L));
                        }
                    }
                })
                .keyBy("word")
                .timeWindow(Time.seconds(10), Time.seconds(5))
                .reduce(new ReduceFunction<WordWithCount>() {
                    @Override
                    public WordWithCount reduce(WordWithCount wordWithCount, WordWithCount t1) throws Exception {
                        return new WordWithCount(wordWithCount.word, wordWithCount.count + t1.count);
                    }
                });

        // Print the result with a single thread, rather than in parallel.
        windowCounts.print().setParallelism(1);

        env.execute("Socket Window WordCount");
    }
}

package flink;

/**
 * A word paired with the number of times it was seen.
 *
 * <p>The fields are public and a no-argument constructor is provided; the streaming job
 * addresses the {@code word} field by name (via {@code keyBy("word")}), so both are part of
 * this class's contract and must be kept.
 */
public class WordWithCount {

    /** The word being counted. */
    public String word;

    /** How many times {@link #word} occurred. */
    public long count;

    /** Creates an empty instance ({@code word == null}, {@code count == 0}). */
    public WordWithCount() {
    }

    /**
     * Creates an instance for the given word and count.
     *
     * @param word  the word
     * @param count the number of occurrences
     */
    public WordWithCount(String word, long count) {
        this.count = count;
        this.word = word;
    }

    /** Renders the pair as {@code word:count}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append(word);
        text.append(":");
        text.append(count);
        return text.toString();
    }
}

　　scala版本

package flink

import org.apache.flink.api.java.utils.ParameterTool

import org.apache.flink.api.scala._

import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment

import org.apache.flink.streaming.api.windowing.time.Time

/**
 * Streaming word count over a text socket.
 *
 * Connects to localhost:<port>, splits every received line into words and prints, per
 * word, the number of occurrences in a sliding window of 10 seconds that advances
 * every 5 seconds.
 *
 * The object name (including its spelling) is kept unchanged because it is the entry
 * point that run configurations refer to.
 */
object SokcetWindowWordCount {

  /** A word paired with the number of times it was seen. */
  case class WordWithCount(word: String, count: Long)

  def main(args: Array[String]): Unit = {
    // The port to connect to.
    val port: Int = try {
      ParameterTool.fromArgs(args).getInt("port")
    } catch {
      case e: Exception => {
        System.err.println("No port specified. Please run 'SocketWindowWordCount --port <port>'")
        return
      }
    }

    // Get the execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    // Get input data by connecting to the socket; records are delimited by '\n'.
    val text = env.socketTextStream("localhost", port, '\n')

    // Parse the data, group it, window it, and aggregate the counts.
    val windowCount = text
      // Consecutive whitespace and empty lines yield empty tokens; drop them so ""
      // is never counted as a word.
      .flatMap { w => w.split("\\s").filter(_.nonEmpty) }
      .map { w => WordWithCount(w, 1) }
      .keyBy("word")
      .timeWindow(Time.seconds(10), Time.seconds(5))
      .sum("count")

    // Print the results with a single thread, rather than in parallel.
    windowCount.print().setParallelism(1)

    env.execute("Socket Window WordCount")
  }
}

　　运行，传参：

先在终端执行 nc -lk 9999，让 nc 监听 9999 端口并用来模拟发送数据；然后再启动程序，并传入运行参数 --port 9999（程序会作为客户端连接 localhost:9999，所以必须先启动 nc）。

　　运行结果：

　　注意事项：

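　　　　千万不要把包导错了：Java 程序要导入 Java API 的类（例如 org.apache.flink.streaming.api.environment.StreamExecutionEnvironment），Scala 程序要导入 Scala API 的类（例如 org.apache.flink.streaming.api.scala.StreamExecutionEnvironment 以及 org.apache.flink.api.scala._）；如果导错，程序跑不起来。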

flink入门的更多相关文章

Flink入门（二）——Flink架构介绍
1.基本组件栈了解Spark的朋友会发现Flink的架构和Spark是非常类似的,在整个软件架构体系中,同样遵循着分层的架构设计理念,在降低系统耦合度的同时,也为上层用户构建Flink应用提供了丰富 ...
Flink入门（三）——环境与部署
flink是一款开源的大数据流式处理框架,他可以同时批处理和流处理,具有容错性.高吞吐.低延迟等优势,本文简述flink在windows和linux中安装步骤,和示例程序的运行,包括本地调试环境,集群 ...
Flink入门（四）——编程模型
flink是一款开源的大数据流式处理框架,他可以同时批处理和流处理,具有容错性.高吞吐.低延迟等优势,本文简述flink的编程模型. 数据集类型: 无穷数据集:无穷的持续集成的数据集合有界数据集:有 ...
Flink入门（五）——DataSet Api编程指南
Apache Flink Apache Flink 是一个兼顾高吞吐.低延迟.高性能的分布式处理框架.在实时计算崛起的今天,Flink正在飞速发展.由于性能的优势和兼顾批处理,流处理的特性,Flink ...
不一样的Flink入门教程
前言微信搜[Java3y]关注这个朴实无华的男人,点赞关注是对我最大的支持! 文本已收录至我的GitHub:https://github.com/ZhongFuCheng3y/3y,有300多篇原创 ...
Flink入门-第一篇：Flink基础概念以及竞品对比
Flink入门-第一篇:Flink基础概念以及竞品对比 Flink介绍截止2021年10月Flink最新的稳定版本已经发展到1.14.0 Flink起源于一个名为Stratosphere的研究项目主 ...
flink 入门
http://ifeve.com/flink-quick-start/ http://vinoyang.com/2016/05/02/flink-concepts/ http://wuchong.me ...
Flink入门宝典（详细截图版）
本文基于java构建Flink1.9版本入门程序,需要Maven 3.0.4 和 Java 8 以上版本.需要安装Netcat进行简单调试. 这里简述安装过程,并使用IDEA进行开发一个简单流处理程序 ...
记一次flink入门学习笔记
团队有几个系统数据量偏大,且每天以几万条的数量累增.有一个系统每天需要定时读取数据库,并进行相关的业务逻辑计算,从而获取最新的用户信息,定时任务的整个耗时需要4小时左右.由于定时任务是夜晚执行,目前看 ...
第02讲：Flink 入门程序 WordCount 和 SQL 实现
我们右键运行时相当于在本地启动了一个单机版本.生产中都是集群环境,并且是高可用的,生产上提交任务需要用到flink run 命令,指定必要的参数. 本课时我们主要介绍 Flink 的入门程序以及 SQ ...

随机推荐

js 事件模型
说到事件,就要追溯到网景与微软的“浏览器大战”了.当时,事件模型还没有标准,两家公司的实现就是事实标准.网景在Navigator中实现了“事件捕获”的事件系统,而微软则在IE中实现了一个基本上相反的事 ...
C#4.0 HTTP协议无法使用TLS1.2的问题
在发送HTTP请求前加入下行代码 ServicePointManager.SecurityProtocol = (SecurityProtocolType) | (SecurityProtocolTy ...
GitHub学习总结
一.安装git:下载Git,官网地址:https://git-scm.com/,进入官网首页,然后点击download,下载对应系统的git就行. 二.我用的是win系统,打开git界面是这样的: 在 ...
ignore_user_abort(true); set_time_limit(0);程序在本地测试可以一直运行，上传服务器只能运行10-15分钟
当PHP运行在安全模式下时此函数无效.除了关闭安全模式或者在php.ini程序中修改最大运行时间没有其他办法让此函数运行. php.ini 中缺省的最长执行时间是 30 秒,这是由 php.ini 中 ...
2018-2019-1 20189203《Linux内核原理与分析》第三周作业
一.课程学习计算机的三个法宝:存储程序计算机.函数调用堆栈.中断. 堆栈相关的寄存器:ESP(堆栈指针).EBP(基址指针). 堆栈操作:push:栈顶地址减少4个字节,并将操作数放入栈顶存储单元. ...
python相关学习文档收集
bs4中文文档: 用于网页爬虫 https://beautifulsoup.readthedocs.io/zh_CN/v4.4.0/ GitLab-CI 从安装到差点放弃 https://segm ...
java 知识点
1. java泛型避免编译错误 (ArrayList定义时候) 避免强制类型装换 2 多态的体现实现多态主要有以下三种方式:1. 接口实现 2. 继承父类重写方法 3. 同一类中进行方法重载 ...
Netty返回数据丢包的问题之一
这个问题是在一个群友做压力测试的时候发现的.使用客户端和netty创建一条连接,然后写了一个for循环不停的给服务器发送1500条信息,发现返回只有几百条.另外几百条不知道哪去了.查看代码,发现在服务 ...
NOIP2015跳石头
题目描述 Description 一年一度的“跳石头”比赛又要开始了! 这项比赛将在一条笔直的河道中进行,河道中分布着一些巨大岩石.组委会已经选择好了两块岩石作为比赛起点和终点.在起点和终点之间,有N ...
极致21点开发DAY2
今天完成的主要内容时MainScene场景中设置面板的开发.游戏逻辑:点击设置按钮,弹出音乐设置弹框,弹框内容包括音乐的打开与关闭,退出游戏. 点击退出游戏,弹出退出游戏确认面板.效果图: 1.首先在 ...

flink入门

wordCount

离线代码：

java版本:

scala版本：

流式：

java版本：

scala版本

flink入门的更多相关文章

随机推荐

热门专题

　java版本：

　　scala版本