package sql;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.SQLContext; /**
*
*/
/**
 * Demonstrates basic DataFrame operations using the Spark 1.x API on a local
 * JSON file: show, printSchema, select, column arithmetic, filter, and
 * groupBy/count. Each call is annotated with the equivalent SQL statement.
 */
public class DataFrameReadJsonOps2 {

    /**
     * Entry point: reads {@code c:/resources/people.json} into a DataFrame
     * and runs a series of demo queries, printing each result to stdout.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        // SparkConf reads system configuration and names the application.
        SparkConf conf = new SparkConf().setAppName("DataFrameOps").setMaster("local");
        // JavaSparkContext is the core entry point of the Driver program.
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Reduce log noise to warnings only.
        sc.setLogLevel("WARN");
        // SQLContext is the Spark 1.x entry point for SQL analysis.
        SQLContext sqlContext = new SQLContext(sc);
        // A DataFrame can be thought of as a table.
        DataFrame df = sqlContext.read().json("c:/resources/people.json");
        // select * from table
        df.show();
        // desc table
        df.printSchema();
        // select name from table
        df.select(df.col("name")).show();
        // select name, age+10 from table
        // FIX: plus() requires an operand — the original plus() does not compile,
        // and the expected output header "(age + 10)" confirms the constant 10.
        df.select(df.col("name"), df.col("age").plus(10)).show();
        // select * from table where age > 21
        // FIX: gt() requires a comparison value — the original gt() does not
        // compile; the expected output (only Andy, age 30) confirms 21.
        df.filter(df.col("age").gt(21)).show();
        // select age, count(1) from table group by age
        df.groupBy("age").count().show(); // equivalent: df.groupBy(df.col("age")).count().show();
        // Release Spark resources before exiting.
        sc.stop();
    }
}
//
//SLF4J: Class path contains multiple SLF4J bindings.
//SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-assembly-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: Found binding in [jar:file:/E:/bigdata/spark-1.4.0-bin-hadoop2.6/lib/spark-examples-1.4.0-hadoop2.6.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
//SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
//SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
//Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
//17/12/29 14:15:10 INFO SparkContext: Running Spark version 1.4.0
//17/12/29 14:15:24 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
//17/12/29 14:15:28 INFO SecurityManager: Changing view acls to: alamps
//17/12/29 14:15:28 INFO SecurityManager: Changing modify acls to: alamps
//17/12/29 14:15:28 INFO SecurityManager: SecurityManager: authentication disabled; ui acls disabled; users with view permissions: Set(alamps); users with modify permissions: Set(alamps)
//17/12/29 14:15:37 INFO Slf4jLogger: Slf4jLogger started
//17/12/29 14:15:39 INFO Remoting: Starting remoting
//17/12/29 14:15:44 INFO Remoting: Remoting started; listening on addresses :[akka.tcp://sparkDriver@172.18.3.7:55458]
//17/12/29 14:15:44 INFO Utils: Successfully started service 'sparkDriver' on port 55458.
//17/12/29 14:15:45 INFO SparkEnv: Registering MapOutputTracker
//17/12/29 14:15:46 INFO SparkEnv: Registering BlockManagerMaster
//17/12/29 14:15:46 INFO DiskBlockManager: Created local directory at C:\Users\alamps\AppData\Local\Temp\spark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80b\blockmgr-660894dd-39d3-4c8a-bf25-ae1d3850953d
//17/12/29 14:15:46 INFO MemoryStore: MemoryStore started with capacity 467.6 MB
//17/12/29 14:15:47 INFO HttpFileServer: HTTP File server directory is C:\Users\alamps\AppData\Local\Temp\spark-cd3ecbc3-41b5-4d8b-8e78-8c2c368ce80b\httpd-106ce90e-d496-4e96-a383-b471aeb5a224
//17/12/29 14:15:47 INFO HttpServer: Starting HTTP Server
//17/12/29 14:15:48 INFO Utils: Successfully started service 'HTTP file server' on port 55464.
//17/12/29 14:15:48 INFO SparkEnv: Registering OutputCommitCoordinator
//17/12/29 14:15:49 INFO Utils: Successfully started service 'SparkUI' on port 4040.
//17/12/29 14:15:49 INFO SparkUI: Started SparkUI at http://172.18.3.7:4040
//17/12/29 14:15:49 INFO Executor: Starting executor ID driver on host localhost
//17/12/29 14:15:50 INFO Utils: Successfully started service 'org.apache.spark.network.netty.NettyBlockTransferService' on port 55483.
//17/12/29 14:15:50 INFO NettyBlockTransferService: Server created on 55483
//17/12/29 14:15:50 INFO BlockManagerMaster: Trying to register BlockManager
//17/12/29 14:15:50 INFO BlockManagerMasterEndpoint: Registering block manager localhost:55483 with 467.6 MB RAM, BlockManagerId(driver, localhost, 55483)
//17/12/29 14:15:50 INFO BlockManagerMaster: Registered BlockManager
//+----+-------+
//| age| name|
//+----+-------+
//|null|Michael|
//| 30| Andy|
//| 19| Justin|
//+----+-------+
//
//root
// |-- age: long (nullable = true)
// |-- name: string (nullable = true)
//
//+-------+
//| name|
//+-------+
//|Michael|
//| Andy|
//| Justin|
//+-------+
//
//+-------+----------+
//| name|(age + 10)|
//+-------+----------+
//|Michael| null|
//| Andy| 40|
//| Justin| 29|
//+-------+----------+
//
//+---+----+
//|age|name|
//+---+----+
//| 30|Andy|
//+---+----+
//
//+----+-----+
//| age|count|
//+----+-----+
//|null| 1|
//| 19| 1|
//| 30| 1|
//+----+-----+

spark sql01的更多相关文章

  1. Spark踩坑记——Spark Streaming+Kafka

    [TOC] 前言 在WeTest舆情项目中,需要对每天千万级的游戏评论信息进行词频统计,在生产者一端,我们将数据按照每天的拉取时间存入了Kafka当中,而在消费者一端,我们利用了spark strea ...

  2. Spark RDD 核心总结

    摘要: 1.RDD的五大属性 1.1 partitions(分区) 1.2 partitioner(分区方法) 1.3 dependencies(依赖关系) 1.4 compute(获取分区迭代列表) ...

  3. spark处理大规模语料库统计词汇

    最近迷上了spark,写一个专门处理语料库生成词库的项目拿来练练手, github地址:https://github.com/LiuRoy/spark_splitter.代码实现参考wordmaker ...

  4. Hive on Spark安装配置详解(都是坑啊)

    个人主页:http://www.linbingdong.com 简书地址:http://www.jianshu.com/p/a7f75b868568 简介 本文主要记录如何安装配置Hive on Sp ...

  5. Spark踩坑记——数据库(Hbase+Mysql)

    [TOC] 前言 在使用Spark Streaming的过程中对于计算产生结果的进行持久化时,我们往往需要操作数据库,去统计或者改变一些值.最近一个实时消费者处理任务,在使用spark streami ...

  6. Spark踩坑记——初试

    [TOC] Spark简介 整体认识 Apache Spark是一个围绕速度.易用性和复杂分析构建的大数据处理框架.最初在2009年由加州大学伯克利分校的AMPLab开发,并于2010年成为Apach ...

  7. Spark读写Hbase的二种方式对比

    作者:Syn良子 出处:http://www.cnblogs.com/cssdongl 转载请注明出处 一.传统方式 这种方式就是常用的TableInputFormat和TableOutputForm ...

  8. (资源整理)带你入门Spark

    一.Spark简介: 以下是百度百科对Spark的介绍: Spark 是一种与 Hadoop 相似的开源集群计算环境,但是两者之间还存在一些不同之处,这些有用的不同之处使 Spark 在某些工作负载方 ...

  9. Spark的StandAlone模式原理和安装、Spark-on-YARN的理解

    Spark是一个内存迭代式运算框架,通过RDD来描述数据从哪里来,数据用那个算子计算,计算完的数据保存到哪里,RDD之间的依赖关系.他只是一个运算框架,和storm一样只做运算,不做存储. Spark ...

随机推荐

  1. iOS开发ffmpeg SDK 编译和集成

    FFmpeg是一套可以用来记录.转换数字音频.视频,并能将其转化为流的开源计算机程序.它提供了录制.转换以及流化音视频的完整解决方案.同时,FFmpeg是一套跨平台的方案,所以我们可以在iOS开发中使 ...

  2. day4:数据结构list

    1,一直输入用户名,输入Q退出,注意用户的输入别忘了加strip,和upper不区分大小写,list最后一位添加append li = [] while 1: name = input("& ...

  3. nginx rate limit

    nginx rate limithttps://www.topjishu.com/12139.htmlhttps://blog.csdn.net/hellow__world/article/detai ...

  4. [qemu][cloud][centos][ovs][sdn] centos7安装高版本的qemu 以及 virtio/vhost/vhost-user咋回事

    因为要搭建ovs-dpdk,所以需要vhost-user的qemu centos默认的qemu与qemu-kvm都不支持vhost-user,qemu最高版本是2.0.0, qemu-kvm最高版本是 ...

  5. oracle 11g/12c 密码复杂度验证设置

    ############################################################################### ###### 11g ###### ## ...

  6. glove理解

    先贴个看起来讲的不错的文章链接,后续详细看了再补充https://blog.csdn.net/leyounger/article/details/78206832 2018.10.28大概了解了下gl ...

  7. 前端 HTML body标签相关内容 常用标签 盒子标签 div

    盒子标签 div <div>可定义文档的分区 division的缩写 译:区 <div> 标签可以把文档分割为独立的.将他们进行分区 div在浏览器中,默认是不会增加任何的效果 ...

  8. 云serverlinux又一次挂载指定文件夹(非扩充)

    版权声明:本文为博主原创文章.转载请注明出处. https://blog.csdn.net/liuensong/article/details/27548771 新买的香港云server,系统仅仅能在 ...

  9. 使用Postgres,Nginx和Gunicorn将Django配置到服务器上

    先决条件 首先你得先在Digital Ocean上有一台自己的服务器,如果还没有,可参考教程如何在Digital Ocean上申请服务器. 为了方便起见,我将本教程分为两部分.第一部分(步骤1 - 6 ...

  10. what's the 爬虫之基本原理

    what's the 爬虫? 了解爬虫之前,我们首先要知道什么是互联网 1.什么是互联网? 互联网是由网络设备(网线,路由器,交换机,防火墙等等)和一台台计算机连接而成,总体上像一张网一样. 2.互联 ...