sedona(Geospark)读取csv

package com.grady.sedona

import org.apache.sedona.sql.utils.SedonaSQLRegistrator

import org.apache.sedona.viz.core.Serde.SedonaVizKryoRegistrator

import org.apache.spark.serializer.KryoSerializer

import org.apache.spark.sql.SparkSession

/**
 * Example Spark job that loads a header-less CSV of coordinate pairs and converts
 * each row into a Sedona point geometry through Spark SQL.
 */
object SedonaReadCsv {

  // HDFS file location of the input CSV.
  val csvPointInputLocation: String = "/tmp/jiang/" + "testpoint.csv"

  /**
   * Builds a Kryo-serialized SparkSession, registers the Sedona SQL functions on it,
   * runs [[readCsv]], and always stops the session afterwards.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val ss: SparkSession = SparkSession.builder()
      .config("spark.serializer", classOf[KryoSerializer].getName)
      .config("spark.kryo.registrator", classOf[SedonaVizKryoRegistrator].getName)
      .appName("SedonaAnalysisScv")
      .getOrCreate()

    // Stop the session even when registration or the read fails.
    try {
      SedonaSQLRegistrator.registerAll(ss)
      readCsv(ss)
    } finally {
      ss.stop()
    }
  }

  /**
   * Reads the CSV at [[csvPointInputLocation]], registers the raw rows as the temp view
   * `test_point_csv`, builds a `pointshape` geometry column from the first two columns
   * with `ST_Point`, registers that result as the temp view `test_point`, and prints
   * both data frames.
   *
   * @param ss session on which the Sedona SQL functions have already been registered
   */
  def readCsv(ss: SparkSession): Unit = {
    val pointCsvDF = ss.read
      .format("csv")
      .option("delimiter", ",")
      .option("header", "false")
      .load(csvPointInputLocation)

    pointCsvDF.createOrReplaceTempView("test_point_csv")
    pointCsvDF.show(10)

    // The file has no header, so Spark names the columns _c0 and _c1.
    val pointDF = ss.sql(
      "select ST_Point(" +
        "cast(test_point_csv._c0 as Decimal(24,20))," +
        "cast(test_point_csv._c1 as Decimal(24,20))" +
        ") as pointshape from test_point_csv"
    )

    // Register the geometry frame (not the raw CSV frame) under "test_point".
    pointDF.createOrReplaceTempView("test_point")
    pointDF.show()
  }

}

pom.xml

<?xml version="1.0" encoding="UTF-8"?>

<project xmlns="http://maven.apache.org/POM/4.0.0"

         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"

         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">

    <parent>

        <artifactId>spark-practise</artifactId>

        <groupId>org.example</groupId>

        <version>1.0-SNAPSHOT</version>

    </parent>

    <modelVersion>4.0.0</modelVersion>

    <artifactId>sedona</artifactId>

    <properties>

        <maven.compiler.source>8</maven.compiler.source>

        <maven.compiler.target>8</maven.compiler.target>

    </properties>

    <dependencies>

        <!-- Spark artifacts are declared with "provided" scope; the job is launched with
             spark-submit, so the Spark runtime is expected to supply them.
             Versions and the Scala suffix come from properties that are not defined in
             this file (presumably in the parent POM; verify). -->
        <dependency>

            <groupId>org.apache.spark</groupId>

            <artifactId>spark-core_${scala.binary.version}</artifactId>

            <version>${spark.version}</version>

            <scope>provided</scope>

        </dependency>

        <dependency>

            <groupId>org.apache.spark</groupId>

            <artifactId>spark-sql_${scala.binary.version}</artifactId>

            <version>${spark.version}</version>

            <scope>provided</scope>

        </dependency>

        <dependency>

            <groupId>org.apache.spark</groupId>

            <artifactId>spark-yarn_${scala.binary.version}</artifactId>

            <version>${spark.version}</version>

            <scope>provided</scope>

        </dependency>

        <!-- Apache Sedona (formerly GeoSpark) modules: core, SQL and viz, all at
             1.1.1-incubating. These use the default (compile) scope.
             NOTE(review): the Scala suffix is hard-coded to 2.12 here while the Spark
             artifacts above use ${scala.binary.version}; confirm the two agree. -->

        <dependency>

            <groupId>org.apache.sedona</groupId>

            <artifactId>sedona-core-3.0_2.12</artifactId>

            <version>1.1.1-incubating</version>

        </dependency>

        <dependency>

            <groupId>org.apache.sedona</groupId>

            <artifactId>sedona-sql-3.0_2.12</artifactId>

            <version>1.1.1-incubating</version>

        </dependency>

        <dependency>

            <groupId>org.apache.sedona</groupId>

            <artifactId>sedona-viz-3.0_2.12</artifactId>

            <version>1.1.1-incubating</version>

        </dependency>

        <!-- JTS geometry library, declared explicitly at 1.18.0. -->
        <dependency>

            <groupId>org.locationtech.jts</groupId>

            <artifactId>jts-core</artifactId>

            <version>1.18.0</version>

        </dependency>

    </dependencies>

    <build>

        <!-- Files under src/main/resources are copied with Maven property filtering enabled. -->
        <resources>

            <resource>

                <directory>src/main/resources</directory>

                <filtering>true</filtering>

            </resource>

        </resources>

        <plugins>

            <!-- Compiles the Scala sources. The compile goals are bound to the
                 process-resources / process-test-resources phases, i.e. ahead of the
                 default compile and test-compile phases. -->
            <plugin>

                <groupId>net.alchim31.maven</groupId>

                <artifactId>scala-maven-plugin</artifactId>

                <version>3.2.1</version>

                <configuration>

                    <source>1.8</source>

                    <target>1.8</target>

                    <scalaVersion>${scala.version}</scalaVersion>

                </configuration>

                <executions>

                    <execution>

                        <id>scala-compile-first</id>

                        <phase>process-resources</phase>

                        <goals>

                            <goal>add-source</goal>

                            <goal>compile</goal>

                        </goals>

                    </execution>

                    <execution>

                        <id>scala-test-compile</id>

                        <phase>process-test-resources</phase>

                        <goals>

                            <goal>testCompile</goal>

                        </goals>

                    </execution>

                </executions>

            </plugin>

            <!-- Builds the shaded jar during the package phase; this is the jar passed
                 to spark-submit. All org.slf4j artifacts are left out of it. -->
            <plugin>

                <groupId>org.apache.maven.plugins</groupId>

                <artifactId>maven-shade-plugin</artifactId>

                <version>3.2.1</version>

                <configuration>

                    <artifactSet>

                        <excludes>

                            <exclude>org.slf4j:*</exclude>

                        </excludes>

                    </artifactSet>

                </configuration>

                <executions>

                    <execution>

                        <phase>package</phase>

                        <goals>

                            <goal>shade</goal>

                        </goals>

                        <configuration>

                            <!-- Do not write a dependency-reduced-pom.xml next to this POM. -->
                            <createDependencyReducedPom>false</createDependencyReducedPom>

                            <!-- Drop jar signature files from every bundled artifact; signatures
                                 copied from dependencies would not match the merged jar. -->
                            <filters>

                                <filter>

                                    <artifact>*:*</artifact>

                                    <excludes>

                                        <exclude>META-INF/*.SF</exclude>

                                        <exclude>META-INF/*.DSA</exclude>

                                        <exclude>META-INF/*.RSA</exclude>

                                    </excludes>

                                </filter>

                            </filters>

                            <!-- Merge META-INF/services entries from all bundled jars instead of
                                 letting one jar's file overwrite another's. -->
                            <transformers>

                                <transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />

                            </transformers>

                        </configuration>

                    </execution>

                </executions>

            </plugin>

        </plugins>

    </build>

</project>

执行命令：spark-submit --master yarn --driver-memory=2G --class com.grady.sedona.SedonaReadCsv /app/data/appdeploy/sedona-1.0-SNAPSHOT.jar

日志:

+----+-----+

| _c0|  _c1|

+----+-----+

| 1.1|101.1|

| 2.1|102.1|

| 3.1|103.1|

| 4.1|104.1|

| 5.1|105.1|

| 6.1|106.1|

| 7.1|107.1|

| 8.1|108.1|

| 9.1|109.1|

|10.1|110.1|

+----+-----+

+------------------+

|        pointshape|

+------------------+

| POINT (1.1 101.1)|

| POINT (2.1 102.1)|

| POINT (3.1 103.1)|

| POINT (4.1 104.1)|

| POINT (5.1 105.1)|

| POINT (6.1 106.1)|

| POINT (7.1 107.1)|

| POINT (8.1 108.1)|

| POINT (9.1 109.1)|

|POINT (10.1 110.1)|

|POINT (11.1 111.1)|

|POINT (12.1 112.1)|

|POINT (13.1 113.1)|

|POINT (14.1 114.1)|

|POINT (15.1 115.1)|

|POINT (16.1 116.1)|

|POINT (17.1 117.1)|

|POINT (18.1 118.1)|

|POINT (19.1 119.1)|

|POINT (20.1 120.1)|

+------------------+

only showing top 20 rows

sedona(Geospark)读取csv的更多相关文章

sparkR读取csv文件
sparkR读取csv文件 The general method for creating SparkDataFrames from data sources is read.df. This met ...
C# 读取 CSV 文件
最近做一个C#项目要导入CSV文件中的数据到Oracle中,使用Aspose.Cells读取中文字段标题却乱码,表的最后多出几行null记录,而且不是免费的,后来找到了NPOI,顾名思义,就是POI的 ...
PHP读取CSV数据写入数据库
/*读取csv文件*/ public function testCsv(){ $fileName = "tel.csv"; $fp=fopen($fileName,"r& ...
VB6.0 读取CSV文件
最近做了一个Upload文件的需求,文件的格式为CSV,读取文件的方法整理了一下,如下: 1.先写了一个读取CSV文件的Function: '读取CSV文件 '假设传入的参数strFile=C:\Do ...
php读取csv文件，在linux上出现中文读取不到的情况解决方法
今,php读取csv文件,在linux上出现中文读取不到的情况,google,后找到解决办法<?phpsetlocale(LC_ALL, 'zh_CN');$row = 1;$handle = ...
内容写到 csv 格式的文件中及读取 csv 格式的文件内容
<?php/*把内容写到 csv 格式的文件中基本思路是:1.用 $fp = fopen("filename", 'mode')打开一个csv文件,可以是打开时才建立的2. ...
Unity 读取CSV与Excel
前几天看到我们在游戏中需要动态加载某些角色的游戏策划值,关于这个问题怎么解决呢?其实办法很多种,归根到底,就是数据的读取.我们可以想到的存储数据的载体有很多.例如:txt,xml,csv,excel. ...
使用univocity-parsers创建和读取csv文件
import com.univocity.parsers.csv.CsvFormat;import com.univocity.parsers.csv.CsvParser;import com.uni ...
PHP读取CSV大文件导入数据库的示例
对于数百万条数据量的CSV文件,文件大小可能达到数百M,如果简单读取的话很可能出现超时或者卡死的现象. 为了成功将CSV文件里的数据导入数据库,分批处理是非常必要的. 下面这个函数是读取CSV文件中指 ...

随机推荐

sql-关键词的大小写与注释
是否区分大小写和注释大小写 oracle 自带的sqlplus: mysql 客户端 : Navicat: 注释 oracle 自带的sqlplus: mysql 客户端 : 小节 oracle ...
python小题目练习（一）
题目:输出1+2+3+4+5+--+100的总数,并打印出这行式子代码展示:# 1.定义一个初识变量total,用于后面每次循环进行累加值 total = 0# 2.利用for循环遍历累加for i ...
校验日期格式为yyyy-MM-dd
/** * 校验时间 * * @param text * @return */ public static boolean checkTime(String text) { DateFormat fo ...
HTTP Status 405 - Request method 'GET' not supported？（尚硅谷Restful案例练习关于Delete方法出现的错误）
哈罗大家好,最近在如火如荼的学习java开发----Spring系列框架,当学习到SpringMVC,动手实践RESTFUL案例时,发现了以上报错405,get请求方法没有被支持. 首先第一步,我查看 ...
appium简单使用
App 测试通常会用到的工具 adb :Android 的控制工具,用于获取Android的各种数据和控制 Appium Desktop:内嵌了Appium Server和Inspector的综合工具 ...
虚拟机启动时报’A start job is running for /etc/rc.local .. Compatibility错误。
虚拟机启动时报'A start job is running for /etc/rc.local .. Compatibility错误. 问题已经存在很长时间了,但是不影响ssh登录,遂置之未理. 经 ...
活动报名 | 如何基于开源项目 Tapdata PDK，快速完成数据源和目标的开发？
近日,Tapdata 启动 PDK 插件生态共建计划,宣布开源插件开发框架 Tapdata PDK,将自身的数据接口能力开放出来,帮助开发者根据实际需求,自助接入数据源和目标,快速开启「Data ...
Centos7中配置NIS：用户账号管理
NIS:网络信息服务 Linux系统中用户按地域分两类:本地用户,远程用户(NIS.LDAP.AD)模式:C/S模式 ypbind是定义NIS服务器的客户端进程,一旦确定了服务器位置,客户端绑定到了服 ...
Linux学习系列--用户（组）新增、查看和删除
在实际的工作中,在接触Linux的用户组管理的时候,一般来说都是在系统开建设的时候设置好,root权限由特定的负责人保管用户密码,避免误操作带来不必要的麻烦. 在具体使用的时候,会利用相关的命令设置一 ...
C# 虚方法、抽象方法
一.虚方法(virtual) 作用:当有一个定义在类中的函数需要在继承类中实现时,可以使用虚方法. 示例: class Person { public virtual void XXX() { Con ...

sedona(Geospark)读取csv

sedona(Geospark)读取csv的更多相关文章

随机推荐

热门专题