Spark-PySpark SQL 各种内置函数

# Maps each function name to its one-line description.
_functions = {
    # column construction and sort expressions
    'lit': 'Creates a :class:`Column` of literal value.',
    'col': 'Returns a :class:`Column` based on the given column name.',
    'column': 'Returns a :class:`Column` based on the given column name.',
    'asc': 'Returns a sort expression based on the ascending order of the given column name.',
    'desc': 'Returns a sort expression based on the descending order of the given column name.',

    # scalar string / math functions
    'upper': 'Converts a string expression to upper case.',
    'lower': 'Converts a string expression to lower case.',
    'sqrt': 'Computes the square root of the specified float value.',
    'abs': 'Computes the absolute value.',

    # aggregate functions
    'max': 'Aggregate function: returns the maximum value of the expression in a group.',
    'min': 'Aggregate function: returns the minimum value of the expression in a group.',
    'count': 'Aggregate function: returns the number of items in a group.',
    'sum': 'Aggregate function: returns the sum of all values in the expression.',
    'avg': 'Aggregate function: returns the average of the values in a group.',
    'mean': 'Aggregate function: returns the average of the values in a group.',
    'sumDistinct': 'Aggregate function: returns the sum of distinct values in the expression.',
}

# Maps each unary math function name to its one-line description.
_functions_1_4 = {
    # unary math functions
    # NOTE: fragments joined with `+` must carry their own separating space,
    # otherwise the words on either side of the join run together.
    'acos': 'Computes the cosine inverse of the given value; the returned angle is in the range ' +
            '0.0 through pi.',
    'asin': 'Computes the sine inverse of the given value; the returned angle is in the range ' +
            '-pi/2 through pi/2.',
    'atan': 'Computes the tangent inverse of the given value.',
    'cbrt': 'Computes the cube-root of the given value.',
    'ceil': 'Computes the ceiling of the given value.',
    'cos': 'Computes the cosine of the given value.',
    'cosh': 'Computes the hyperbolic cosine of the given value.',
    'exp': 'Computes the exponential of the given value.',
    'expm1': 'Computes the exponential of the given value minus one.',
    'floor': 'Computes the floor of the given value.',
    'log': 'Computes the natural logarithm of the given value.',
    'log10': 'Computes the logarithm of the given value in Base 10.',
    'log1p': 'Computes the natural logarithm of the given value plus one.',
    'rint': 'Returns the double value that is closest in value to the argument and' +
            ' is equal to a mathematical integer.',
    'signum': 'Computes the signum of the given value.',
    'sin': 'Computes the sine of the given value.',
    'sinh': 'Computes the hyperbolic sine of the given value.',
    'tan': 'Computes the tangent of the given value.',
    'tanh': 'Computes the hyperbolic tangent of the given value.',
    'toDegrees': '.. note:: Deprecated in 2.1, use degrees instead.',
    'toRadians': '.. note:: Deprecated in 2.1, use radians instead.',
    'bitwiseNOT': 'Computes bitwise not.',
}

# Maps each statistical/collection aggregate function name to its one-line description.
_functions_1_6 = {
    # aggregate functions
    'stddev': 'Aggregate function: returns the unbiased sample standard deviation of' +
              ' the expression in a group.',
    'stddev_samp': 'Aggregate function: returns the unbiased sample standard deviation of' +
                   ' the expression in a group.',
    'stddev_pop': 'Aggregate function: returns population standard deviation of' +
                  ' the expression in a group.',
    # Spark documents `variance` as an alias of `var_samp` (sample variance), not
    # `var_pop`, so its description mirrors `var_samp`.
    'variance': 'Aggregate function: returns the unbiased variance of the values in a group.',
    'var_samp': 'Aggregate function: returns the unbiased variance of the values in a group.',
    'var_pop': 'Aggregate function: returns the population variance of the values in a group.',
    'skewness': 'Aggregate function: returns the skewness of the values in a group.',
    'kurtosis': 'Aggregate function: returns the kurtosis of the values in a group.',
    'collect_list': 'Aggregate function: returns a list of objects with duplicates.',
    'collect_set': 'Aggregate function: returns a set of objects with duplicate elements' +
                   ' eliminated.',
}

# Name -> description table for the angle-conversion functions.
_functions_2_1 = dict(
    # unary math functions
    degrees=(
        'Converts an angle measured in radians to an approximately equivalent angle '
        'measured in degrees.'
    ),
    radians=(
        'Converts an angle measured in degrees to an approximately equivalent angle '
        'measured in radians.'
    ),
)

# Name -> description table for the string-to-date/timestamp conversion functions.
_functions_2_2 = dict([
    (
        'to_date',
        'Converts a string date into a DateType using the (optionally) specified format.',
    ),
    (
        'to_timestamp',
        'Converts a string timestamp into a timestamp type using the '
        '(optionally) specified format.',
    ),
])

# math functions that take two arguments as input
# (maps each function name to its one-line description)
_binary_mathfunctions = {
    # The first fragment ends with a space so the joined sentence reads
    # "... (x, y) to polar coordinates ..." rather than "topolar".
    'atan2': 'Returns the angle theta from the conversion of rectangular coordinates (x, y) to ' +
             'polar coordinates (r, theta).',
    'hypot': 'Computes ``sqrt(a^2 + b^2)`` without intermediate overflow or underflow.',
    'pow': 'Returns the value of the first argument raised to the power of the second argument.',
}

# Maps each window function name to its (possibly multi-paragraph) description.
# Each description is laid out as: summary line, blank line, explanation
# paragraph, blank line, SQL-equivalence note. Consecutive lines of one
# paragraph must not be separated by blank lines, or the sentence is split
# into separate paragraphs in the resulting text.
_window_functions = {
    'row_number':
        """returns a sequential number starting at 1 within a window partition.""",
    'dense_rank':
        """returns the rank of rows within a window partition, without any gaps.

        The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
        sequence when there are ties. That is, if you were ranking a competition using dense_rank
        and had three people tie for second place, you would say that all three were in second
        place and that the next person came in third. Rank would give me sequential numbers, making
        the person that came in third place (after the ties) would register as coming in fifth.

        This is equivalent to the DENSE_RANK function in SQL.""",
    'rank':
        """returns the rank of rows within a window partition.

        The difference between rank and dense_rank is that dense_rank leaves no gaps in ranking
        sequence when there are ties. That is, if you were ranking a competition using dense_rank
        and had three people tie for second place, you would say that all three were in second
        place and that the next person came in third. Rank would give me sequential numbers, making
        the person that came in third place (after the ties) would register as coming in fifth.

        This is equivalent to the RANK function in SQL.""",
    'cume_dist':
        """returns the cumulative distribution of values within a window partition,
        i.e. the fraction of rows that are below the current row.""",
    'percent_rank':
        """returns the relative rank (i.e. percentile) of rows within a window partition.""",
}

Spark-PySpark sql各种内置函数的更多相关文章

SQL Server 内置函数、临时对象、流程控制
SQL Server 内置函数日期时间函数 --返回当前系统日期时间 select getdate() as [datetime],sysdatetime() as [datetime2] getd ...
[SQL]STUFF内置函数的用法 (删除指定长度的字符并在指定的起始点插入另一组字符)
STUFF 删除指定长度的字符并在指定的起始点插入另一组字符. 语法 STUFF ( character_expression , start , length , character_express ...
10、SQL Server 内置函数、临时对象、流程控制
SQL Server 内置函数日期时间函数 --返回当前系统日期时间 select getdate() as [datetime],sysdatetime() as [datetime2] getd ...
sql server内置函数
MSDN标准文档:https://msdn.microsoft.com/zh-cn/library/ff848784(v=sql.120).aspx 配置函数 select @@servername ...
Sql Server内置函数实现MD5加密
实例 MD5加密“123456”: HashBytes('MD5','123456') 结果:0xE10ADC3949BA59ABBE56E057F20F883E (提示:看完最后,结果要进行转换.) ...
总结Sql Server内置函数实现MD5加密
--MD5加密 --HashBytes ('加密方式', '待加密的值') --加密方式= MD2 | MD4 | MD5 | SHA | SHA1 --返回值类型:varbinary(maximum ...
mysql 内置函数和sql server 内置函数的区别
以下函数均没有对参数做说明,使用的时候需要了解其参数内容数据库 sql server mysql oracle 举例获得当前系统时间 getdate() now() sysdate 注意不是函数 ...
SQL Server 内置函数实现MD5加密
一.MD5加密 HASHBYTES ('加密方式', '待加密的值') 加密方式= MD2 | MD4 | MD5 | SHA | SHA1 返回值类型:varbinary(maxim ...
[SQL]STUFF内置函数的用法
STUFF 删除指定长度的字符并在指定的起始点插入另一组字符. 语法 STUFF ( character_expression , start , length , character_express ...

随机推荐

MyBatis动态SQL第一篇之实现多条件查询（if、where、trim标签）
一.动态SQL概述以前在使用JDBC操作数据时,如果查询条件特别多,将条件串联成SQL字符串是一件痛苦的事情.通常的解决方法是写很多的if-else条件语句对字符串进行拼接,并确保不能忘了空格或在字 ...
JavaScript使用readAsDataURL读取图像文件
JavaScript使用readAsDataURL读取图像文件 FileReader对象的readAsDataURL方法可以将读取到的文件编码成Data URL.Data URL是一项特殊的技术,可以 ...
vue-router动态路由设置参数可选
在日常工作中,我们需要将匹配到的所有路由,映射到一个组件上. 如下代码想要达到的效果: 不传page和id,则映射到user默认list页面传page和id,根据page不同,显示不同的页面问题 ...
Vue中自定义指令的使用方法！
除了核心功能默认内置的指令 (v-model 和 v-show),Vue 也允许注册自定义指令.注意,在 Vue2.0 中,代码复用和抽象的主要形式是组件.然而,有的情况下,你仍然需要对普通 DOM ...
WPF中关于合并资源字典
一.本项目中 <ResourceDictionary> <ResourceDictionary.MergedDictionaries> <!--<ResourceD ...
springBoot 打包上线跳过连接数据库
在pom文件下添加 <skipTests>true</skipTests> 这一行如下: <properties> <project.build.sourc ...
oracle服务端字符集
一.oracle服务端字符集 SQL> select userenv('language') from dual ; USERENV('LANGUAGE')------------------- ...
logging：不喜欢写日志可不好哦
logging模块简介 logging模块是python内置的标准模块,主要用于输出程序的运行日志. 可以设置输出日志的等级,日志保存路径,日志文件回滚等等. logging模块的基本使用 impor ...
基于mini2440嵌入式Linux根文件系统制作(Initramfs和nfs两种根文件系统)
嵌入式系统由三部分构成: 1.bootloader---bootparameters---2.kernel 3.Root-filesystem 一个内核可以挂载多个文件系统,但是有一个根文件系统所以叫 ...
dedecms织梦做中英文(多语言)网站步骤详解
用dedecms织梦程序如何做中英文网站,下面是一个详细的图文教程,希望能帮助到大家. 以下是用dedecms织梦程序制作过的一个5国语言网站,下面开始教程. 一.首先在后台建栏目,有三点需要注意 1 ...

Spark-PySpark sql各种内置函数

Spark-PySpark sql各种内置函数的更多相关文章

随机推荐

热门专题