Python调用百度接口（情感倾向分析）和讯飞接口（语音识别、关键词提取）处理音频文件

本示例的过程是：

1. 音频转文本

2. 利用文本获取情感倾向分析结果

3. 利用文本获取关键词提取

首先是讯飞的语音识别模块。在这里可以找到非实时语音转写的相关文档以及 Python 示例。我略作了改动，让它可以对不同人说话作区分，并且作了一些封装。

语音识别功能

weblfasr_python3_demo.py 文件：

 #!/usr/bin/env python

 # -*- coding: utf-8 -*-

 """

 讯飞非实时转写调用demo（语音识别）

 """

 import base64

 import hashlib

 import hmac

 import json

 import os

 import time

 import requests

 lfasr_host = 'http://raasr.xfyun.cn/api'

 # Names of the API endpoints; each one is appended to lfasr_host when a request is sent

 api_prepare = '/prepare'

 api_upload = '/upload'

 api_merge = '/merge'

 api_get_progress = '/getProgress'

 api_get_result = '/getResult'

 # File slice size in bytes: 10485760 = 10 * 1024 * 1024 (10 MiB)

 file_piece_sice = 10485760

 # ---------- Configurable transcription parameters ----------

 # The parameters are listed on the official site (https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html); add or change them in the gene_params method as needed

 # Transcription type

 lfasr_type = 0

 # Whether to enable word segmentation

 has_participle = 'false'

 # Sent as the 'has_seperate' field of every request built by gene_params
 # NOTE(review): presumably switches on speaker separation -- confirm against the API docs

 has_seperate = 'true'

 # Number of alternative candidate words

 max_alternatives = 0

 # Sub-user identifier

 suid = ''

 class SliceIdGenerator:
     """Hand out successive slice ids, one per uploaded chunk.

     The id is a 10-character string that is counted upwards like a number
     whose digits are the letters 'a'..'z', the last character being the
     fastest-changing one.
     """

     def __init__(self):
         # '`' is the character right before 'a', so the first id returned
         # by getNextSliceId() is 'aaaaaaaaaa'.
         self.__ch = 'aaaaaaaaa`'

     def getNextSliceId(self):
         """Advance the stored id by one step and return the new id."""
         letters = list(self.__ch)
         for pos in reversed(range(len(letters))):
             if letters[pos] == 'z':
                 # Overflow of this position: reset it and carry to the left.
                 letters[pos] = 'a'
                 continue
             letters[pos] = chr(ord(letters[pos]) + 1)
             break
         self.__ch = ''.join(letters)
         return self.__ch

 class RequestApi(object):
     """Client for the iFlytek non-real-time transcription REST API.

     One instance handles one audio file and drives the calls
     prepare -> upload -> merge -> getProgress -> getResult.
     """

     def __init__(self, appid, secret_key, upload_file_path):
         """Remember the credentials and the file to transcribe.

         :param appid: application id, sent as ``app_id`` in every request
         :param secret_key: key used to compute the request signature
         :param upload_file_path: path of the audio file to upload
         """
         self.appid = appid
         self.secret_key = secret_key
         self.upload_file_path = upload_file_path

     # Not every parameter of the API is used here; see
     # https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html
     # and pick the ones that fit your use case.
     def gene_params(self, apiname, taskid=None, slice_id=None):
         """Build the form fields for one API call.

         :param apiname: one of the module-level ``api_*`` endpoint names
         :param taskid: task id returned by the prepare call (not used for prepare)
         :param slice_id: id of the slice being uploaded (upload only)
         :return: dict of request fields; for an unknown ``apiname`` it only
                  contains ``has_seperate``
         """
         ts = str(int(time.time()))
         # signa = base64(HMAC-SHA1(key=secret_key, msg=md5_hex(appid + ts)))
         md5_hex = hashlib.md5((self.appid + ts).encode('utf-8')).hexdigest()
         signa = hmac.new(self.secret_key.encode('utf-8'),
                          md5_hex.encode('utf-8'), hashlib.sha1).digest()
         signa = base64.b64encode(signa).decode('utf-8')
         file_len = os.path.getsize(self.upload_file_path)
         file_name = os.path.basename(self.upload_file_path)

         param_dict = {}
         if apiname in (api_prepare, api_upload, api_merge, api_get_progress, api_get_result):
             # Fields shared by every known endpoint.
             param_dict['app_id'] = self.appid
             param_dict['signa'] = signa
             param_dict['ts'] = ts
         if apiname == api_prepare:
             # slice_num is the number of slices: file_len / file_piece_sice
             # rounded up, computed with integer arithmetic.
             slice_num = (file_len + file_piece_sice - 1) // file_piece_sice
             param_dict['file_len'] = str(file_len)
             param_dict['file_name'] = file_name
             param_dict['slice_num'] = str(slice_num)
         elif apiname == api_upload:
             param_dict['task_id'] = taskid
             param_dict['slice_id'] = slice_id
         elif apiname == api_merge:
             param_dict['task_id'] = taskid
             param_dict['file_name'] = file_name
         elif apiname == api_get_progress or apiname == api_get_result:
             param_dict['task_id'] = taskid
         param_dict['has_seperate'] = has_seperate
         return param_dict

     # The meaning of the response fields is described at
     # https://doc.xfyun.cn/rest_api/%E8%AF%AD%E9%9F%B3%E8%BD%AC%E5%86%99.html
     def gene_request(self, apiname, data, files=None, headers=None):
         """POST one request and return the decoded JSON response.

         :param apiname: endpoint name appended to ``lfasr_host``
         :param data: form fields, normally produced by ``gene_params``
         :param files: optional multipart file fields
         :param headers: optional extra HTTP headers
         :return: the response as a dict (only when its ``ok`` field is 0)
         :raises SystemExit: when the response reports ``ok != 0``
         """
         response = requests.post(lfasr_host + apiname, data=data, files=files, headers=headers)
         result = json.loads(response.text)
         if result["ok"] != 0:
             # The original called exit(0) here, which ended the process
             # silently with a success status. Still SystemExit, but with
             # the error text and a non-zero exit status.
             raise SystemExit("{} error:".format(apiname) + str(result))
         print('treating...')
         return result

     def prepare_request(self):
         """Call the prepare endpoint; its ``data`` field is used as the task id."""
         return self.gene_request(apiname=api_prepare,
                                  data=self.gene_params(api_prepare))

     def upload_request(self, taskid, upload_file_path):
         """Upload the file in slices of ``file_piece_sice`` bytes.

         :param taskid: task id returned by the prepare call
         :param upload_file_path: path of the file to upload
         :return: True when every slice was accepted, False otherwise
         """
         sig = SliceIdGenerator()
         with open(upload_file_path, 'rb') as file_object:
             while True:
                 content = file_object.read(file_piece_sice)
                 if not content:
                     break
                 files = {
                     # The original filled this field from
                     # gene_params(api_upload).get("slice_id"), which is always
                     # None because no slice_id was passed; None is kept so the
                     # request stays the same.
                     "filename": None,
                     "content": content
                 }
                 response = self.gene_request(api_upload,
                                              data=self.gene_params(api_upload, taskid=taskid,
                                                                    slice_id=sig.getNextSliceId()),
                                              files=files)
                 if response.get('ok') != 0:
                     # Uploading this slice failed.
                     print('upload slice fail, response: ' + str(response))
                     return False
                 print('treating...')
         return True

     def merge_request(self, taskid):
         """Call the merge endpoint for the uploaded slices of ``taskid``."""
         return self.gene_request(api_merge, data=self.gene_params(api_merge, taskid=taskid))

     def get_progress_request(self, taskid):
         """Query the progress of ``taskid``."""
         return self.gene_request(api_get_progress, data=self.gene_params(api_get_progress, taskid=taskid))

     def get_result_request(self, taskid):
         """Fetch the transcription result of ``taskid``."""
         return self.gene_request(api_get_result, data=self.gene_params(api_get_result, taskid=taskid))

     def all_api_request(self):
         """Run the whole workflow for ``self.upload_file_path``.

         :return: the response dict of the getResult call, or None when the
                  upload or the task fails
         """
         # 1. Prepare
         pre_result = self.prepare_request()
         taskid = pre_result["data"]
         # 2. Upload the slices
         if not self.upload_request(taskid=taskid, upload_file_path=self.upload_file_path):
             return None
         # 3. Merge
         self.merge_request(taskid=taskid)
         # 4. Poll the progress every 20 seconds until status 9 is reported
         while True:
             progress_dic = self.get_progress_request(taskid)
             # NOTE(review): 26605 is tolerated like 0 -- presumably "task
             # still being processed"; confirm against the API error codes.
             if progress_dic['err_no'] not in (0, 26605):
                 print('task error: ' + str(progress_dic.get('failed')))
                 return None
             data = progress_dic['data']
             task_status = json.loads(data)
             if task_status['status'] == 9:
                 break
             print('The task ' + taskid + ' is in processing, task status: ' + str(data))
             print('processing...')
             time.sleep(20)
         # 5. Fetch the result
         return self.get_result_request(taskid=taskid)

 def get_text_result(upload_file_path, appid="xxx", secret_key="xxx"):
     """
     Run the full transcription workflow for one audio file.

     :param upload_file_path: path of the audio file to transcribe
     :param appid: application id passed to RequestApi; the default "xxx"
                   is a placeholder that must be replaced with a real id
     :param secret_key: secret key passed to RequestApi; the default "xxx"
                        is a placeholder as well
     :return: whatever RequestApi.all_api_request() returns for the file
     """
     api = RequestApi(appid=appid, secret_key=secret_key, upload_file_path=upload_file_path)
     return api.all_api_request()

 # Note: if the requests module fails with: "NoneType" object has no attribute 'read', try upgrading requests to 2.20.0 or later (this demo was tested with 2.20.0)

 # Fill in the iFlytek open-platform appid and secret_key (see get_text_result) and the path of the file to transcribe

 if __name__ == '__main__':

     result = get_text_result('input/xxx.m4a')

     print(result)

     print(type(result))

appid 和 secret_key 需要你自己申请之后，配置上去。

配置好之后填写需要输入的音频，就可以运行该脚本作测试。

python weblfasr_python3_demo.py

treating...

treating...

treating...

treating...

treating...

The task e3e3284aee4a4e3b86a4fd506960e0f2 is in processing, task status: {"status":2,"desc":"音频合并完成"}

processing...

treating...

The task e3e3284aee4a4e3b86a4fd506960e0f2 is in processing, task status: {"status":3,"desc":"音频转写中"}

processing...

treating...

treating...

{'data': '[{"bg":"480","ed":"1810","onebest":"我好高兴！","speaker":"2"},{"bg":"1820","ed":"4440","onebest":"啊明天就放假了！","speaker":"1"}]', 'err_no': 0, 'failed': None, 'ok': 0}

<class 'dict'>

情感倾向分析功能

这里是百度情感倾向分析的文档，可以选择 Python SDK 或者 API 接口，我选择的是 API 接口。并且我对它进行了一定程度的封装。

baidu_sentiment.py 文件有如下代码：

 #!/usr/bin/env python

 # -*- coding: utf-8 -*-

 """

 百度情感倾向分析：

 get_sentiment_result 用于 demo 进行调用

 # 参数    说明    描述

 # log_id    uint64    请求唯一标识码

 # sentiment    int    表示情感极性分类结果，0:负向，1:中性，2:正向

 # confidence    float    表示分类的置信度，取值范围[0,1]

 # positive_prob    float    表示属于积极类别的概率 ，取值范围[0,1]

 # negative_prob    float    表示属于消极类别的概率，取值范围[0,1]

 """

 import json

 import requests

 def get_sentiment_result(text, client_id='xxx', client_secret='xxx'):
     """
     Query the Baidu sentiment-classification API for a piece of text.

     An access token is requested first and then used for the actual call.

     :param text: input text; None or an empty string short-circuits
     :param client_id: client id of the Baidu application (placeholder default)
     :param client_secret: client secret of the Baidu application (placeholder default)
     :return: the decoded JSON response of the sentiment call, or '' when
              there is no text to analyse
     :raises KeyError: when the token response carries no access_token
     """
     if not text:
         return ''

     headers = {'Content-Type': 'application/json; charset=UTF-8'}

     # A token has to be obtained before the sentiment endpoint can be called.
     token_url = 'https://aip.baidubce.com/oauth/2.0/token'
     token_params = {
         'grant_type': 'client_credentials',
         'client_id': client_id,
         'client_secret': client_secret
     }
     token_response = requests.post(url=token_url, params=token_params, headers=headers).json()
     access_token = token_response.get('access_token')
     if not access_token:
         # Same exception type as the plain dict lookup used before, but with
         # the server's answer attached so the cause is visible.
         raise KeyError('access_token missing from token response: ' + str(token_response))

     # General-purpose sentiment endpoint
     url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify'
     # Customised sentiment endpoint
     # url = 'https://aip.baidubce.com/rpc/2.0/nlp/v1/sentiment_classify_custom'

     # Call the sentiment endpoint with the token.
     params = {
         'access_token': access_token
     }
     payload = json.dumps({
         'text': text
     })
     response = requests.post(url=url, params=params, data=payload, headers=headers).json()
     return response

 if __name__ == '__main__':
     # Manual check: print the API response for two sample sentences.
     for sample in ('白日放歌须纵酒，青春作伴好还乡。',
                    '思悠悠，恨悠悠，恨到归时方始休。'):
         print(get_sentiment_result(sample))

同样，你需要在百度创建应用，配置好你的 client_id 和 client_secret。你也可以运行该脚本进行测试。

python baidu_sentiment.py

{'log_id': , 'text': '白日放歌须纵酒，青春作伴好还乡。', 'items': [{'positive_prob': 0.537741, 'confidence': 0.245186, 'negative_prob': 0.462259, 'sentiment': }]}

{'log_id': , 'text': '思悠悠，恨悠悠，恨到归时方始休。', 'items': [{'positive_prob': 0.345277, 'confidence': 0.232717, 'negative_prob': 0.654723, 'sentiment': }]}

关键词提取功能

在这里可以找到讯飞的关键词提取的接口文档和示例代码。同样我也略作了改动，进行了封装。

WebLtp_python3_demo.py 文件代码：

 #!/usr/bin/python

 # -*- coding: UTF-8 -*-

 """

 讯飞关键词提取接口

 """

 import time

 import urllib.request

 import urllib.parse

 import json

 import hashlib

 import base64

 # Endpoint URL

 url = "http://ltpapi.xfyun.cn/v1/ke"

 # Application ID from the open platform

 x_appid = "xxx"

 # API key of the open-platform application

 api_key = "xxx"

 # Sample text to analyse

 TEXT = "汉皇重色思倾国，御宇多年求不得。杨家有女初长成，养在深闺人未识。天生丽质难自弃，一朝选在君王侧。"

 def get_keyword_result(text):
     """
     Send text to the iFlytek keyword-extraction endpoint.

     Based on the example from the official iFlytek documentation.

     :param text: input text; None or an empty string short-circuits
     :return: the response body decoded as UTF-8 text, or '' when there is
              no text to analyse
     """
     if not text:
         return ''

     body = urllib.parse.urlencode({'text': text}).encode('utf-8')
     param = {"type": "dependent"}
     # X-Param is the base64 of the JSON parameters with spaces removed.
     x_param = base64.b64encode(json.dumps(param).replace(' ', '').encode('utf-8'))
     x_time = str(int(time.time()))
     # X-CheckSum = md5(api_key + X-CurTime + X-Param)
     x_checksum = hashlib.md5(api_key.encode('utf-8') +
                              x_time.encode('utf-8') +
                              x_param).hexdigest()
     x_header = {'X-Appid': x_appid,
                 'X-CurTime': x_time,
                 'X-Param': x_param,
                 'X-CheckSum': x_checksum}
     req = urllib.request.Request(url, body, x_header)
     # The context manager closes the HTTP response once the body is read.
     with urllib.request.urlopen(req) as response:
         return response.read().decode('utf-8')

 if __name__ == '__main__':

     # Run the keyword extraction on the sample TEXT, then print the raw response and its type

     keyword_result = get_keyword_result(TEXT)

     print(keyword_result)

     print(type(keyword_result))

配置好你的 x_appid 和 api_key。

注意：关键词提取还需要你在讯飞应用的后台设置白名单。

点击管理，配置好自己的公网 IP。试着运行一下脚本，会有如下输出：

python WebLtp_python3_demo.py

{"code":"0","data":{"ke":[{"score":"0.646","word":"汉皇"},{"score":"0.634","word":"御宇"},{"score":"0.633","word":"重色"},{"score":"0.632","word":"王侧"},{"score":"0.628","word":"思倾国"},{"score":"0.601","word":"自弃"},{"score":"0.600","word":"杨家"},{"score":"0.588","word":"深闺人未识"},{"score":"0.588","word":"求不得"},{"score":"0.586","word":"天生丽质"}]},"desc":"success","sid":"ltp000aed03@dx589210907749000100"}

<class 'str'>

把所有功能组合起来

用一个 Demo 把所有功能组合起来，并把结果存储到文件中。

demo.py 如下：

 #!/usr/bin/env python

 # -*- coding: utf-8 -*-

 """

 这是主要的demo

 流程是：

 音频->讯飞语音识别API->文本

 文本再作两种处理：

     文本->百度情绪识别API->情绪识别的响应

     文本->讯飞关键词提取API->关键词提取的响应

 """

 import sys

 import json

 from weblfasr_python3_demo import get_text_result

 from baidu_sentiment import get_sentiment_result

 from WebLtp_python3_demo import get_keyword_result

 # Hard-code the audio file that should be analysed offline

 # Some test inputs below --------------------------

 # SOURCE_PATH = 'input/test.mp3'

 # SOURCE_PATH = 'input/test.pcm'

 # SOURCE_PATH = 'input/test.m4a'

 # SOURCE_PATH = 'input/test.wav'

 # End of the test inputs --------------------------

 # Alternatively, choose the audio file with a command-line argument

 # e.g.: python demo.py test.wav

 SOURCE_PATH = 'input/' + sys.argv[1]

 # STEP 1: call the iFlytek speech-recognition API

 # Response returned by the recognition call; this runs as soon as the module is loaded

 TEXT_RESULT = get_text_result(SOURCE_PATH)

 def save_file(data, destin):
     """
     Persist data as UTF-8 text.

     The data is converted with str(); nothing is written when the result
     is an empty string. Missing parent directories of the target are
     created first, so a fresh checkout without an output folder works.

     :param data: data to store
     :param destin: path of the target file
     :return: None
     """
     # Local import: this script does not import os at module level.
     import os

     data = str(data)
     if not data:
         return
     parent = os.path.dirname(destin)
     if parent:
         os.makedirs(parent, exist_ok=True)
     with open(destin, "w", encoding='utf-8') as f:
         f.write(data)

 def whole_method():
     """
     Analyse the transcript as one text, without telling the speakers apart.

     The recognised sentences are concatenated, stored, and then passed to
     the sentiment analysis and to the keyword extraction; each response is
     stored in its own file under output/.

     :return: None
     """
     # Decode the recognition payload and glue all sentences together.
     segments = json.loads(TEXT_RESULT['data'])
     text_result = ''.join(segment['onebest'] for segment in segments)
     print('text_result:', text_result)
     print('text_result completed')
     save_file(text_result, 'output/text_result.txt')

     # STEP 2 (sentiment analysis) and STEP 3 (keyword extraction): call the
     # service with the text, store its response, report completion.
     steps = (
         (get_sentiment_result, 'output/sentiment_result.txt', 'sentiment_result completed'),
         (get_keyword_result, 'output/keyword_result.txt', 'keyword_result completed'),
     )
     for analyse, target, done_message in steps:
         save_file(analyse(text_result), target)
         print(done_message)

 def seperate_method():
     """
     Analyse the transcript per speaker (a two-person dialogue is assumed).

     Sentences whose 'speaker' field is '1' form the first text, all other
     sentences form the second one. Each text is stored and then passed to
     the sentiment analysis and to the keyword extraction; every response
     is stored in its own file under output/.

     :return: None
     """
     data_list = json.loads(TEXT_RESULT['data'])

     # Assume two speakers and collect the sentences of each one.
     # The speaker label was compared with '' before, which no label equals,
     # so every sentence ended up in the second text.
     first_parts = []
     second_parts = []
     for data in data_list:
         if data['speaker'] == '1':
             first_parts.append(data['onebest'])
         else:
             second_parts.append(data['onebest'])
     text_result1 = ''.join(first_parts)
     text_result2 = ''.join(second_parts)

     print('text_result1', text_result1)
     print('text_result2', text_result2)
     print('text_result1 text_result2 completed')
     save_file(text_result1, 'output/text_result1.txt')
     save_file(text_result2, 'output/text_result2.txt')

     # STEP 2: sentiment analysis, speaker A first, then speaker B
     sentiment_result1 = get_sentiment_result(text_result1)
     save_file(sentiment_result1, 'output/sentiment_result1.txt')
     print('result_get_result1 completed')
     sentiment_result2 = get_sentiment_result(text_result2)
     save_file(sentiment_result2, 'output/sentiment_result2.txt')
     print('result_get_result2 completed')

     # STEP 3: keyword extraction, speaker A first, then speaker B
     keyword_result1 = get_keyword_result(text_result1)
     save_file(keyword_result1, 'output/keyword_result1.txt')
     print('keyword_result1 completed')
     keyword_result2 = get_keyword_result(text_result2)
     save_file(keyword_result2, 'output/keyword_result2.txt')
     print('keyword_result2 completed')

 if __name__ == '__main__':

     # Run both analyses only when the recognition step returned a truthy result

     if TEXT_RESULT:

         whole_method()

         seperate_method()

输出大致如下：

python demo.py test.mp3

treating...

treating...

treating...

treating...

treating...

The task 8552d13470ed4839b11e0f3693f296f9 is in processing, task status: {"status":2,"desc":"音频合并完成"}

processing...

treating...

...

The task 8552d13470ed4839b11e0f3693f296f9 is in processing, task status: {"status":3,"desc":"音频转写中"}

processing...

treating...

treating...

text_result: 喂喂你好，是xxx的机主是吧？谁？呀我是xxx的工作人员，您在今天中午12点多在我们xxx提交了xxx是吧？那怎么？...那没有关系，我说您是否办理xxx？什么有什么有关系，啊有什么有关系啊。

text_result completed

sentiment_result completed

keyword_result completed

text_result1 喂喂你好，是xxx的机主是吧？呀我是xxx的工作人员，您在今天中午12点多在我们xxx提交了xxx是吧？...那没有关系，我说您是否办理xxx？

text_result2 谁？那怎么？...什么有什么有关系，啊有什么有关系啊。

text_result1 text_result2 completed

result_get_result1 completed

result_get_result2 completed

keyword_result1 completed

keyword_result2 completed

原文作者：雨先生
原文链接：https://www.cnblogs.com/noluye/p/11225024.html
许可协议：知识共享署名-非商业性使用 4.0 国际许可协议