步骤

1)安装R。windows操作系统安装包的链接:https://cran.r-project.org/bin/windows/base/

2)切换当前路径为脚本所在路径

点击 文件 > 改变工作目录

3)运行脚本

点击 文件 > 运行R脚本文件

如果希望自己生成训练数据,就运行生成训练数据的脚本。如果只是想生成测试数据,就运行生成测试数据的脚本。

生成训练数据的脚本

将男声的音频文件置于male文件夹下,将女声的音频文件置于female文件夹下

packages <- c('tuneR', 'seewave', 'fftw', 'caTools', 'warbleR', 'mice', 'e1071', 'rpart', 'e1071')
if (length(setdiff(packages, rownames(installed.packages()))) > ) {
install.packages(setdiff(packages, rownames(installed.packages())))
}
library(tuneR)
library(seewave)
library(caTools)
library(rpart) library(warbleR)
library(mice)
library(e1071) specan3 <- function(X, bp = c(,), wl = , threshold = , parallel = ){
# To use parallel processing: library(devtools), install_github('nathanvan/parallelsugar')
if(class(X) == "data.frame") {if(all(c("sound.files", "selec",
"start", "end") %in% colnames(X)))
{
start <- as.numeric(unlist(X$start))
end <- as.numeric(unlist(X$end))
sound.files <- as.character(unlist(X$sound.files))
selec <- as.character(unlist(X$selec))
} else stop(paste(paste(c("sound.files", "selec", "start", "end")[!(c("sound.files", "selec",
"start", "end") %in% colnames(X))], collapse=", "), "column(s) not found in data frame"))
} else stop("X is not a data frame") #if there are NAs in start or end stop
if(any(is.na(c(end, start)))) stop("NAs found in start and/or end") #if end or start are not numeric stop
if(all(class(end) != "numeric" & class(start) != "numeric")) stop("'end' and 'selec' must be numeric") #if any start higher than end stop
if(any(end - start<)) stop(paste("The start is higher than the end in", length(which(end - start<)), "case(s)")) #if any selections longer than 20 secs stop
if(any(end - start>)) stop(paste(length(which(end - start>)), "selection(s) longer than 20 sec"))
options( show.error.messages = TRUE) #if bp is not vector or length!=2 stop
if(!is.vector(bp)) stop("'bp' must be a numeric vector of length 2") else{
if(!length(bp) == ) stop("'bp' must be a numeric vector of length 2")} #return warning if not all sound files were found
fs <- list.files(path = getwd(), pattern = ".wav$", ignore.case = TRUE)
if(length(unique(sound.files[(sound.files %in% fs)])) != length(unique(sound.files)))
cat(paste(length(unique(sound.files))-length(unique(sound.files[(sound.files %in% fs)])),
".wav file(s) not found")) #count number of sound files in working directory and if stop
d <- which(sound.files %in% fs)
if(length(d) == ){
stop("The .wav files are not in the working directory")
} else {
start <- start[d]
end <- end[d]
selec <- selec[d]
sound.files <- sound.files[d]
} # If parallel is not numeric
if(!is.numeric(parallel)) stop("'parallel' must be a numeric vector of length 1")
if(any(!(parallel %% == ),parallel < )) stop("'parallel' should be a positive integer") # If parallel was called
if(parallel > )
{ options(warn = -)
if(all(Sys.info()[] == "Windows",requireNamespace("parallelsugar", quietly = TRUE) == TRUE))
lapp <- function(X, FUN) parallelsugar::mclapply(X, FUN, mc.cores = parallel) else
if(Sys.info()[] == "Windows"){
cat("Windows users need to install the 'parallelsugar' package for parallel computing (you are not doing it now!)")
lapp <- pbapply::pblapply} else lapp <- function(X, FUN) parallel::mclapply(X, FUN, mc.cores = parallel)} else lapp <- pbapply::pblapply options(warn = ) if(parallel == ) cat("Measuring acoustic parameters:")
x <- as.data.frame(lapp(:length(start), function(i) {
r <- tuneR::readWave(file.path(getwd(), sound.files[i]), from = start[i], to = end[i], units = "seconds") b<- bp #in case bp its higher than can be due to sampling rate
if(b[] > ceiling(r@samp.rate/) - ) b[] <- ceiling(r@samp.rate/) - #frequency spectrum analysis
songspec <- seewave::spec(r, f = r@samp.rate, plot = FALSE)
analysis <- seewave::specprop(songspec, f = r@samp.rate, flim = c(, /), plot = FALSE) #save parameters
meanfreq <- analysis$mean/
sd <- analysis$sd/
median <- analysis$median/
Q25 <- analysis$Q25/
Q75 <- analysis$Q75/
IQR <- analysis$IQR/
skew <- analysis$skewness
kurt <- analysis$kurtosis
sp.ent <- analysis$sh
sfm <- analysis$sfm
mode <- analysis$mode/
centroid <- analysis$cent/ #Frequency with amplitude peaks
peakf <- #seewave::fpeaks(songspec, f = r@samp.rate, wl = wl, nmax = , plot = FALSE)[, ] #Fundamental frequency parameters
ff <- seewave::fund(r, f = r@samp.rate, ovlp = , threshold = threshold,
fmax = , ylim=c(, /), plot = FALSE, wl = wl)[, ]
meanfun<-mean(ff, na.rm = T)
minfun<-min(ff, na.rm = T)
maxfun<-max(ff, na.rm = T) #Dominant frecuency parameters
y <- seewave::dfreq(r, f = r@samp.rate, wl = wl, ylim=c(, /), ovlp = , plot = F, threshold = threshold, bandpass = b * , fftw = TRUE)[, ]
meandom <- mean(y, na.rm = TRUE)
mindom <- min(y, na.rm = TRUE)
maxdom <- max(y, na.rm = TRUE)
dfrange <- (maxdom - mindom)
duration <- (end[i] - start[i]) #modulation index calculation
changes <- vector()
for(j in which(!is.na(y))){
change <- abs(y[j] - y[j + ])
changes <- append(changes, change)
}
if(mindom==maxdom) modindx<- else modindx <- mean(changes, na.rm = T)/dfrange #save results
return(c(duration, meanfreq, sd, median, Q25, Q75, IQR, skew, kurt, sp.ent, sfm, mode,
centroid, peakf, meanfun, minfun, maxfun, meandom, mindom, maxdom, dfrange, modindx))
})) #change result names rownames(x) <- c("duration", "meanfreq", "sd", "median", "Q25", "Q75", "IQR", "skew", "kurt", "sp.ent",
"sfm","mode", "centroid", "peakf", "meanfun", "minfun", "maxfun", "meandom", "mindom", "maxdom", "dfrange", "modindx")
x <- data.frame(sound.files, selec, as.data.frame(t(x)))
colnames(x)[:] <- c("sound.files", "selec")
rownames(x) <- c(:nrow(x)) return(x)
} processFolder <- function(folderName) {
# Start with empty data.frame.
data <- data.frame() # Get list of files in the folder.
list <- list.files(folderName, '\\.wav') # Add file list to data.frame for processing.
for (fileName in list) {
row <- data.frame(fileName, , , )
data <- rbind(data, row)
} # Set column names.
names(data) <- c('sound.files', 'selec', 'start', 'end') # Move into folder for processing.
setwd(folderName) # Process files.
acoustics <- specan3(data, parallel=) # Move back into parent folder.
setwd('..') acoustics
} gender <- function(filePath) {
if (!exists('genderBoosted')) {
load('model.bin')
} # Setup paths.
currentPath <- getwd()
fileName <- basename(filePath)
path <- dirname(filePath) # Set directory to read file.
setwd(path) # Start with empty data.frame.
data <- data.frame(fileName, , , ) # Set column names.
names(data) <- c('sound.files', 'selec', 'start', 'end') # Process files.
acoustics <- specan3(data, parallel=) # Restore path.
setwd(currentPath) predict(genderCombo, newdata=acoustics)
} # Load data
males <- processFolder('male')
females <- processFolder('female') # Set labels.
males$label <-
females$label <-
data <- rbind(males, females)
data$label <- factor(data$label, labels=c('male', 'female')) # Remove unused columns.
data$duration <- NULL
data$sound.files <- NULL
data$selec <- NULL
data$peakf <- NULL # Remove rows containing NA's.
data <- data[complete.cases(data),] # Write out csv dataset.
write.csv(data, file='voice.csv', sep=',', row.names=F)

meelo

生成测试数据的脚本

将测试音频文件置于test文件夹下

packages <- c('tuneR', 'seewave', 'fftw', 'caTools', 'warbleR', 'mice', 'e1071', 'rpart', 'e1071')
if (length(setdiff(packages, rownames(installed.packages()))) > ) {
install.packages(setdiff(packages, rownames(installed.packages())))
}
library(tuneR)
library(seewave)
library(caTools)
library(rpart) library(warbleR)
library(mice)
library(e1071) specan3 <- function(X, bp = c(,), wl = , threshold = , parallel = ){
# To use parallel processing: library(devtools), install_github('nathanvan/parallelsugar')
if(class(X) == "data.frame") {if(all(c("sound.files", "selec",
"start", "end") %in% colnames(X)))
{
start <- as.numeric(unlist(X$start))
end <- as.numeric(unlist(X$end))
sound.files <- as.character(unlist(X$sound.files))
selec <- as.character(unlist(X$selec))
} else stop(paste(paste(c("sound.files", "selec", "start", "end")[!(c("sound.files", "selec",
"start", "end") %in% colnames(X))], collapse=", "), "column(s) not found in data frame"))
} else stop("X is not a data frame") #if there are NAs in start or end stop
if(any(is.na(c(end, start)))) stop("NAs found in start and/or end") #if end or start are not numeric stop
if(all(class(end) != "numeric" & class(start) != "numeric")) stop("'end' and 'selec' must be numeric") #if any start higher than end stop
if(any(end - start<)) stop(paste("The start is higher than the end in", length(which(end - start<)), "case(s)")) #if any selections longer than 20 secs stop
if(any(end - start>)) stop(paste(length(which(end - start>)), "selection(s) longer than 20 sec"))
options( show.error.messages = TRUE) #if bp is not vector or length!=2 stop
if(!is.vector(bp)) stop("'bp' must be a numeric vector of length 2") else{
if(!length(bp) == ) stop("'bp' must be a numeric vector of length 2")} #return warning if not all sound files were found
fs <- list.files(path = getwd(), pattern = ".wav$", ignore.case = TRUE)
if(length(unique(sound.files[(sound.files %in% fs)])) != length(unique(sound.files)))
cat(paste(length(unique(sound.files))-length(unique(sound.files[(sound.files %in% fs)])),
".wav file(s) not found")) #count number of sound files in working directory and if stop
d <- which(sound.files %in% fs)
if(length(d) == ){
stop("The .wav files are not in the working directory")
} else {
start <- start[d]
end <- end[d]
selec <- selec[d]
sound.files <- sound.files[d]
} # If parallel is not numeric
if(!is.numeric(parallel)) stop("'parallel' must be a numeric vector of length 1")
if(any(!(parallel %% == ),parallel < )) stop("'parallel' should be a positive integer") # If parallel was called
if(parallel > )
{ options(warn = -)
if(all(Sys.info()[] == "Windows",requireNamespace("parallelsugar", quietly = TRUE) == TRUE))
lapp <- function(X, FUN) parallelsugar::mclapply(X, FUN, mc.cores = parallel) else
if(Sys.info()[] == "Windows"){
cat("Windows users need to install the 'parallelsugar' package for parallel computing (you are not doing it now!)")
lapp <- pbapply::pblapply} else lapp <- function(X, FUN) parallel::mclapply(X, FUN, mc.cores = parallel)} else lapp <- pbapply::pblapply options(warn = ) if(parallel == ) cat("Measuring acoustic parameters:")
x <- as.data.frame(lapp(:length(start), function(i) {
r <- tuneR::readWave(file.path(getwd(), sound.files[i]), from = start[i], to = end[i], units = "seconds") b<- bp #in case bp its higher than can be due to sampling rate
if(b[] > ceiling(r@samp.rate/) - ) b[] <- ceiling(r@samp.rate/) - #frequency spectrum analysis
songspec <- seewave::spec(r, f = r@samp.rate, plot = FALSE)
analysis <- seewave::specprop(songspec, f = r@samp.rate, flim = c(, /), plot = FALSE) #save parameters
meanfreq <- analysis$mean/
sd <- analysis$sd/
median <- analysis$median/
Q25 <- analysis$Q25/
Q75 <- analysis$Q75/
IQR <- analysis$IQR/
skew <- analysis$skewness
kurt <- analysis$kurtosis
sp.ent <- analysis$sh
sfm <- analysis$sfm
mode <- analysis$mode/
centroid <- analysis$cent/ #Frequency with amplitude peaks
peakf <- #seewave::fpeaks(songspec, f = r@samp.rate, wl = wl, nmax = , plot = FALSE)[, ] #Fundamental frequency parameters
ff <- seewave::fund(r, f = r@samp.rate, ovlp = , threshold = threshold,
fmax = , ylim=c(, /), plot = FALSE, wl = wl)[, ]
meanfun<-mean(ff, na.rm = T)
minfun<-min(ff, na.rm = T)
maxfun<-max(ff, na.rm = T) #Dominant frecuency parameters
y <- seewave::dfreq(r, f = r@samp.rate, wl = wl, ylim=c(, /), ovlp = , plot = F, threshold = threshold, bandpass = b * , fftw = TRUE)[, ]
meandom <- mean(y, na.rm = TRUE)
mindom <- min(y, na.rm = TRUE)
maxdom <- max(y, na.rm = TRUE)
dfrange <- (maxdom - mindom)
duration <- (end[i] - start[i]) #modulation index calculation
changes <- vector()
for(j in which(!is.na(y))){
change <- abs(y[j] - y[j + ])
changes <- append(changes, change)
}
if(mindom==maxdom) modindx<- else modindx <- mean(changes, na.rm = T)/dfrange #save results
return(c(duration, meanfreq, sd, median, Q25, Q75, IQR, skew, kurt, sp.ent, sfm, mode,
centroid, peakf, meanfun, minfun, maxfun, meandom, mindom, maxdom, dfrange, modindx))
})) #change result names rownames(x) <- c("duration", "meanfreq", "sd", "median", "Q25", "Q75", "IQR", "skew", "kurt", "sp.ent",
"sfm","mode", "centroid", "peakf", "meanfun", "minfun", "maxfun", "meandom", "mindom", "maxdom", "dfrange", "modindx")
x <- data.frame(sound.files, selec, as.data.frame(t(x)))
colnames(x)[:] <- c("sound.files", "selec")
rownames(x) <- c(:nrow(x)) return(x)
} processFolder <- function(folderName) {
# Start with empty data.frame.
data <- data.frame() # Get list of files in the folder.
list <- list.files(folderName, '\\.wav') # Add file list to data.frame for processing.
for (fileName in list) {
row <- data.frame(fileName, , , )
data <- rbind(data, row)
} # Set column names.
names(data) <- c('sound.files', 'selec', 'start', 'end') # Move into folder for processing.
setwd(folderName) # Process files.
acoustics <- specan3(data, parallel=) # Move back into parent folder.
setwd('..') acoustics
} gender <- function(filePath) {
if (!exists('genderBoosted')) {
load('model.bin')
} # Setup paths.
currentPath <- getwd()
fileName <- basename(filePath)
path <- dirname(filePath) # Set directory to read file.
setwd(path) # Start with empty data.frame.
data <- data.frame(fileName, , , ) # Set column names.
names(data) <- c('sound.files', 'selec', 'start', 'end') # Process files.
acoustics <- specan3(data, parallel=) # Restore path.
setwd(currentPath) predict(genderCombo, newdata=acoustics)
} # Load data
data <- processFolder('test') # Remove unused columns.
data$duration <- NULL
data$sound.files <- NULL
data$selec <- NULL
data$peakf <- NULL # Remove rows containing NA's.
data <- data[complete.cases(data),] # Write out csv dataset.
write.csv(data, file='test.csv', sep=',', row.names=F)

meelo

语音性别识别 - 使用R提取特征的更多相关文章

  1. 论文笔记:语音情感识别(三)手工特征+CRNN

    一:Emotion Recognition from Human Speech Using Temporal Information and Deep Learning(2018 InterSpeec ...

  2. 论文笔记:语音情感识别(四)语音特征之声谱图,log梅尔谱,MFCC,deltas

    一:原始信号 从音频文件中读取出来的原始语音信号通常称为raw waveform,是一个一维数组,长度是由音频长度和采样率决定,比如采样率Fs为16KHz,表示一秒钟内采样16000个点,这个时候如果 ...

  3. C++开发人脸性别识别教程(12)——加入性别识别功能

    经过之前几篇博客的解说,我们已经成功搭建了MFC应用框架,并实现了主要的图像显示和人脸检測程序,在这篇博文中我们要向当中加入性别识别代码. 关于性别识别,之前已经专门拿出两篇博客的篇幅来进行解说.这里 ...

  4. 论文笔记:语音情感识别(五)语音特征集之eGeMAPS,ComParE,09IS,BoAW

    一:LLDs特征和HSFs特征 (1)首先区分一下frame和utterance,frame就是一帧语音.utterance是一段语音,是比帧高一级的语音单位,通常指一句话,一个语音样本.uttera ...

  5. 论文笔记:语音情感识别(二)声谱图+CRNN

    一:An Attention Pooling based Representation Learning Method for Speech Emotion Recognition(2018 Inte ...

  6. 基于人脸识别+IMDB-WIFI+Caffe的性别识别

    本文用记录基于Caffe的人脸性别识别过程.基于imdb-wiki模型做finetune,imdb-wiki数据集合模型可从这里下载:https://data.vision.ee.ethz.ch/cv ...

  7. 图像物体检測识别中的LBP特征

    版权声明:本文为博主原创文章,未经博主同意不得转载. https://blog.csdn.net/xinzhangyanxiang/article/details/37317863 图像物体检測识别中 ...

  8. 基于深度学习的人脸性别识别系统(含UI界面,Python代码)

    摘要:人脸性别识别是人脸识别领域的一个热门方向,本文详细介绍基于深度学习的人脸性别识别系统,在介绍算法原理的同时,给出Python的实现代码以及PyQt的UI界面.在界面中可以选择人脸图片.视频进行检 ...

  9. 卷积神经网络提取特征并用于SVM

    模式识别课程的一次作业.其目标是对UCI的手写数字数据集进行识别,样本数量大约是1600个.图片大小为16x16.要求必须使用SVM作为二分类的分类器. 本文重点是如何使用卷积神经网络(CNN)来提取 ...

随机推荐

  1. 【贪心】【P5078】Tweetuzki 爱军训

    Description Tweetuzki 所在的班级有 \(n\) 名学生,座号从 \(1\) 到 \(n\).有一次,教官命令班上的 \(n\) 名学生按照座号顺序从左到右排成一排站好军姿,其中 ...

  2. 【数学】【CF27E】 Number With The Given Amount Of Divisors

    传送门 Description 给定一个正整数\(n\),输出最小的整数,满足这个整数有n个因子 Input 一行一个整数\(n\) Output 一行一个整数,代表答案. Hint \(1~\leq ...

  3. PID控制算法的C语言实现六 抗积分饱和的PID控制算法C语言实现

    所谓的积分饱和现象是指如果系统存在一个方向的偏差,PID控制器的输出由于积分作用的不断累加而加大,从而导致执行机构达到极限位置,若控制器输出U(k)继续增大,执行器开度不可能再增大,此时计算机输出控制 ...

  4. Uiautomator 快速调试

    UiAutomatorHelper使用      1.介绍:     他是一种可以快速调试的方法:其本身也是java问津相当于自动化脚本,查看该文件,其主要实现的功能如下         1.创建bu ...

  5. 设置texture

    //获取内部资源贴图 public void setInsideTexture() { Texture2D texture = Resources.Load(texture_url) as Textu ...

  6. OpenCV---色彩空间(一)

    颜色空间:用三种或者更多特征来指定颜色的方法,被称为颜色空间或者颜色模型 1.RGB(OpenCV中为BGR): 一幅图像由三个独立的图像平面或者通道构成:红.蓝.绿(以及可选项:透明度alpha通道 ...

  7. Win7 32位安装Oracle11g R2 图解示例

    Win7 32位操作系统安装Oracle11g R2 图解示例.废话不说了,直接上图. 1.下载的两个oracle 11gR2压缩包解压到单独的文件夹中. 2.找到解压的database文件夹中的Se ...

  8. 算法专题-STL篇

    这篇文章着重记录c++中STL的用法.主要粗略的介绍其用法,以知识点的形式呈现其功能,不会深入源码分析其工作原理. 排序和检索. sort(a,a+n),对a[0]往后的n个元素(包括a[0])进行排 ...

  9. HDU6128 二次剩余/二次域求二次剩余解/LL快速乘法取模

    LINK 题意:求满足模p下$\frac{1}{a_i+a_j}\equiv\frac{1}{a_i}+\frac{1}{a_j}$的对数,其中$n,p(1\leq n\leq10^5,2\leq p ...

  10. Array和String测试与java.String.split

    java.string.split() 存在于java.lang包中,返回值是一个数组. 作用是按指定字符或者正则去切割某个字符串,结果以字符串数组形式返回. 例 String [] toSort = ...