ios – 从音频文件中提取仪表级别_app

/// Holds audio information used for building waveforms.
final class AudioContext {
    /// The audio asset URL used to load the context.
    public let audioURL: URL

    /// Total number of samples in the loaded asset.
    public let totalSamples: Int

    /// Loaded asset.
    public let asset: AVAsset

    /// Loaded audio track of `asset`.
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` from it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file to load.
    ///   - completionHandler: Called exactly once with the loaded context, or with `nil`
    ///     when the asset has no audio track, its duration cannot be loaded, or its
    ///     stream description is unavailable.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // Report a missing audio track through the existing `nil` failure path
        // instead of crashing the process.
        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            print("Couldn't load AVAssetTrack")
            completionHandler(nil)
            return
        }

        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                    else { break }

                // samples = sampleRate * duration in seconds (value / timescale)
                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return

            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }

            // Reached for every non-success outcome, including the `break` above.
            completionHandler(nil)
        }
    }
}

/// Reads the audio described by `audioContext` and reduces it to roughly `targetSamples`
/// averaged levels in dB (converted and clipped by `processSamples` / `getdB(from:)`).
///
/// - Parameters:
///   - audioContext: The loaded audio context. Passing `nil` is a programmer error and traps.
///   - targetSamples: Desired number of output values (default 100). A non-positive value
///     yields an empty array instead of dividing by zero.
/// - Returns: One value per bucket of `samplesPerPixel` input samples, plus one extra
///   value for any leftover samples that did not fill a whole bucket.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }
    guard targetSamples > 0 else { return [] }

    // NOTE(review): only the first third of the samples is requested here; this matches
    // the published snippet, confirm it is intended before relying on full coverage.
    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3

    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))

    // Decode to interleaved, little-endian, 16-bit integer PCM.
    let outputSettingsDict: [String: Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack, outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    var channelCount = 1
    let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            fatalError("Couldn't get the format description")
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
    // Box filter: averaging `samplesPerPixel` samples into one output value.
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
              let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
            break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer,
                                                        atOffset: 0,
                                                        lengthAtOffsetOut: &readBufferLength,
                                                        totalLengthOut: nil,
                                                        dataPointerOut: &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            CMSampleBufferInvalidate(readSampleBuffer)
            break
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        // Not enough data for a whole bucket yet; keep accumulating.
        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplestoprocess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let remainingSamples = sampleBuffer.count / MemoryLayout<Int16>.size
    if remainingSamples > 0 {
        let tailFilter = [Float](repeating: 1.0 / Float(remainingSamples), count: remainingSamples)
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplestoprocess: remainingSamples,
                       downSampledLength: 1,
                       samplesPerPixel: remainingSamples,
                       filter: tailFilter)
    }

    // The original check was disabled with `|| true`; keep returning what was read,
    // but surface a reader failure instead of hiding it.
    if reader.status == .failed {
        print("Couldn't read the audio file: \(reader.error?.localizedDescription ?? "Unknown error")")
    }

    return outputSamples
}

/// Converts the first `samplestoprocess` 16-bit samples of `sampleBuffer` to clipped dB
/// values, averages them down to `downSampledLength` values, appends those to
/// `outputSamples`, and removes the consumed bytes from `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit integer samples; the processed prefix is removed.
///   - outputSamples: Receives the downsampled dB values.
///   - samplestoprocess: Number of 16-bit samples to consume from the front of the buffer.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Decimation factor, i.e. input samples per output value.
///   - filter: Filter coefficients applied while decimating (`samplesPerPixel` entries).
func processSamples(fromData sampleBuffer: inout Data, outputSamples: inout [Float], samplestoprocess: Int, downSampledLength: Int, samplesPerPixel: Int, filter: [Float]) {
    guard samplestoprocess > 0 else { return }

    let byteCount = samplestoprocess * MemoryLayout<Int16>.size
    precondition(byteCount <= sampleBuffer.count, "sampleBuffer holds fewer samples than requested")

    let sampleCount = vDSP_Length(samplestoprocess)
    var processingBuffer = [Float](repeating: 0.0, count: samplestoprocess)

    // Convert 16bit int samples to floats. Only the conversion needs the raw bytes,
    // so the buffer is not mutated while its storage is borrowed.
    sampleBuffer.withUnsafeBytes { (rawSamples: UnsafeRawBufferPointer) in
        guard let samples = rawSamples.bindMemory(to: Int16.self).baseAddress else { return }
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    // Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    // Get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    // Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter,
                &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    // Remove processed samples
    sampleBuffer.removeFirst(byteCount)

    outputSamples += downSampledData
}

/// Converts amplitudes to decibels in place and clips the result to `[noiseFloor, 0]`.
///
/// - Parameter normalizedSamples: Non-negative amplitudes on the 16-bit scale
///   (the dB reference value is 32768); overwritten with the clipped dB values.
func getdB(from normalizedSamples: inout [Float]) {
    // Read the count once so it is not evaluated while the array is passed as `inout`.
    let count = vDSP_Length(normalizedSamples.count)

    // Convert samples to a log scale (trailing flag 1 selects amplitude, not power)
    var zero: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, count, 1)

    // Clip to [noiseFloor, 0]
    var ceiling: Float = 0.0
    var noiseFloorMutable = noiseFloor
    vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceiling, &normalizedSamples, 1, count)
}

// Example: load the bundled "audio.mp3" and render 300 level values from it.
// `self.render` implies this runs inside the type that declares `render`.
guard let path = Bundle.main.path(forResource: "audio", ofType: "mp3") else {
    fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)
var outputArray: [Float] = []

// `load` is asynchronous: `outputArray` is only filled once the handler has run.
AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }
    outputArray = self.render(audioContext: audioContext, targetSamples: 300)
})

/// Pre-renders the volume levels of an audio file without playing it.
///
/// The file is read in chunks of 1/20 of a second; for each chunk the mean absolute
/// sample value of the chosen channel is converted to dB.
///
/// - Parameters:
///   - audiofileURL: Location of the audio file to analyse.
///   - channelNumber: Zero-based index of the channel to measure.
///   - completionHandler: Called on a background queue with one dB value per chunk.
///     Receives an empty array when the file cannot be opened or the channel is invalid.
func averagePowers(audiofileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping (_ success: [Float]) -> ()) {
    // Do the calculations in a background thread, so larger audio files don't block the caller
    DispatchQueue.global(qos: .userInitiated).async {
        let audioFile: AVAudioFile
        do {
            audioFile = try AVAudioFile(forReading: audiofileURL)
        } catch {
            print("Couldn't open the audio file: \(error.localizedDescription)")
            completionHandler([])
            return
        }

        let audioFileFormat = audioFile.processingFormat

        // Set the size of frames to read from the audio file, you can adjust this to your liking
        let frameSizeToRead = Int(audioFileFormat.sampleRate / 20)

        // Create a pcm buffer the size of a frame
        guard frameSizeToRead > 0,
              channelNumber >= 0,
              channelNumber < Int(audioFileFormat.channelCount),
              let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFileFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
            print("Couldn't create the audio buffer")
            completionHandler([])
            return
        }

        // This is to how many frames/portions we're going to divide the audio file
        let numberOfFrames = Int(audioFile.length) / frameSizeToRead

        // This is the array to be returned
        var returnArray = [Float]()
        returnArray.reserveCapacity(numberOfFrames)

        // We're going to read the audio file, frame by frame
        for i in 0..<numberOfFrames {
            // Each frame starts from a different position in the audio file
            audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)

            // Read the frame from the audio file; stop and return what we have on failure
            do {
                try audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
            } catch {
                print("Couldn't read the audio file: \(error.localizedDescription)")
                break
            }

            // Get the data from the chosen channel, limited to the frames actually read
            let framesRead = Int(audioBuffer.frameLength)
            guard framesRead > 0, let channels = audioBuffer.floatChannelData else { break }
            let samples = UnsafeBufferPointer(start: channels[channelNumber], count: framesRead)

            // Calculate the mean value of the absolute values
            let meanValue = samples.reduce(0) { $0 + abs($1) } / Float(framesRead)

            // Calculate the dB power (you can adjust this); averages at or below
            // 0.000_000_01 are limited to -160.0
            let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0

            // Append the dB power of the current frame to the returnArray
            returnArray.append(dbPower)
        }

        // Return the dBPowers
        completionHandler(returnArray)
    }
}

// Example usage:
let path = Bundle.main.path(forResource: "audio.mp3", ofType: nil)!
let url = URL(fileURLWithPath: path)
averagePowers(audiofileURL: url, forChannel: 0, completionHandler: { array in
    //Use the array
})

概述：我需要从文件中提取音频电平(meter level),以便在播放音频之前渲染电平。我知道AVAudioPlayer可以在播放音频文件时通过 func averagePower(forChannel channelNumber: Int) -> Float 获取此信息,但在我的情况下,我希望事先得到一个由电平值组成的[Float]数组。更快的解决方案在iPhone 6s Plus上的耗时：0.538秒处理8MByte的mp3文件,持续时间为4分47秒,采样率为44,100。

我需要从文件中提取音频电平(meter level),以便在播放音频之前渲染电平。我知道AVAudioPlayer可以在播放音频文件时通过以下方法获取此信息：

func averagePower(forChannel channelNumber: Int) -> Float

但在我的情况下,我希望事先得到一个由电平值(meter level)组成的[Float]数组.

解决方法更快的解决方案

在iPhone 6s Plus上的耗时：

> 0.538秒处理8MByte的mp3文件,持续时间为4分47秒,采样率为44,100
> 0.170秒处理712KByte的mp3文件,持续时间为22秒,采样率为44,100
> 0.089s来处理通过在终端中使用此命令afconvert -f caff -d LEI16 audio.mp3 audio.caf转换上面的文件而创建的caffile.

让我们开始：

A)声明此类将保存有关音频资产的必要信息：

/// Holds audio information used for building waveforms.
final class AudioContext {
    /// The audio asset URL used to load the context.
    public let audioURL: URL

    /// Total number of samples in the loaded asset.
    public let totalSamples: Int

    /// Loaded asset.
    public let asset: AVAsset

    /// Loaded audio track of `asset`.
    public let assetTrack: AVAssetTrack

    private init(audioURL: URL, totalSamples: Int, asset: AVAsset, assetTrack: AVAssetTrack) {
        self.audioURL = audioURL
        self.totalSamples = totalSamples
        self.asset = asset
        self.assetTrack = assetTrack
    }

    /// Asynchronously loads the asset at `audioURL` and builds an `AudioContext` from it.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file to load.
    ///   - completionHandler: Called exactly once with the loaded context, or with `nil`
    ///     when the asset has no audio track, its duration cannot be loaded, or its
    ///     stream description is unavailable.
    public static func load(fromAudioURL audioURL: URL, completionHandler: @escaping (_ audioContext: AudioContext?) -> ()) {
        let asset = AVURLAsset(url: audioURL, options: [AVURLAssetPreferPreciseDurationAndTimingKey: NSNumber(value: true as Bool)])

        // Report a missing audio track through the existing `nil` failure path
        // instead of crashing the process.
        guard let assetTrack = asset.tracks(withMediaType: AVMediaType.audio).first else {
            print("Couldn't load AVAssetTrack")
            completionHandler(nil)
            return
        }

        asset.loadValuesAsynchronously(forKeys: ["duration"]) {
            var error: NSError?
            let status = asset.statusOfValue(forKey: "duration", error: &error)
            switch status {
            case .loaded:
                guard
                    let formatDescriptions = assetTrack.formatDescriptions as? [CMAudioFormatDescription],
                    let audioFormatDesc = formatDescriptions.first,
                    let asbd = CMAudioFormatDescriptionGetStreamBasicDescription(audioFormatDesc)
                    else { break }

                // samples = sampleRate * duration in seconds (value / timescale)
                let totalSamples = Int((asbd.pointee.mSampleRate) * Float64(asset.duration.value) / Float64(asset.duration.timescale))
                let audioContext = AudioContext(audioURL: audioURL, totalSamples: totalSamples, asset: asset, assetTrack: assetTrack)
                completionHandler(audioContext)
                return

            case .failed, .cancelled, .loading, .unknown:
                print("Couldn't load asset: \(error?.localizedDescription ?? "Unknown error")")
            }

            // Reached for every non-success outcome, including the `break` above.
            completionHandler(nil)
        }
    }
}

我们将使用其异步函数load,并将其结果处理为完成处理程序.

B)在视图控制器中导入AVFoundation和Accelerate：

import AVFoundation
import Accelerate

C)在视图控制器中声明噪声级别(以dB为单位)：

/// Noise floor in dB: the lower bound that `getdB(from:)` clips levels to.
let noiseFloor: Float = -80

例如,任何小于-80dB的东西都将被视为静音.

D)以下功能采用音频上下文并产生所需的dB功率. targetSamples默认设置为100,您可以更改它以满足您的UI需求：

/// Reads the audio described by `audioContext` and reduces it to roughly `targetSamples`
/// averaged levels in dB (converted and clipped by `processSamples` / `getdB(from:)`).
///
/// - Parameters:
///   - audioContext: The loaded audio context. Passing `nil` is a programmer error and traps.
///   - targetSamples: Desired number of output values (default 100). A non-positive value
///     yields an empty array instead of dividing by zero.
/// - Returns: One value per bucket of `samplesPerPixel` input samples, plus one extra
///   value for any leftover samples that did not fill a whole bucket.
func render(audioContext: AudioContext?, targetSamples: Int = 100) -> [Float] {
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }
    guard targetSamples > 0 else { return [] }

    // NOTE(review): only the first third of the samples is requested here; this matches
    // the published snippet, confirm it is intended before relying on full coverage.
    let sampleRange: CountableRange<Int> = 0..<audioContext.totalSamples/3

    guard let reader = try? AVAssetReader(asset: audioContext.asset) else {
        fatalError("Couldn't initialize the AVAssetReader")
    }

    reader.timeRange = CMTimeRange(start: CMTime(value: Int64(sampleRange.lowerBound), timescale: audioContext.asset.duration.timescale),
                                   duration: CMTime(value: Int64(sampleRange.count), timescale: audioContext.asset.duration.timescale))

    // Decode to interleaved, little-endian, 16-bit integer PCM.
    let outputSettingsDict: [String: Any] = [
        AVFormatIDKey: Int(kAudioFormatLinearPCM),
        AVLinearPCMBitDepthKey: 16,
        AVLinearPCMIsBigEndianKey: false,
        AVLinearPCMIsFloatKey: false,
        AVLinearPCMIsNonInterleaved: false
    ]

    let readerOutput = AVAssetReaderTrackOutput(track: audioContext.assetTrack, outputSettings: outputSettingsDict)
    readerOutput.alwaysCopiesSampleData = false
    reader.add(readerOutput)

    var channelCount = 1
    let formatDescriptions = audioContext.assetTrack.formatDescriptions as! [CMAudioFormatDescription]
    for item in formatDescriptions {
        guard let fmtDesc = CMAudioFormatDescriptionGetStreamBasicDescription(item) else {
            fatalError("Couldn't get the format description")
        }
        channelCount = Int(fmtDesc.pointee.mChannelsPerFrame)
    }

    let samplesPerPixel = max(1, channelCount * sampleRange.count / targetSamples)
    // Box filter: averaging `samplesPerPixel` samples into one output value.
    let filter = [Float](repeating: 1.0 / Float(samplesPerPixel), count: samplesPerPixel)

    var outputSamples = [Float]()
    var sampleBuffer = Data()

    // 16-bit samples
    reader.startReading()
    defer { reader.cancelReading() }

    while reader.status == .reading {
        guard let readSampleBuffer = readerOutput.copyNextSampleBuffer(),
              let readBuffer = CMSampleBufferGetDataBuffer(readSampleBuffer) else {
            break
        }

        // Append audio sample buffer into our current sample buffer
        var readBufferLength = 0
        var readBufferPointer: UnsafeMutablePointer<Int8>?
        let pointerStatus = CMBlockBufferGetDataPointer(readBuffer,
                                                        atOffset: 0,
                                                        lengthAtOffsetOut: &readBufferLength,
                                                        totalLengthOut: nil,
                                                        dataPointerOut: &readBufferPointer)
        guard pointerStatus == kCMBlockBufferNoErr, let readBytes = readBufferPointer else {
            CMSampleBufferInvalidate(readSampleBuffer)
            break
        }
        sampleBuffer.append(UnsafeBufferPointer(start: readBytes, count: readBufferLength))
        CMSampleBufferInvalidate(readSampleBuffer)

        let totalSamples = sampleBuffer.count / MemoryLayout<Int16>.size
        let downSampledLength = totalSamples / samplesPerPixel
        let samplesToProcess = downSampledLength * samplesPerPixel

        // Not enough data for a whole bucket yet; keep accumulating.
        guard samplesToProcess > 0 else { continue }

        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplestoprocess: samplesToProcess,
                       downSampledLength: downSampledLength,
                       samplesPerPixel: samplesPerPixel,
                       filter: filter)
    }

    // Process the remaining samples at the end which didn't fit into samplesPerPixel
    let remainingSamples = sampleBuffer.count / MemoryLayout<Int16>.size
    if remainingSamples > 0 {
        let tailFilter = [Float](repeating: 1.0 / Float(remainingSamples), count: remainingSamples)
        processSamples(fromData: &sampleBuffer,
                       outputSamples: &outputSamples,
                       samplestoprocess: remainingSamples,
                       downSampledLength: 1,
                       samplesPerPixel: remainingSamples,
                       filter: tailFilter)
    }

    // The original check was disabled with `|| true`; keep returning what was read,
    // but surface a reader failure instead of hiding it.
    if reader.status == .failed {
        print("Couldn't read the audio file: \(reader.error?.localizedDescription ?? "Unknown error")")
    }

    return outputSamples
}

E)render使用此函数对音频文件中的数据进行下采样,并转换为分贝：

/// Converts the first `samplestoprocess` 16-bit samples of `sampleBuffer` to clipped dB
/// values, averages them down to `downSampledLength` values, appends those to
/// `outputSamples`, and removes the consumed bytes from `sampleBuffer`.
///
/// - Parameters:
///   - sampleBuffer: Raw 16-bit integer samples; the processed prefix is removed.
///   - outputSamples: Receives the downsampled dB values.
///   - samplestoprocess: Number of 16-bit samples to consume from the front of the buffer.
///   - downSampledLength: Number of output values to produce.
///   - samplesPerPixel: Decimation factor, i.e. input samples per output value.
///   - filter: Filter coefficients applied while decimating (`samplesPerPixel` entries).
func processSamples(fromData sampleBuffer: inout Data, outputSamples: inout [Float], samplestoprocess: Int, downSampledLength: Int, samplesPerPixel: Int, filter: [Float]) {
    guard samplestoprocess > 0 else { return }

    let byteCount = samplestoprocess * MemoryLayout<Int16>.size
    precondition(byteCount <= sampleBuffer.count, "sampleBuffer holds fewer samples than requested")

    let sampleCount = vDSP_Length(samplestoprocess)
    var processingBuffer = [Float](repeating: 0.0, count: samplestoprocess)

    // Convert 16bit int samples to floats. Only the conversion needs the raw bytes,
    // so the buffer is not mutated while its storage is borrowed.
    sampleBuffer.withUnsafeBytes { (rawSamples: UnsafeRawBufferPointer) in
        guard let samples = rawSamples.bindMemory(to: Int16.self).baseAddress else { return }
        vDSP_vflt16(samples, 1, &processingBuffer, 1, sampleCount)
    }

    // Take the absolute values to get amplitude
    vDSP_vabs(processingBuffer, 1, &processingBuffer, 1, sampleCount)

    // Get the corresponding dB, and clip the results
    getdB(from: &processingBuffer)

    // Downsample and average
    var downSampledData = [Float](repeating: 0.0, count: downSampledLength)
    vDSP_desamp(processingBuffer,
                vDSP_Stride(samplesPerPixel),
                filter,
                &downSampledData,
                vDSP_Length(downSampledLength),
                vDSP_Length(samplesPerPixel))

    // Remove processed samples
    sampleBuffer.removeFirst(byteCount)

    outputSamples += downSampledData
}

F)反过来调用此函数获取相应的dB,并将结果剪辑为[noiseFloor,0]：

/// Converts amplitudes to decibels in place and clips the result to `[noiseFloor, 0]`.
///
/// - Parameter normalizedSamples: Non-negative amplitudes on the 16-bit scale
///   (the dB reference value is 32768); overwritten with the clipped dB values.
func getdB(from normalizedSamples: inout [Float]) {
    // Read the count once so it is not evaluated while the array is passed as `inout`.
    let count = vDSP_Length(normalizedSamples.count)

    // Convert samples to a log scale (trailing flag 1 selects amplitude, not power)
    var zero: Float = 32768.0
    vDSP_vdbcon(normalizedSamples, 1, &zero, &normalizedSamples, 1, count, 1)

    // Clip to [noiseFloor, 0]
    var ceiling: Float = 0.0
    var noiseFloorMutable = noiseFloor
    vDSP_vclip(normalizedSamples, 1, &noiseFloorMutable, &ceiling, &normalizedSamples, 1, count)
}

G)最后你可以像这样得到音频的波形：

// Example: load the bundled "audio.mp3" and render 300 level values from it.
// `self.render` implies this runs inside the type that declares `render`.
guard let path = Bundle.main.path(forResource: "audio", ofType: "mp3") else {
    fatalError("Couldn't find the file path")
}
let url = URL(fileURLWithPath: path)
var outputArray: [Float] = []

// `load` is asynchronous: `outputArray` is only filled once the handler has run.
AudioContext.load(fromAudioURL: url, completionHandler: { audioContext in
    guard let audioContext = audioContext else {
        fatalError("Couldn't create the audioContext")
    }
    outputArray = self.render(audioContext: audioContext, targetSamples: 300)
})

不要忘记AudioContext.load(fromAudioURL :)是异步的.

该解决方案由William Entriken从this repo合成.所有的功劳归于他.

老解决方案

这是一个可用于预渲染音频文件的音量级别而无需播放的功能：

/// Pre-renders the volume levels of an audio file without playing it.
///
/// The file is read in chunks of 1/20 of a second; for each chunk the mean absolute
/// sample value of the chosen channel is converted to dB.
///
/// - Parameters:
///   - audiofileURL: Location of the audio file to analyse.
///   - channelNumber: Zero-based index of the channel to measure.
///   - completionHandler: Called on a background queue with one dB value per chunk.
///     Receives an empty array when the file cannot be opened or the channel is invalid.
func averagePowers(audiofileURL: URL, forChannel channelNumber: Int, completionHandler: @escaping (_ success: [Float]) -> ()) {
    // Do the calculations in a background thread, so larger audio files don't block the caller
    DispatchQueue.global(qos: .userInitiated).async {
        let audioFile: AVAudioFile
        do {
            audioFile = try AVAudioFile(forReading: audiofileURL)
        } catch {
            print("Couldn't open the audio file: \(error.localizedDescription)")
            completionHandler([])
            return
        }

        let audioFileFormat = audioFile.processingFormat

        // Set the size of frames to read from the audio file, you can adjust this to your liking
        let frameSizeToRead = Int(audioFileFormat.sampleRate / 20)

        // Create a pcm buffer the size of a frame
        guard frameSizeToRead > 0,
              channelNumber >= 0,
              channelNumber < Int(audioFileFormat.channelCount),
              let audioBuffer = AVAudioPCMBuffer(pcmFormat: audioFileFormat, frameCapacity: AVAudioFrameCount(frameSizeToRead)) else {
            print("Couldn't create the audio buffer")
            completionHandler([])
            return
        }

        // This is to how many frames/portions we're going to divide the audio file
        let numberOfFrames = Int(audioFile.length) / frameSizeToRead

        // This is the array to be returned
        var returnArray = [Float]()
        returnArray.reserveCapacity(numberOfFrames)

        // We're going to read the audio file, frame by frame
        for i in 0..<numberOfFrames {
            // Each frame starts from a different position in the audio file
            audioFile.framePosition = AVAudioFramePosition(i * frameSizeToRead)

            // Read the frame from the audio file; stop and return what we have on failure
            do {
                try audioFile.read(into: audioBuffer, frameCount: AVAudioFrameCount(frameSizeToRead))
            } catch {
                print("Couldn't read the audio file: \(error.localizedDescription)")
                break
            }

            // Get the data from the chosen channel, limited to the frames actually read
            let framesRead = Int(audioBuffer.frameLength)
            guard framesRead > 0, let channels = audioBuffer.floatChannelData else { break }
            let samples = UnsafeBufferPointer(start: channels[channelNumber], count: framesRead)

            // Calculate the mean value of the absolute values
            let meanValue = samples.reduce(0) { $0 + abs($1) } / Float(framesRead)

            // Calculate the dB power (you can adjust this); averages at or below
            // 0.000_000_01 are limited to -160.0
            let dbPower: Float = meanValue > 0.000_000_01 ? 20 * log10(meanValue) : -160.0

            // Append the dB power of the current frame to the returnArray
            returnArray.append(dbPower)
        }

        // Return the dBPowers
        completionHandler(returnArray)
    }
}

// Example usage:
let path = Bundle.main.path(forResource: "audio.mp3", ofType: nil)!
let url = URL(fileURLWithPath: path)
averagePowers(audiofileURL: url, forChannel: 0, completionHandler: { array in
    //Use the array
})

你可以这样调用它：

使用仪器,此解决方案在1.2秒内使用高cpu,使用returnArray返回主线程大约需要5秒,在低电池模式下最多需要10秒.

总结

以上是内存溢出为你收集整理的ios – 从音频文件中提取仪表级别全部内容，希望文章能够帮你解决ios – 从音频文件中提取仪表级别所遇到的程序开发问题。

如果觉得内存溢出网站内容还不错，欢迎将内存溢出网站推荐给程序员好友。

欢迎分享，转载请注明来源：内存溢出

原文地址: https://outofmemory.cn/web/1105799.html

ios – 从音频文件中提取仪表级别

发表评论

评论列表（0条）