iOS语音转文字实现

iOS语音转文字实现,第1张

目前正在做一个IM的APP,内部好友之间可以发送语音,需要长按实现语音转文字的功能。之前使用的是阿里的NUI.framework,但是这个破玩意经常出现转出来的文字重复,即使做了多声道控制也无法处理掉,体验太差。没办法,就决定替换为apple自己的实现,毕竟siri那么强大!此实现包含本地音频及远程音频,你只需要按照数据model保存对应的path即可,内部会自动识别。

现在来看看实现条件:

Info.plist里面添加两个键值对:

1、Privacy - Speech Recognition Usage Description(用于请求语音识别授权)

2、 Privacy - Microphone Usage Description(用于请求麦克风语音输入授权)。

并给出相应的文字描述。

导入库文件:

#import <Speech/Speech.h>

以下为实现头文件及逻辑文件:

头文件: NSVoice2Text.h

//
//  NSVoice2Text.h
//  语音转文字
//
//  Created by wise on 2021/10/13.
//

#import <Foundation/Foundation.h>
#import <Speech/Speech.h>

NS_ASSUME_NONNULL_BEGIN

/// Speech-recognition authorization state reported to callers of NSVoice2Text.
/// NOTE(review): the implementation forwards the SFSpeechRecognizerAuthorizationStatus it
/// receives without converting it, so the case order here presumably has to match that
/// enum - confirm against the Speech framework headers.
typedef NS_ENUM(NSUInteger, NSVoice2TextAuthorationStatus) {
    NSVoice2TextAuthorizationStatusNotDetermined,  // Speech recognition has not been authorized yet
    NSVoice2TextAuthorizationStatusDenied,         // The user refused speech recognition
    NSVoice2TextAuthorizationStatusRestricted,     // Speech recognition is restricted on this device
    NSVoice2TextAuthorizationStatusAuthorized,     // Speech recognition may be used
};


/// One queued voice-to-text task: where the audio lives plus its queue state.
@interface NSVoiceModel : NSObject
/// Audio location. A string matching ^(http|https)+.* is converted with URLWithString:,
/// anything else is treated as a local file path.
@property (nonatomic,copy) NSString *path;

/// Caller-chosen identifier. A model whose taskId equals that of an already queued model
/// is not added to the queue again.
@property (nonatomic,assign) NSInteger taskId;

/// Set to YES by the converter when it starts converting this model.
@property (nonatomic,assign) BOOL isRunning;

/// Set to YES by the converter when it picks this model up from the head of the queue.
@property (nonatomic,assign) BOOL isInQueue;
@end


/// Outcome of one conversion task, handed to the results block.
@interface NSVoice2TextFinal : NSObject
/// Transcribed text. Nil when the task failed: failure results are built without a value,
/// so the property is declared nullable to keep the header honest for callers.
@property (nonatomic, copy, nullable) NSString *value;

/// Task identifier filled in by the converter when it builds the result.
@property (nonatomic, assign) NSInteger taskId;

/// Failure reason, or nil when the conversion succeeded.
@property (nonatomic, copy, nullable) NSError *error;
@end


/// Queue-based voice-to-text converter built on the Speech framework.
/// All entry points are class methods that operate on one shared instance.
@interface NSVoice2Text : NSObject

/// Returns the isRunning flag of the task at the head of the shared queue
/// (NO when the queue is empty).
+ (BOOL) isRunning;

// Authorization
/// Requests speech-recognition authorization from the system and forwards the resulting
/// status to requestBlock.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;

/// Queues the given models for conversion and starts processing the queue.
/// @param glist Models to convert; expected to contain NSVoiceModel objects. A model whose
///        taskId is already queued is skipped.
/// @param runningModelBlock Called with a model when it is picked up from the head of the queue.
/// @param resultsBlock Called with the outcome (text or error) of each task.
/// @param rtaget Stored weakly on every model. If it is gone when a recognition callback
///        arrives, the task is removed from the queue without calling resultsBlock.
+ (void)voice2TextGotter:(NSArray *)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock rtaget:(id)rtaget;

@end

NS_ASSUME_NONNULL_END

实现文件:NSVoice2Text.m

//
//  NSVoice2Text.m
//  语音转文字
//
//  Created by wise on 2021/10/13.
//

#import "NSVoice2Text.h"
#import "NSMutableTaskQueue.h"

/// Signature of the block that receives the outcome of one conversion task.
typedef void (^VoiceConversionResultsBlock) (NSVoice2TextFinal *finalValue);

/// Private per-task state attached by +voice2TextGotter:... before the model is queued.
@interface NSVoiceModel ()
/// Owner of the request, held weakly; recognition callbacks are only delivered while it is alive.
@property (nonatomic,weak) id taskTarget;

/// Receives the final text or the error for this task.
@property (nonatomic, copy) VoiceConversionResultsBlock voiceConversionBlock;

/// Optional; called with this model when the converter picks it up from the queue.
@property (nonatomic, copy) void (^voiceConversionRunningBlock)(NSVoiceModel *md);
@end

// Plain data holder: no methods beyond its declared properties.
@implementation NSVoiceModel

@end

// Plain data holder: no methods beyond its declared properties.
@implementation NSVoice2TextFinal
@end


// Shared converter instance; created lazily by +shareInstance and cleared by +releaseInstance.
static NSVoice2Text *v2text = nil;

// Private state of the converter. The class adopts SFSpeechRecognizerDelegate because the
// speechRecognizer getter assigns self as the recognizer's delegate; every method of that
// protocol is optional, so no implementation is required here.
@interface NSVoice2Text () <SFSpeechRecognizerDelegate>
{
    // Pending NSVoiceModel tasks in arrival order; the first element is the task being processed.
    NSMutableArray * taskList;
}

// Last authorization status reported by the Speech framework.
@property (nonatomic, assign) NSVoice2TextAuthorationStatus authorationStatus;

// Lazily created speech recognizer for the zh-CN locale.
@property(nonatomic,strong)SFSpeechRecognizer *speechRecognizer;

@end

@implementation NSVoice2Text
/// Creates a converter with an empty task queue.
- (instancetype)init
{
    if ((self = [super init]) == nil)
    {
        return nil;
    }

    taskList = [[NSMutableArray alloc] initWithCapacity:0];
    return self;
}

/// Returns the shared converter, creating it on first use.
///
/// Creation is serialized on the class object because this accessor is reached both from
/// callers and from the speech-authorization handler, which is not guaranteed to run on the
/// caller's thread. dispatch_once is deliberately not used: +releaseInstance clears the
/// instance and a later call must be able to create a new one.
+ (instancetype)shareInstance
{
    @synchronized ([NSVoice2Text class])
    {
        if (!v2text)
        {
            v2text = [[NSVoice2Text alloc] init];
        }
        return v2text;
    }
}


/// Drops the shared converter so the next +shareInstance call builds a fresh one.
+ (void)releaseInstance
{
    // Assigning nil is harmless when nothing has been created yet.
    v2text = nil;
}


/// Lazily builds the zh-CN speech recognizer and registers self as its delegate.
- (SFSpeechRecognizer *)speechRecognizer
{
    if (_speechRecognizer != nil)
    {
        return _speechRecognizer;
    }

    NSLocale *recognizerLocale = [[NSLocale alloc] initWithLocaleIdentifier:@"zh-CN"];
    SFSpeechRecognizer *recognizer = [[SFSpeechRecognizer alloc] initWithLocale:recognizerLocale];
    recognizer.delegate = self;
    _speechRecognizer = recognizer;
    return _speechRecognizer;
}


/// Reports whether the task at the head of the shared queue is being converted.
/// Returns NO when the queue is empty (messaging the nil head yields NO).
+ (BOOL) isRunning
{
    NSVoice2Text *shared = [NSVoice2Text shareInstance];
    NSVoiceModel *head = shared->taskList.firstObject;
    return head.isRunning;
}


/// Starts the task at the head of the queue unless it has already been picked up.
///
/// The model's running block is notified first. A usable path is then handed to the
/// recognizer (http/https strings as remote URLs, everything else as a local file path).
/// A task that cannot be started is reported through its results block with an error,
/// removed from the queue, and processing continues with the next task.
- (void)resume
{
    NSVoiceModel *md = [self->taskList firstObject];
    if (!md || md.isInQueue)
    {
        return;
    }

    md.isInQueue = YES;
    if (md.voiceConversionRunningBlock)
    {
        md.voiceConversionRunningBlock(md);
    }

    // Test the string length rather than the pointer so an empty path is rejected too.
    if (md.path.length > 0 && !md.isRunning)
    {
        md.isRunning = YES;
        NSString *text = @"^(http|https)+.*";
        NSPredicate *regextest = [NSPredicate predicateWithFormat:@"SELF MATCHES %@", text];
        if ([regextest evaluateWithObject:md.path])
        {
            [self startVoiceConversionWithURL:md.path];
        }
        else
        {
            [self startVoiceConversionWithFilePath:md.path];
        }
        return;
    }

    NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
    // Report the real task id so the caller can locate the UI element that failed.
    el.taskId = md.taskId;
    el.error = [NSError errorWithDomain:@"语音路径错误或为空" code:404 userInfo:nil];
    if (md.voiceConversionBlock)
    {
        md.voiceConversionBlock(el);
    }

    // Drop the rejected task; left at the head with isInQueue == YES it would block every
    // task queued behind it.
    [self->taskList removeObject:md];
    [self resume];
}


/// Appends md to the queue unless a task with the same taskId is already queued.
- (void)addItToTask:(NSVoiceModel *)md
{
    for (NSVoiceModel *queued in taskList)
    {
        if (queued.taskId == md.taskId)
        {
            // Already queued: nothing to add.
            return;
        }
    }

    [taskList addObject:md];
}


/// Asks the Speech framework for recognition authorization, stores the answer on the shared
/// instance and forwards it to requestBlock.
+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock
{
    // Send the speech authorization request (device support for speech recognition should
    // be checked first).
    [SFSpeechRecognizer requestAuthorization:^(SFSpeechRecognizerAuthorizationStatus status)
    {
        NSVoice2Text *shared = [NSVoice2Text shareInstance];
        shared.authorationStatus = status;
        requestBlock(status);
    }];
}

/// Attaches the callbacks and the target to every model in glist, queues the models and
/// then starts processing the queue.
+ (void)voice2TextGotter:(NSArray *)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock rtaget:(id)rtaget
{
    NSVoice2Text *shared = [NSVoice2Text shareInstance];

    for (NSVoiceModel *model in glist)
    {
        model.voiceConversionRunningBlock = runningModelBlock;
        model.voiceConversionBlock = resultsBlock;
        model.taskTarget = rtaget;
        [shared addItToTask:model];
    }

    [shared resume];
}


/// Starts conversion of a local audio file identified by its file-system path.
- (void)startVoiceConversionWithFilePath:(NSString *)path
{
    NSURL *fileLocation = [NSURL fileURLWithPath:path];
    [self startVoiceConversion:fileLocation];
}

/// Starts conversion of audio identified by a URL string.
- (void)startVoiceConversionWithURL:(NSString *)url
{
    NSURL *remoteLocation = [NSURL URLWithString:url];
    [self startVoiceConversion:remoteLocation];
}

#pragma mark - private methods
/// Starts recognition of the audio at url for the task at the head of the queue.
///
/// The head model is captured when recognition starts, so every handler invocation is
/// attributed to the task it was started for. Once that task has left the queue, further
/// handler invocations are ignored instead of being applied to whichever task is at the
/// head by then.
///
/// Outcomes per handler invocation:
///  - the task's target is gone: the task is dropped without a callback;
///  - an error, a final result, or neither result nor error: the results block is called,
///    the task is removed and the next one is started;
///  - a non-final result: nothing happens until a later invocation.
- (void)startVoiceConversion:(NSURL *)url
{
    // -resume calls this synchronously for the head model, so this is the task being converted.
    NSVoiceModel *md = [taskList firstObject];
    __weak typeof(self) this = self;

    SFSpeechURLRecognitionRequest *recognitionRequest = [[SFSpeechURLRecognitionRequest alloc] initWithURL:url];
    NSLocale *cale = [[NSLocale alloc] initWithLocaleIdentifier:@"zh-CN"];
    SFSpeechRecognizer *sp = [[SFSpeechRecognizer alloc] initWithLocale:cale];

    // NOTE(review): the handler runs on this private operation queue, so result blocks are
    // presumably delivered off the main thread and the task list is touched from there -
    // confirm this is what callers expect.
    NSOperationQueue *otherQuene = [[NSOperationQueue alloc] init];
    [sp setQueue:otherQuene];
    [sp recognitionTaskWithRequest:recognitionRequest resultHandler:^(SFSpeechRecognitionResult * _Nullable result, NSError *  error)
    {
        __strong typeof(this) strongSelf = this;
        if (!strongSelf)
        {
            return;
        }

        NSMutableArray *list = strongSelf->taskList;
        if (!md || ![list containsObject:md])
        {
            // This task was already finished or dropped; ignore the late invocation.
            return;
        }

        if (!md.taskTarget)
        {
            // Nobody is waiting for the result any more: drop the task silently.
            [list removeObject:md];
            [strongSelf resume];
            return;
        }

        if (!error && result && ![result isFinal])
        {
            // Partial transcription; wait for the final one.
            return;
        }

        NSVoice2TextFinal *el = [[NSVoice2TextFinal alloc] init];
        el.taskId = md.taskId;
        el.error = error;
        if (!error)
        {
            // Stays nil when the handler delivered neither a result nor an error.
            el.value = [[result bestTranscription] formattedString];
        }
        if (md.voiceConversionBlock)
        {
            md.voiceConversionBlock(el);
        }

        [list removeObject:md];
        [strongSelf resume];
    }];
}
@end

此实现内部已经实现了队列转文字功能,你只需要随时传入数据模型即可。

代码分析:

1、权限请求

+ (void)voice2TextRequestAuthorationStatus:(void (^)(NSVoice2TextAuthorationStatus status))requestBlock;

 用于请求隐私权限,只有用户同意后方可使用此功能。否则无法使用此功能。

2、传入音频文件路径

+ (void)voice2TextGotter:(NSArray *)glist runningModelBlock:(void (^__nullable)(NSVoiceModel *amodel))runningModelBlock resultsBlock:(void (^)(NSVoice2TextFinal *finalValue))resultsBlock rtaget:(id)rtaget

音频以数据模型NSVoiceModel传入,将你的音频文件与此模型实现映射关系,taskID用于实现绑定,参考头文件的定义及实现。

2.1 runningModelBlock,因为支持队列事务,所以,当前正在处理哪条,则会对外输出此条。页面上可以此显示"正在转换中"文字

2.2 resultsBlock,转换结果文字,以NSVoice2TextFinal对外输出,你只需要处理好这里面的逻辑即可。

3、完整使用:

// Ask for permission first; conversion is only started once speech recognition is authorized.
[NSVoice2Text voice2TextRequestAuthorationStatus:^(NSVoice2TextAuthorationStatus status)
    {
        if (status == NSVoice2TextAuthorizationStatusAuthorized)
        {
            NSVoiceModel *md = [[NSVoiceModel alloc] init];
            [md setTaskId:[bmodel.messageId integerValue]];
            [md setPath:bmodel.audioFilePath];

            [NSVoice2Text voice2TextGotter:@[md] runningModelBlock:^(NSVoiceModel * _Nonnull amodel)
            {
                NSString *taskId = intToStr(amodel.taskId);
                // Use taskId to find the matching UI element and show "converting..."
            }
            resultsBlock:^(NSVoice2TextFinal * _Nonnull finalValue)
            {
                if (!finalValue.error)
                {
                    NSString *taskId = intToStr(finalValue.taskId);
                    NSString *transcribedText = [finalValue value];

                    // Use taskId to find the matching UI element; conversion finished and
                    // transcribedText holds the recognized text
                }
                else
                {
                    NSString *taskId = intToStr(finalValue.taskId);
                    // Conversion failed for this taskId; the matching UI element can show
                    // a "conversion failed" message
                }
            }
            rtaget:weakSelf];
        }
        else
        {
            // The authorization handler is not guaranteed to run on the main thread, so
            // hop to the main queue before touching the UI.
            dispatch_async(dispatch_get_main_queue(), ^{
                [weakSelf showToastMessageThenHide:@"未授权使用语音识别功能"];
            });
        }
    }];

欢迎分享,转载请注明来源:内存溢出

原文地址: http://outofmemory.cn/web/993999.html

(0)
打赏 微信扫一扫 微信扫一扫 支付宝扫一扫 支付宝扫一扫
上一篇 2022-05-21
下一篇 2022-05-21

发表评论

登录后才能评论

评论列表(0条)

保存