网站开发程序员工资,什么是门户网站?,旅游公司网站难做吗,网站后台地址破解faster_whisper语音识别
检测可用设备：调用 list_available_devices() 函数。我这边的 USB 摄像头自带麦克风，所以 DEV_index = 8。
1. 使用 pyaudio 打开音频设备。 2. 从音频设备读取数据，传递给 faster_whisper 识别。 按键：r 录制，s 停止，q 退出。
test.py
#…faster_whisper语音识别
检测可用设备：调用 list_available_devices() 函数。我这边的 USB 摄像头自带麦克风，所以 DEV_index = 8。
1. 使用 pyaudio 打开音频设备。 2. 从音频设备读取数据，传递给 faster_whisper 识别。 按键：r 录制，s 停止，q 退出。
test.py
# from faster_whisper import WhisperModel
# model = WhisperModel("large-v3")
# audio_path = "mlk.flac"
# segments, info = model.transcribe(audio_path)
# for segment in segments:
#     print("[%.2fs - %.2fs] %s" % (segment.start, segment.end, segment.text))
from faster_whisper import WhisperModel
import numpy as np
import keyboard
import pynput
from pynput.keyboard import Controller, Listener,Key,KeyCode
import time
import pyaudio
import wave


def list_available_devices():
    """Print the index and name of every audio device that has input channels.

    Walks all devices reported by PyAudio and prints one ``index: name`` line
    for each device whose ``maxInputChannels`` is greater than zero. The
    printed index is the value to assign to ``DEV_index``.

    Returns:
        None. The result is written to stdout only.
    """
    print("Available input devices:")
    p = pyaudio.PyAudio()
    try:
        for i in range(p.get_device_count()):
            device_info = p.get_device_info_by_index(i)
            # Skip output-only devices: they report zero input channels.
            if device_info["maxInputChannels"] > 0:
                print(f"{i}: {device_info['name']}")
    finally:
        # Release PyAudio even if querying a device raised.
        p.terminate()


# List available devices
# Runs at import time and prints the input-capable devices to stdout.
list_available_devices()

# Sample output recorded by the original author:
# Available input devices:
# 5: USB Audio: #1 (hw:2,1)
# 6: USB Audio: #2 (hw:2,2)
# 8: aoni webcam A20: USB Audio (hw:3,0)
# 9: pulse
# 10: default

# Replace with the device index you identified by running list_available_devices()
DEV_index = 8  # Replace with your actual device index


class VoiceRecorder:
    """Record audio from the input device ``DEV_index`` and transcribe it.

    Holds one PyAudio instance and one faster_whisper ``large-v3`` model for
    the lifetime of the object. Call ``close()`` when done.
    """

    def __init__(self, channels=1, rate=16000, format=pyaudio.paInt16):
        """Create the PyAudio handle and load the Whisper model.

        Args:
            channels: Number of input channels to capture.
            rate: Capture sample rate in Hz. The recorded samples are handed
                to the model without resampling.
            format: PyAudio sample format. ``transcribe_audio`` always decodes
                the bytes as 16-bit integers, so other formats will not decode
                correctly there.
        """
        self.p = pyaudio.PyAudio()
        self.model = WhisperModel("large-v3")
        self.CHANNELS = channels
        self.RATE = rate
        self.FORMAT = format

    def record(self, seconds=5):
        """Record audio for the given number of seconds.

        Args:
            seconds: Recording duration. The number of chunks read is
                ``int(RATE / 1024 * seconds)``, so the real duration is
                rounded down to a whole number of 1024-frame chunks.

        Returns:
            The raw captured bytes, or ``None`` if opening or reading the
            stream failed (the error is printed).
        """
        CHUNK = 1024
        # Pre-bind so the cleanup below is safe when open() itself fails.
        stream = None
        try:
            stream = self.p.open(
                format=self.FORMAT,
                channels=self.CHANNELS,
                rate=self.RATE,
                input=True,
                input_device_index=DEV_index,
                frames_per_buffer=CHUNK,
            )
            print("开始录音...")
            chunk_count = int(self.RATE / CHUNK * seconds)
            frames = [stream.read(CHUNK) for _ in range(chunk_count)]
            print("录音结束.")
        except Exception as e:
            print(f"录音时发生错误{e}")
            return None
        finally:
            # Only tear down a stream that was actually opened; otherwise the
            # cleanup would raise and hide the original error.
            if stream is not None:
                stream.stop_stream()
                stream.close()
        return b"".join(frames)

    def transcribe_audio(self, audio_data):
        """Convert recorded audio bytes to text.

        Args:
            audio_data: Raw bytes as returned by ``record()``; decoded as
                interleaved 16-bit integer samples.

        Returns:
            A list with the text of each transcribed segment, or ``None`` if
            decoding or transcription failed (the error is printed).
        """
        try:
            audio_np = np.frombuffer(audio_data, dtype=np.int16)
            if self.CHANNELS > 1:
                # Down-mix interleaved multi-channel audio by averaging the
                # channels of each frame.
                audio_np = audio_np.reshape((-1, self.CHANNELS)).mean(axis=1)
            # Scale 16-bit integer samples into the [-1.0, 1.0) float range.
            audio_normalized = np.float32(audio_np) / 32768.0
            segments, _ = self.model.transcribe(
                audio_normalized, language="zh", beam_size=5
            )
            return [segment.text for segment in segments]
        except Exception as e:
            print(f"转录音频时发生错误{e}")
            return None

    def close(self):
        """Terminate the PyAudio instance."""
        self.p.terminate()


def main():
    """Create the recorder, start the keyboard listener and wait for it to stop.

    ``recorder`` and ``listener`` are published as module globals because the
    ``on_press`` callback reads them.
    """
    global recorder
    global listener
    recorder = VoiceRecorder()
    listener = Listener(on_press=on_press)
    listener.start()
    listener.join()


def on_press(key: KeyCode):
    """Handle one key press: ``r`` records and transcribes, ``q`` quits.

    ``s`` only prints a message; recording is a fixed-length blocking call and
    is not interrupted by it.

    Args:
        key: The pressed key. Special keys (shift, ctrl, ...) have no ``char``
            attribute and are ignored.
    """
    print(type(key))
    # Special keys carry no .char; reading it directly would raise
    # AttributeError inside the listener callback.
    char = getattr(key, "char", None)
    if char == "r":
        print("开始录音...")
        audio_data = recorder.record()
        if audio_data is not None:
            transcripts = recorder.transcribe_audio(audio_data)
            # transcribe_audio returns None on failure; print nothing then.
            for text in transcripts or []:
                print(text)
            print("录音结束.")
    elif char == "s":
        print("停止录音.")
    elif char == "q":
        print("退出程序.")
        listener.stop()
        recorder.close()


if __name__ == "__main__":
    main()