-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaudio_record.py
More file actions
52 lines (39 loc) · 1.46 KB
/
audio_record.py
File metadata and controls
52 lines (39 loc) · 1.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import torch
import numpy as np
import sounddevice as sd
import scipy.io.wavfile as wav
import io
from silero_vad import load_silero_vad, get_speech_timestamps, collect_chunks
# Recording parameters shared by the capture call and the VAD.
SAMPLE_RATE = 16000  # samples per second, used for both recording and VAD
CHANNELS = 1  # number of input channels requested from the audio device
MAX_DURATION = 20  # length of one recording, in seconds

# Fetch the Silero VAD model and its helper tuple through torch.hub.
# force_reload=False asks torch.hub not to force a fresh download.
model, utils = torch.hub.load(
    repo_or_dir="snakers4/silero-vad",
    model="silero_vad",
    force_reload=False,
)

# The helpers arrive as a positional tuple. Note that this rebinds
# get_speech_timestamps and collect_chunks, which are also imported from
# the silero_vad package at the top of the file; the torch.hub versions
# are the ones in effect from here on.
get_speech_timestamps, save_audio, read_audio, VADIterator, collect_chunks = utils
def record_speech(output_path='recordings/recording.mp3'):
    """Record from the microphone and save only the spoken portions.

    Captures MAX_DURATION seconds of audio at SAMPLE_RATE, runs the Silero
    VAD model over the capture and, when speech is found, concatenates the
    detected speech segments and writes them to ``output_path``.

    Args:
        output_path: Destination file. Defaults to
            ``recordings/recording.mp3`` for backward compatibility.
            NOTE: the bytes written are WAV data produced by
            ``scipy.io.wavfile.write`` (float32 samples), not MP3,
            whatever the file extension says. Any missing parent
            directory is created.

    Returns:
        True if speech was detected and the file was written, False if the
        VAD found no speech (in which case nothing is written).
    """
    import os  # local import: only needed to create the output directory

    print('Started Recording')

    # sd.rec() starts the capture; sd.wait() blocks until it has finished.
    recording = sd.rec(
        int(MAX_DURATION * SAMPLE_RATE),
        samplerate=SAMPLE_RATE,
        channels=CHANNELS,
        dtype="float32",
    )
    sd.wait()

    # Drop the length-1 channel axis so the VAD receives a 1-D signal
    # (this relies on CHANNELS == 1).
    audio = recording.squeeze()
    torch_audio = torch.from_numpy(audio).float()

    # Detect speech segments.
    speech_timestamps = get_speech_timestamps(
        torch_audio, model, sampling_rate=SAMPLE_RATE
    )
    if not speech_timestamps:
        print("❌ No speech detected.")
        return False

    # Keep only the speech parts (trims the silence between and around them).
    speech_audio = collect_chunks(speech_timestamps, torch_audio)

    # Encode fully in memory first, so a failed encode never leaves a
    # truncated file on disk.
    buffer = io.BytesIO()
    wav.write(buffer, SAMPLE_RATE, speech_audio.numpy())

    # Make sure the target directory exists; previously a missing
    # 'recordings/' directory raised FileNotFoundError after the whole
    # recording had already been captured.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    with open(output_path, 'wb') as f:
        f.write(buffer.getvalue())

    print('Recording done')
    return True