Commit 2cfb8fc

Merge pull request #1 from StormFox23/mic
Mic
2 parents 8685076 + fa9d0ad commit 2cfb8fc

6 files changed (+360, −1 lines)

.gitignore

Lines changed: 1 addition & 1 deletion
@@ -5,7 +5,7 @@ __pycache__/

# C extensions
*.so
-
+mic/file.wav
# Distribution / packaging
.Python
build/

mic/mic_wave.py

Lines changed: 93 additions & 0 deletions
@@ -0,0 +1,93 @@
try:
    import pyaudio
    import numpy as np
    import pylab
    import matplotlib.pyplot as plt
    from scipy.io import wavfile
    import time
    import sys
    # import seaborn as sns
except ImportError as e:
    print("import error:", e)
    raise

i = 0
f, ax = plt.subplots(2)

x = np.arange(10000)
y = np.random.randn(10000)

li, = ax[0].plot(x, y)
ax[0].set_xlim(0, 1000)
ax[0].set_ylim(-5000, 5000)
ax[0].set_title("Raw Audio Signal")

li2, = ax[1].plot(x, y)
ax[1].set_xlim(0, 5000)
ax[1].set_ylim(-100, 100)
ax[1].set_title("Fast Fourier Transform (FFT)")

plt.pause(0.01)
plt.tight_layout()

FORMAT = pyaudio.paInt16  # 16-bit samples
CHANNELS = 1
RATE = 44100
CHUNK = 1024  # number of frames read from the buffer per call
RECORD_SECONDS = 0.1
WAVE_OUTPUT_FILENAME = "file.wav"

audio = pyaudio.PyAudio()

# start recording
stream = audio.open(format=FORMAT,
                    channels=CHANNELS,
                    rate=RATE,
                    input=True)  # ,
# frames_per_buffer=CHUNK)

keep_going = True


def plot_data(in_data):
    # convert the raw bytes to int16 samples
    audio_data = np.frombuffer(in_data, np.int16)
    # Fast Fourier Transform; 10*log10(abs) scales it to dB
    # and makes sure it is not imaginary
    dfft = 10. * np.log10(abs(np.fft.rfft(audio_data)))

    # Force the new data into the plot without redrawing the axes.
    # With plt.draw(), the axes would be redrawn every time.
    li.set_xdata(np.arange(len(audio_data)))
    li.set_ydata(audio_data)
    li2.set_xdata(np.arange(len(dfft)) * 10.)
    li2.set_ydata(dfft)

    # Show the updated plot, but without blocking
    plt.pause(0.01)
    if keep_going:
        return True
    else:
        return False


stream.start_stream()
print("\n+---------------------------------+")
print("|      Press Ctrl+C to stop       |")
print("+---------------------------------+\n")

while keep_going:
    try:
        plot_data(stream.read(CHUNK))
    except KeyboardInterrupt:
        keep_going = False
    except Exception:
        pass  # ignore occasional read errors (e.g. input overflow)

stream.stop_stream()
stream.close()

audio.terminate()
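
A note on the FFT panel: the script multiplies the bin index by 10 purely as an arbitrary display scale. If the second axis should read in hertz instead, a minimal sketch along these lines converts bins to frequencies with np.fft.rfftfreq (assuming the same RATE and int16 chunks; log_spectrum is a hypothetical helper, not part of the commit):

import numpy as np

RATE = 44100  # sample rate used by the stream above


def log_spectrum(audio_data):
    """Return (freqs_hz, magnitude_db) for one chunk of int16 samples."""
    spectrum = np.abs(np.fft.rfft(audio_data))
    magnitude_db = 10.0 * np.log10(spectrum + 1e-12)  # small offset avoids log10(0)
    freqs_hz = np.fft.rfftfreq(len(audio_data), d=1.0 / RATE)  # bin centres in Hz
    return freqs_hz, magnitude_db

With CHUNK = 1024 at 44100 Hz the bins are spaced roughly 43 Hz apart and the last bin sits at the Nyquist frequency, 22050 Hz.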

mic/record_wave.py

Lines changed: 35 additions & 0 deletions
@@ -0,0 +1,35 @@
import pyaudio
import wave

FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
CHUNK = 1024
RECORD_SECONDS = 5
WAVE_OUTPUT_FILENAME = "file.wav"

audio = pyaudio.PyAudio()

# start recording
stream = audio.open(format=FORMAT, channels=CHANNELS,
                    rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
print("recording...")
frames = []

for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
    data = stream.read(CHUNK)
    frames.append(data)
print("finished recording")

# stop recording
stream.stop_stream()
stream.close()
audio.terminate()

waveFile = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
waveFile.setnchannels(CHANNELS)
waveFile.setsampwidth(audio.get_sample_size(FORMAT))
waveFile.setframerate(RATE)
waveFile.writeframes(b''.join(frames))
waveFile.close()
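
For the defaults above, the read loop does not land exactly on RECORD_SECONDS because the chunk count is rounded down; a quick back-of-the-envelope check:

RATE, CHUNK, RECORD_SECONDS = 44100, 1024, 5

n_reads = int(RATE / CHUNK * RECORD_SECONDS)  # int(215.33...) = 215 reads
frames_captured = n_reads * CHUNK             # 220160 frames
print(frames_captured / RATE)                 # ~4.99 s actually written to file.wav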

mic/server/client.py

Lines changed: 27 additions & 0 deletions
@@ -0,0 +1,27 @@
#!/usr/bin/env python
# usage: python client.py <server-host> <port>

import pyaudio
import socket
import sys

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 4096

s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
s.connect((sys.argv[1], int(sys.argv[2])))
audio = pyaudio.PyAudio()
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE,
                    output=True, frames_per_buffer=CHUNK)

try:
    while True:
        data = s.recv(CHUNK)
        if not data:  # server closed the connection
            break
        stream.write(data)
except KeyboardInterrupt:
    pass

print('Shutting down')
s.close()
stream.close()
audio.terminate()
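
One thing to keep in mind: s.recv(CHUNK) may return fewer than CHUNK bytes, since TCP gives no framing guarantee. stream.write() copes with whatever length it receives, but if fixed-size writes are ever needed, a small helper like the hypothetical recv_exact below (not part of the commit) is a common pattern:

def recv_exact(sock, n):
    """Block until exactly n bytes have arrived, or return less if the peer closes."""
    buf = bytearray()
    while len(buf) < n:
        part = sock.recv(n - len(buf))
        if not part:  # connection closed by the server
            break
        buf.extend(part)
    return bytes(buf)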

mic/server/server.py

Lines changed: 54 additions & 0 deletions
@@ -0,0 +1,54 @@
#!/usr/bin/env python

import pyaudio
import socket
import select

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 44100
CHUNK = 4096

audio = pyaudio.PyAudio()

serversocket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
serversocket.bind(('', 4444))
serversocket.listen(5)


def callback(in_data, frame_count, time_info, status):
    # push each captured chunk to every connected client
    # (read_list[0] is the listening socket itself)
    for s in read_list[1:]:
        s.send(in_data)
    return (None, pyaudio.paContinue)


# start recording; the callback runs on PyAudio's internal thread
stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                    frames_per_buffer=CHUNK, stream_callback=callback)
# stream.start_stream()

read_list = [serversocket]
print("recording...")

try:
    while True:
        readable, writable, errored = select.select(read_list, [], [])
        for s in readable:
            if s is serversocket:
                (clientsocket, address) = serversocket.accept()
                read_list.append(clientsocket)
                print("Connection from", address)
            else:
                data = s.recv(1024)
                if not data:
                    read_list.remove(s)
except KeyboardInterrupt:
    pass


print("finished recording")

serversocket.close()
# stop recording
stream.stop_stream()
stream.close()
audio.terminate()
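
To try the pair, start python server.py on the machine with the microphone, then python client.py <server-ip> 4444 on the listener (port 4444 is hard-coded in server.py). Each connected client receives the raw capture stream; a rough estimate of the per-client bandwidth, given the settings above:

RATE = 44100        # samples per second
SAMPLE_WIDTH = 2    # bytes per sample for paInt16
CHANNELS = 1

bytes_per_second = RATE * SAMPLE_WIDTH * CHANNELS
print(bytes_per_second)  # 88200 B/s, roughly 86 KiB/s per client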

mic/vad.py

Lines changed: 150 additions & 0 deletions
@@ -0,0 +1,150 @@
'''
Requirements:
+ pyaudio - `pip install pyaudio`
+ py-webrtcvad - `pip install webrtcvad`
'''
import webrtcvad
import collections
import sys
import signal
import pyaudio

from array import array
from struct import pack
import wave
import time

FORMAT = pyaudio.paInt16
CHANNELS = 1
RATE = 16000
CHUNK_DURATION_MS = 30  # webrtcvad supports 10, 20 and 30 ms frames
PADDING_DURATION_MS = 1500  # 1.5 s of padding
CHUNK_SIZE = int(RATE * CHUNK_DURATION_MS / 1000)  # samples per chunk
CHUNK_BYTES = CHUNK_SIZE * 2  # 16-bit PCM = 2 bytes per sample
NUM_PADDING_CHUNKS = int(PADDING_DURATION_MS / CHUNK_DURATION_MS)
# NUM_WINDOW_CHUNKS = int(240 / CHUNK_DURATION_MS)
NUM_WINDOW_CHUNKS = int(400 / CHUNK_DURATION_MS)  # 400 ms window / 30 ms chunks
NUM_WINDOW_CHUNKS_END = NUM_WINDOW_CHUNKS * 2

START_OFFSET = int(NUM_WINDOW_CHUNKS * CHUNK_DURATION_MS * 0.5 * RATE)

vad = webrtcvad.Vad(1)

pa = pyaudio.PyAudio()
stream = pa.open(format=FORMAT,
                 channels=CHANNELS,
                 rate=RATE,
                 input=True,
                 start=False,
                 # input_device_index=2,
                 frames_per_buffer=CHUNK_SIZE)

got_a_sentence = False
leave = False


def handle_int(sig, chunk):
    global leave, got_a_sentence
    leave = True
    got_a_sentence = True


def record_to_file(path, data, sample_width):
    "Write the recorded samples in 'data' to a WAV file at 'path'"
    # sample_width, data = record()
    data = pack('<' + ('h' * len(data)), *data)
    wf = wave.open(path, 'wb')
    wf.setnchannels(1)
    wf.setsampwidth(sample_width)
    wf.setframerate(RATE)
    wf.writeframes(data)
    wf.close()


def normalize(snd_data):
    "Average the volume out"
    MAXIMUM = 32767  # 16384
    times = float(MAXIMUM) / max(abs(i) for i in snd_data)
    r = array('h')
    for i in snd_data:
        r.append(int(i * times))
    return r


signal.signal(signal.SIGINT, handle_int)

while not leave:
    ring_buffer = collections.deque(maxlen=NUM_PADDING_CHUNKS)
    triggered = False
    voiced_frames = []
    ring_buffer_flags = [0] * NUM_WINDOW_CHUNKS
    ring_buffer_index = 0

    ring_buffer_flags_end = [0] * NUM_WINDOW_CHUNKS_END
    ring_buffer_index_end = 0
    buffer_in = ''
    # WangS
    raw_data = array('h')
    index = 0
    start_point = 0
    StartTime = time.time()
    print("* recording: ")
    stream.start_stream()

    while not got_a_sentence and not leave:
        chunk = stream.read(CHUNK_SIZE)
        # add WangS
        raw_data.extend(array('h', chunk))
        index += CHUNK_SIZE
        TimeUse = time.time() - StartTime

        active = vad.is_speech(chunk, RATE)

        sys.stdout.write('1' if active else '_')
        ring_buffer_flags[ring_buffer_index] = 1 if active else 0
        ring_buffer_index += 1
        ring_buffer_index %= NUM_WINDOW_CHUNKS

        ring_buffer_flags_end[ring_buffer_index_end] = 1 if active else 0
        ring_buffer_index_end += 1
        ring_buffer_index_end %= NUM_WINDOW_CHUNKS_END

        # start point detection
        if not triggered:
            ring_buffer.append(chunk)
            num_voiced = sum(ring_buffer_flags)
            if num_voiced > 0.8 * NUM_WINDOW_CHUNKS:
                sys.stdout.write(' Open ')
                triggered = True
                start_point = index - CHUNK_SIZE * 20  # start point
                # voiced_frames.extend(ring_buffer)
                ring_buffer.clear()
        # end point detection
        else:
            # voiced_frames.append(chunk)
            ring_buffer.append(chunk)
            num_unvoiced = NUM_WINDOW_CHUNKS_END - sum(ring_buffer_flags_end)
            if num_unvoiced > 0.90 * NUM_WINDOW_CHUNKS_END or TimeUse > 10:
                sys.stdout.write(' Close ')
                triggered = False
                got_a_sentence = True

        sys.stdout.flush()

    sys.stdout.write('\n')
    # data = b''.join(voiced_frames)

    stream.stop_stream()
    print("* done recording")
    got_a_sentence = False

    # write to file: keep only the samples from start_point onward
    raw_data.reverse()
    for index in range(start_point):
        raw_data.pop()
    raw_data.reverse()
    raw_data = normalize(raw_data)
    record_to_file("recording.wav", raw_data, 2)
    leave = True

stream.close()
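
The 30 ms frame size is what makes the VAD calls valid: webrtcvad only accepts 10, 20 or 30 ms frames of 16-bit mono PCM at 8, 16, 32 or 48 kHz. A minimal sketch of the check in isolation (the all-zero frame is only an illustrative input, not taken from the commit):

import webrtcvad

RATE = 16000                                       # sample rate used above
CHUNK_DURATION_MS = 30                             # valid frame lengths: 10, 20, 30 ms
CHUNK_SIZE = int(RATE * CHUNK_DURATION_MS / 1000)  # 480 samples per frame
CHUNK_BYTES = CHUNK_SIZE * 2                       # 960 bytes of 16-bit mono PCM

vad = webrtcvad.Vad(1)                             # aggressiveness 0 (loose) .. 3 (strict)
silence = b"\x00" * CHUNK_BYTES                    # illustrative all-zero frame
print(vad.is_speech(silence, RATE))                # expected False for pure silence

The trigger logic above then opens when more than 80% of the last NUM_WINDOW_CHUNKS frames (about 400 ms) are voiced, and closes when more than 90% of the last NUM_WINDOW_CHUNKS_END frames (about 800 ms) are unvoiced or 10 seconds have elapsed.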
