Skip to content

Commit ce1a68f

Browse files
committed
update structure
1 parent ed9794f commit ce1a68f

File tree

8 files changed

+454
-138
lines changed

8 files changed

+454
-138
lines changed

.github/workflows/ci.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,3 +30,29 @@ jobs:
3030
- name: Format check with ruff
3131
run: |
3232
ruff format --check
33+
# Build job: installs the native audio/video libraries, syncs the project
# environment with uv, builds the distribution, and uploads dist/ as an artifact.
build:
  runs-on: ubuntu-latest
  steps:
    - uses: actions/checkout@v4
    - name: Set up Python
      uses: actions/setup-python@v5
      with:
        python-version: "3.10"
        cache: "pip"
    - name: Install uv
      run: |
        pip install uv
    - name: Install dependencies
      run: |
        # Refresh the package index first: the index baked into the runner
        # image can be stale, which makes apt-get install fail on missing files.
        sudo apt-get update
        # -y keeps the install non-interactive regardless of how the runner
        # image's apt defaults are configured.
        sudo apt-get install -y libasound-dev portaudio19-dev libportaudio2 libportaudiocpp0 ffmpeg
        uv sync
    - name: Build package
      run: |
        uv build
    - name: Upload build artifacts
      uses: actions/upload-artifact@v4
      with:
        name: dist
        path: dist
        # Replace an existing artifact of the same name instead of failing.
        overwrite: true
58+

README.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ MCP_ENABLE_SCREEN=true # Enable screen functionality
103103
| -------------------- | ----------------------------------- |
104104
| `list_audio_devices` | List all audio input/output devices |
105105
| `record_audio` | Record from an input device |
106+
| `stop_record_audio` | Stop recording from an input device |
106107
| `play_audio` | Play audio through a device |
107108

108109
### Screen

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@ dependencies = [
99
"mss>=10.0.0",
1010
"numpy>=2.2.6",
1111
"opencv-python>=4.8.0",
12+
"pillow>=11.2.1",
1213
"pyaudio>=0.2.14",
13-
"python-ffmpeg>=2.0.12",
1414
"screeninfo>=0.8.1",
1515
]

src/devices/audio.py

Lines changed: 155 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,16 @@
1-
from typing import Dict, List, Optional
1+
from typing import Dict, List, Optional, Any
22
from typing import Annotated
33
from fastmcp import FastMCP
44
from pydantic import Field
5-
import pyaudio
6-
import wave
5+
import threading
6+
import platform
77
import tempfile
88
import datetime
9+
import pyaudio
10+
import wave
911
import os
10-
import platform
12+
13+
_active_audio_recording = None
1114

1215

1316
def register_tools(app: FastMCP) -> None:
@@ -16,7 +19,7 @@ def register_tools(app: FastMCP) -> None:
1619
description="List all available audio input and output devices",
1720
tags=["audio"],
1821
)
19-
async def list_audio_devices() -> Dict[str, List[Dict[str, any]]]:
22+
async def list_audio_devices() -> Dict[str, List[Dict[str, Any]]]:
2023
try:
2124
p = pyaudio.PyAudio()
2225
except Exception as e:
@@ -72,8 +75,12 @@ async def list_audio_devices() -> Dict[str, List[Dict[str, any]]]:
7275
)
7376
async def record_audio(
7477
duration: Annotated[
75-
float, Field(default=5.0, description="Recording duration in seconds")
76-
],
78+
float,
79+
Field(
80+
default=5.0,
81+
description="Recording duration in seconds. Pass -1 for background recording",
82+
),
83+
] = 5.0,
7784
sample_rate: Annotated[
7885
Optional[int], Field(default=44100, description="Sample rate in Hz")
7986
] = 44100,
@@ -89,7 +96,21 @@ async def record_audio(
8996
default=None, description="Audio input device index (None for default)"
9097
),
9198
] = None,
92-
) -> Dict[str, any]:
99+
) -> Dict[str, Any]:
100+
global _active_audio_recording
101+
102+
if _active_audio_recording is not None:
103+
return {
104+
"success": False,
105+
"error": "Another audio recording is already in progress. Stop it first using stop_record_audio.",
106+
}
107+
108+
if duration != -1 and duration <= 0:
109+
return {
110+
"success": False,
111+
"error": "Duration must be positive or -1 for background recording",
112+
}
113+
93114
chunk = 1024
94115
format = pyaudio.paInt16
95116

@@ -144,6 +165,48 @@ async def record_audio(
144165
error_msg += " ALSA error - try different sample rate or check audio system configuration."
145166
return {"success": False, "error": error_msg}
146167

168+
if duration == -1:
169+
frames = []
170+
stop_event = threading.Event()
171+
172+
def background_record():
173+
try:
174+
while not stop_event.is_set():
175+
data = stream.read(chunk, exception_on_overflow=False)
176+
frames.append(data)
177+
except Exception:
178+
pass
179+
180+
record_thread = threading.Thread(target=background_record)
181+
record_thread.daemon = True
182+
record_thread.start()
183+
184+
_active_audio_recording = {
185+
"stream": stream,
186+
"pyaudio": p,
187+
"frames": frames,
188+
"stop_event": stop_event,
189+
"thread": record_thread,
190+
"output_file": output_file,
191+
"sample_rate": sample_rate,
192+
"channels": channels,
193+
"format": format,
194+
"device_info": device_info,
195+
"start_time": datetime.datetime.now(),
196+
}
197+
198+
return {
199+
"success": True,
200+
"output_file": output_file,
201+
"sample_rate": sample_rate,
202+
"channels": channels,
203+
"device_used": device_info["name"]
204+
if device_info
205+
else "Default device",
206+
"recording_status": "started",
207+
"message": "Background recording started. Use stop_record_audio to stop.",
208+
}
209+
147210
frames = []
148211
total_frames = int(sample_rate / chunk * duration)
149212

@@ -171,7 +234,8 @@ async def record_audio(
171234
except Exception as e:
172235
return {"success": False, "error": str(e)}
173236
finally:
174-
p.terminate()
237+
if duration != -1:
238+
p.terminate()
175239

176240
@app.tool(
177241
name="play_audio",
@@ -186,7 +250,7 @@ async def play_audio(
186250
default=None, description="Audio output device index (None for default)"
187251
),
188252
] = None,
189-
) -> Dict[str, any]:
253+
) -> Dict[str, Any]:
190254
try:
191255
with wave.open(file_path, "rb") as wf:
192256
channels = wf.getnchannels()
@@ -238,15 +302,22 @@ async def play_audio(
238302
error_msg += " ALSA error - try different sample rate or check audio system configuration."
239303
return {"success": False, "error": error_msg}
240304

241-
chunk = 1024
242-
data = wf.readframes(chunk)
243-
244-
while data:
245-
stream.write(data)
246-
data = wf.readframes(chunk)
305+
def play_in_background():
306+
try:
307+
with wave.open(file_path, "rb") as wf_bg:
308+
chunk = 1024
309+
data = wf_bg.readframes(chunk)
310+
while data:
311+
stream.write(data)
312+
data = wf_bg.readframes(chunk)
313+
finally:
314+
stream.stop_stream()
315+
stream.close()
316+
p.terminate()
247317

248-
stream.stop_stream()
249-
stream.close()
318+
play_thread = threading.Thread(target=play_in_background)
319+
play_thread.daemon = True
320+
play_thread.start()
250321

251322
return {
252323
"success": True,
@@ -257,9 +328,12 @@ async def play_audio(
257328
"device_used": device_info["name"]
258329
if device_info
259330
else "Default device",
331+
"status": "playing",
332+
"message": f"Audio playback started in background. Duration: {duration:.2f} seconds",
260333
}
261-
finally:
334+
except Exception as e:
262335
p.terminate()
336+
raise e
263337
except FileNotFoundError:
264338
return {
265339
"success": False,
@@ -282,3 +356,65 @@ async def play_audio(
282356
elif platform.system() == "Darwin" and "CoreAudio" in str(e):
283357
error_msg += " Check macOS audio settings and ensure the device is not in exclusive mode."
284358
return {"success": False, "error": error_msg}
359+
360+
@app.tool(
    name="stop_record_audio",
    description="Stop the current background audio recording",
    tags=["audio"],
)
async def stop_record_audio() -> Dict[str, Any]:
    """Stop the active background recording and write it to its WAV file.

    Reads the recording state stored in the module-level
    ``_active_audio_recording`` dict, signals the recording thread to stop,
    closes the input stream, and writes the collected frames to the
    recording's ``output_file``.

    Returns:
        On success, a dict with ``success=True`` plus ``output_file``,
        ``duration`` (seconds elapsed since the recorded ``start_time``),
        ``sample_rate``, ``channels`` and ``device_used``. On failure, a dict
        with ``success=False`` and an ``error`` message. Errors are reported
        in the returned dict rather than raised.
    """
    global _active_audio_recording

    recording = _active_audio_recording
    if recording is None:
        return {"success": False, "error": "No active audio recording found"}

    # Clear the module state up front so that no failure path below can leave
    # a stale entry behind that would make record_audio refuse new recordings.
    _active_audio_recording = None

    audio = recording["pyaudio"]
    output_file = recording["output_file"]

    try:
        try:
            recording["stop_event"].set()
            # Bounded wait: the reader thread may be blocked in stream.read().
            recording["thread"].join(timeout=5.0)
            recording["stream"].stop_stream()
            recording["stream"].close()

            elapsed = datetime.datetime.now() - recording["start_time"]
            duration = elapsed.total_seconds()
        except Exception as e:
            return {
                "success": False,
                "error": f"Error stopping audio recording: {str(e)}",
            }

        try:
            with wave.open(output_file, "wb") as wf:
                wf.setnchannels(recording["channels"])
                wf.setsampwidth(audio.get_sample_size(recording["format"]))
                wf.setframerate(recording["sample_rate"])
                wf.writeframes(b"".join(recording["frames"]))

            if os.path.exists(output_file) and os.path.getsize(output_file) > 0:
                device_info = recording["device_info"]
                return {
                    "success": True,
                    "output_file": output_file,
                    "duration": duration,
                    "sample_rate": recording["sample_rate"],
                    "channels": recording["channels"],
                    "device_used": device_info["name"]
                    if device_info
                    else "Default device",
                }

            return {
                "success": False,
                "error": "Recording was stopped but no valid file was created",
            }
        except Exception as e:
            return {
                "success": False,
                "error": f"Failed to save audio file: {str(e)}",
            }
    finally:
        # Release the PyAudio instance on every path, including the failure
        # paths that previously returned without terminating it.
        audio.terminate()

0 commit comments

Comments
 (0)