Skip to content

feat: add capture.py - also fixes audio recording #362

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 71 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
71 commits
Select commit Hold shift + click to select a range
7ea743d
merge
0dm Jul 4, 2023
c707f03
Create capture.py
0dm Jul 7, 2023
f5cb9bf
Update capture.py
0dm Jul 7, 2023
fad652d
Update capture.py
0dm Jul 7, 2023
5d41eba
it's finally fixed
0dm Jul 7, 2023
2648581
add dependencies
0dm Jul 7, 2023
b1aa358
comment
0dm Jul 7, 2023
6e4163e
move code + use config.CAPTURE_DIR_PATH
0dm Jul 7, 2023
ac6b02b
remove debug lines
0dm Jul 7, 2023
ac63470
Update capture.py
0dm Jul 7, 2023
fb84ad9
OpenAdaptCapture -> Capture
0dm Jul 7, 2023
e45dfa6
add camera
0dm Jul 7, 2023
be285d4
Let's have this off by default.
0dm Jul 10, 2023
5c14ab5
Merge remote-tracking branch 'upstream/main'
0dm Jul 10, 2023
6620f53
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 11, 2023
709ff38
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 17, 2023
40db7ab
Merge remote-tracking branch 'upstream/main'
0dm Jul 19, 2023
e85444c
Merge remote-tracking branch 'upstream/main'
0dm Jul 25, 2023
7484446
hotfix
0dm Jul 25, 2023
9a67c71
fix
0dm Jul 25, 2023
d3ac547
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 25, 2023
949c018
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 25, 2023
aa704be
linting
0dm Jul 25, 2023
857fb1c
merge
0dm Jul 28, 2023
349a724
Create capture.py
0dm Jul 28, 2023
c3011a4
windows
0dm Jul 28, 2023
f2b19e2
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 28, 2023
7a0b7ae
cleanup + lint
0dm Jul 28, 2023
8b76a6b
Merge branch 'main' into macos-capture
0dm Jul 28, 2023
c876b43
Merge branch 'main' into macos-capture
0dm Jul 31, 2023
6ca4ebc
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Jul 31, 2023
ee6efcb
Update _windows.py
0dm Jul 31, 2023
186914d
Merge branch 'macos-capture' of https://github.com/0dm/OpenAdapt into…
0dm Jul 31, 2023
030ffb4
Merge remote-tracking branch 'upstream/main'
0dm Aug 1, 2023
87dda56
add audio + new windows recording
0dm Aug 2, 2023
904867a
screen_recorder.free_resources()
0dm Aug 2, 2023
b4fea77
Update _windows.py
0dm Aug 2, 2023
528e501
isort
0dm Aug 2, 2023
f73697e
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 4, 2023
5f60828
Merge remote-tracking branch 'upstream/main'
0dm Aug 7, 2023
13307d1
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 10, 2023
92ff5a5
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 11, 2023
c4e668a
Merge remote-tracking branch 'upstream/main'
0dm Aug 15, 2023
02f48c8
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 18, 2023
5dc6735
add playback recording
0dm Aug 18, 2023
3d2dc53
Update replay.py
0dm Aug 18, 2023
78f1a25
Update replay.py
0dm Aug 18, 2023
27ce545
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 21, 2023
a98cf3b
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 24, 2023
2be979a
Merge remote-tracking branch 'upstream/main'
0dm Aug 25, 2023
7064103
Update README.md
0dm Aug 25, 2023
0fe8156
Revert "Update README.md"
0dm Aug 25, 2023
f3e432a
Update README.md
0dm Aug 25, 2023
e17e795
Revert "Revert "Update README.md""
0dm Aug 25, 2023
f0b161e
Merge branch 'main' of https://github.com/0dm/OpenAdapt
0dm Aug 25, 2023
278a108
Update README.md
0dm Aug 25, 2023
d79946b
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 25, 2023
5801cef
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 28, 2023
90ba6f3
run pre-commit
0dm Aug 28, 2023
7e7f068
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 28, 2023
ee1b2fe
Update pyproject.toml
0dm Aug 28, 2023
ad78215
Update openadapt/replay.py
abrichr Aug 28, 2023
5ca69c6
Update openadapt/replay.py
abrichr Aug 28, 2023
6caa478
Merge remote-tracking branch 'upstream/main' into macos-capture
0dm Aug 28, 2023
a8a59aa
Update openadapt/replay.py
abrichr Aug 28, 2023
628b5b6
Merge branch 'macos-capture' of https://github.com/0dm/OpenAdapt into…
0dm Aug 28, 2023
c399b5a
Update replay.py
0dm Aug 28, 2023
ae46894
update poetry.lock
abrichr Aug 28, 2023
270b1e0
merge main
abrichr Aug 28, 2023
2326c13
merge to main and lock
abrichr Aug 29, 2023
ca8f8c4
Merge branch 'main' into macos-capture
abrichr Aug 29, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)
[Join us on Slack](https://join.slack.com/t/mldsai/shared_invite/zt-1uf94nn7r-qcQnS~hinLPKftUapNzbuw)

# OpenAdapt: AI-First Process Automation with Transformers

Expand Down
44 changes: 44 additions & 0 deletions openadapt/capture/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
"""Capture the screen, audio, and camera as a video on macOS and Windows.

Module: openadapt.capture
"""
import sys

# Reject unsupported platforms up front, then pick the matching backend.
if sys.platform not in ("darwin", "win32"):
    raise Exception(f"Unsupported platform: {sys.platform}")

if sys.platform == "win32":
    from . import _windows as impl
else:
    from . import _macos as impl

# Single capture object created at import time and used by this module's
# start()/stop()/get_capture() helpers.
device = impl.Capture()


def get_capture() -> impl.Capture:
    """Return the module-level capture object.

    Returns:
        Capture: The platform-specific capture instance created at import time.
    """
    return device


def start(audio: bool = False, camera: bool = False) -> None:
    """Start recording with the module-level capture object.

    Args:
        audio (bool, optional): Forwarded to the backend's ``start`` as
            ``audio`` (default: False).
        camera (bool, optional): Forwarded to the backend's ``start`` as
            ``camera`` (default: False).
    """
    options = {"audio": audio, "camera": camera}
    device.start(**options)


def stop() -> None:
    """Stop recording on the module-level capture object."""
    device.stop()


def test() -> None:
    """Manually exercise the capture: record until the user presses enter.

    Starts the module-level capture object with its default options, blocks
    on ``input`` and then stops the capture. The stop runs in a ``finally``
    clause so the recording is also stopped when the prompt is interrupted
    (for example by Ctrl+C or EOF on stdin).
    """
    device.start()
    try:
        input("Press enter to stop")
    finally:
        device.stop()


# Run the manual test when this module's name is "__main__" or "capture".
if __name__ == "__main__" or __name__ == "capture":
    test()
117 changes: 117 additions & 0 deletions openadapt/capture/_macos.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
"""Allows for capturing the screen and audio on macOS.

This is based on: https://gist.github.com/timsutton/0c6439eb6eb1621a5964

usage: see bottom of file
"""
from datetime import datetime
from sys import platform
import os

from Foundation import NSURL, NSObject # type: ignore # noqa
from Quartz import CGMainDisplayID # type: ignore # noqa
import AVFoundation as AVF # type: ignore # noqa
import objc # type: ignore # noqa

from openadapt import config


class Capture:
    """Capture the screen, audio, and camera on macOS."""

    def __init__(self) -> None:
        """Initialize the capture object.

        Raises:
            NotImplementedError: If the current platform is not macOS.
        """
        if platform != "darwin":
            raise NotImplementedError(
                "This is the macOS implementation, please use the Windows version"
            )

        objc.options.structs_indexable = True

        # Populated by start(); defined here so stop() can be called safely
        # even when no recording was ever started.
        self.session = None
        self.camera_session = None
        self.audio_input = None
        self.file_url = None
        self.camera_url = None

    def start(self, audio: bool = False, camera: bool = False) -> None:
        """Start capturing the screen, audio, and camera.

        The screen recording is written to a timestamped ``.mov`` file inside
        ``config.CAPTURE_DIR_PATH``; its URL is kept in ``self.file_url``.

        Args:
            audio (bool, optional): Whether to capture audio (default: False).
            camera (bool, optional): Whether to capture the camera (default: False).
        """
        self.display_id = CGMainDisplayID()
        self.session = AVF.AVCaptureSession.alloc().init()
        self.screen_input = AVF.AVCaptureScreenInput.alloc().initWithDisplayID_(
            self.display_id
        )
        self.file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
        self.camera_session = None  # not used if camera=False

        # exist_ok avoids the check-then-create race and supports nested paths.
        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
        self.file_url = NSURL.fileURLWithPath_(
            os.path.join(
                config.CAPTURE_DIR_PATH,
                datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + ".mov",
            )
        )

        # Only touch the default audio device when audio was requested.
        self.audio_input = None
        if audio:
            # initWithDevice_error_ result is indexed with [0] to get the input.
            self.audio_input = AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
                AVF.AVCaptureDevice.defaultDeviceWithMediaType_(AVF.AVMediaTypeAudio),
                None,
            )
            if self.session.canAddInput_(self.audio_input[0]):
                self.session.addInput_(self.audio_input[0])

        if self.session.canAddInput_(self.screen_input):
            self.session.addInput_(self.screen_input)

        self.session.addOutput_(self.file_output)

        self.session.startRunning()

        # Cheat and pass a dummy delegate object where
        # normally we'd have a AVCaptureFileOutputRecordingDelegate.
        # The call's result is deliberately not stored: self.file_url must
        # keep pointing at the output file.
        self.file_output.startRecordingToOutputFileURL_recordingDelegate_(
            self.file_url, NSObject.alloc().init()
        )

        if camera:
            self._use_camera()

    def _use_camera(self) -> None:
        """Start capturing the camera in a separate capture session.

        The camera recording is written to a ``camera.<timestamp>.mov`` file
        inside ``config.CAPTURE_DIR_PATH``; its URL is kept in
        ``self.camera_url``.
        """
        self.camera_session = AVF.AVCaptureSession.alloc().init()
        self.camera_file_output = AVF.AVCaptureMovieFileOutput.alloc().init()
        self.camera_input = AVF.AVCaptureDeviceInput.alloc().initWithDevice_error_(
            AVF.AVCaptureDevice.defaultDeviceWithMediaType_(AVF.AVMediaTypeVideo), None
        )

        if self.camera_session.canAddInput_(self.camera_input[0]):
            self.camera_session.addInput_(self.camera_input[0])
        self.camera_session.startRunning()

        self.camera_session.addOutput_(self.camera_file_output)

        self.camera_url = NSURL.fileURLWithPath_(
            os.path.join(
                config.CAPTURE_DIR_PATH,
                datetime.now().strftime("camera.%Y-%m-%d-%H-%M-%S") + ".mov",
            )
        )
        self.camera_file_output.startRecordingToOutputFileURL_recordingDelegate_(
            self.camera_url, NSObject.alloc().init()
        )

    def stop(self) -> None:
        """Stop capturing the screen, audio, and camera.

        Does nothing for sessions that were never started.
        """
        if self.session is not None:
            self.session.stopRunning()
        if self.camera_session is not None:
            self.camera_session.stopRunning()


# Manual smoke test: record the screen with audio until enter is pressed.
if __name__ == "__main__":
    capture = Capture()
    capture.start(audio=True, camera=False)
    input("Press enter to stop")
    capture.stop()
103 changes: 103 additions & 0 deletions openadapt/capture/_windows.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
"""Allows for capturing the screen and audio on Windows."""
from datetime import datetime
from sys import platform
import os
import wave

from screen_recorder_sdk import screen_recorder
import pyaudio

from openadapt import config


class Capture:
    """Capture the screen video and audio on Windows."""

    # Audio parameters shared by the input stream and the WAV writer so the
    # saved file always matches what was recorded.
    _AUDIO_CHANNELS = 2
    _AUDIO_RATE = 44100
    _AUDIO_FRAMES_PER_BUFFER = 1024

    def __init__(self, pid: int = 0) -> None:
        """Initialize the capture object.

        Args:
            pid (int, optional): The process ID of the window to capture.
                Defaults to 0 (the entire screen)

        Raises:
            NotImplementedError: If the current platform is not Windows.
        """
        if platform != "win32":
            raise NotImplementedError(
                "This is the Windows implementation, please use the macOS version"
            )
        self.is_recording = False
        self.video_out = None
        self.audio_out = None
        self.pid = pid

        screen_recorder.init_resources(screen_recorder.RecorderParams(pid=self.pid))

        # Initialize PyAudio
        self.audio = pyaudio.PyAudio()
        self.audio_stream = None
        self.audio_frames = []

    def start(self, audio: bool = True, camera: bool = False) -> None:
        """Start capturing the screen video and audio.

        The video is written to a timestamped ``.mov`` file and, when audio is
        enabled, the audio to a ``.wav`` file with the same timestamp, both
        inside ``config.CAPTURE_DIR_PATH``.

        Args:
            audio (bool): Whether to capture audio.
            camera (bool, optional): Accepted so this backend can be called
                with the same keywords as the macOS one. Camera capture is
                not implemented here (default: False).

        Raises:
            RuntimeError: If a recording is already in progress.
            NotImplementedError: If ``camera`` is True.
        """
        if self.is_recording:
            raise RuntimeError("Recording is already in progress")
        if camera:
            raise NotImplementedError("Camera capture is not supported on Windows")

        os.makedirs(config.CAPTURE_DIR_PATH, exist_ok=True)
        # One timestamp for both files so the video and audio names pair up.
        timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")

        # Start video recording
        self.video_out = os.path.join(config.CAPTURE_DIR_PATH, timestamp + ".mov")
        # NOTE(review): the positional arguments are presumably frame rate,
        # bit rate and a hardware-acceleration flag; verify against
        # screen_recorder_sdk.
        screen_recorder.start_video_recording(self.video_out, 30, 8000000, True)
        # Only mark as recording once the recorder actually started.
        self.is_recording = True

        # Start audio recording
        if audio:
            self.audio_out = os.path.join(config.CAPTURE_DIR_PATH, timestamp + ".wav")
            # Reset before opening the stream so frames delivered by the
            # callback right after open() are not discarded.
            self.audio_frames = []
            self.audio_stream = self.audio.open(
                format=pyaudio.paInt16,
                channels=self._AUDIO_CHANNELS,
                rate=self._AUDIO_RATE,
                input=True,
                frames_per_buffer=self._AUDIO_FRAMES_PER_BUFFER,
                stream_callback=self._audio_callback,
            )

    def _audio_callback(
        self, in_data: bytes, frame_count: int, time_info: dict, status: int
    ) -> tuple:
        """Collect one buffer of recorded audio.

        Passed to ``PyAudio.open`` as ``stream_callback``; appends ``in_data``
        to ``self.audio_frames``.

        Returns:
            tuple: ``(None, pyaudio.paContinue)``.
        """
        self.audio_frames.append(in_data)
        return (None, pyaudio.paContinue)

    def stop(self) -> None:
        """Stop capturing the screen video and audio.

        Does nothing when no recording is in progress. When audio was being
        recorded, the stream is closed, PyAudio is terminated and the
        collected frames are written to ``self.audio_out``.
        """
        if not self.is_recording:
            return

        screen_recorder.stop_video_recording()
        if self.audio_stream:
            self.audio_stream.stop_stream()
            self.audio_stream.close()
            # Drop the closed stream so a later stop() does not touch it again.
            self.audio_stream = None
            self.audio.terminate()
            self.save_audio()
        self.is_recording = False
        # NOTE(review): resources are released here but only acquired in
        # __init__; confirm whether this object can be started again.
        screen_recorder.free_resources()

    def save_audio(self) -> None:
        """Save the captured audio to a WAV file at ``self.audio_out``."""
        with wave.open(self.audio_out, "wb") as wf:
            wf.setnchannels(self._AUDIO_CHANNELS)
            wf.setsampwidth(self.audio.get_sample_size(pyaudio.paInt16))
            wf.setframerate(self._AUDIO_RATE)
            wf.writeframes(b"".join(self.audio_frames))


# Manual smoke test: record with the default options until enter is pressed.
if __name__ == "__main__":
    capture = Capture()
    capture.start()
    input("Press enter to stop")
    capture.stop()
4 changes: 4 additions & 0 deletions openadapt/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
"ACTION_TEXT_SEP": "-",
"ACTION_TEXT_NAME_PREFIX": "<",
"ACTION_TEXT_NAME_SUFFIX": ">",
# PERFORMANCE PLOTTING CONFIGURATION
"PLOT_PERFORMANCE": True,
# CAPTURE CONFIGURATION
"CAPTURE_DIR_PATH": "captures",
# APP CONFIGURATIONS
"APP_DARK_MODE": False,
# SCRUBBING CONFIGURATIONS
Expand Down
33 changes: 29 additions & 4 deletions openadapt/replay.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,14 @@
--timestamp=<timestamp> Timestamp of the recording to replay.

"""

from time import sleep
from typing import Union
import os

from loguru import logger
import fire

from openadapt import crud, utils
from openadapt import capture, crud, utils
from openadapt.models import Recording

LOG_LEVEL = "INFO"
Expand All @@ -25,6 +26,7 @@
@logger.catch
def replay(
strategy_name: str,
record: bool = False,
timestamp: Union[str, None] = None,
recording: Recording = None,
) -> bool:
Expand All @@ -34,6 +36,7 @@ def replay(
strategy_name (str): Name of the replay strategy to use.
timestamp (str, optional): Timestamp of the recording to replay.
recording (Recording, optional): Recording to replay.
record (bool, optional): Flag indicating whether to record the replay.

Returns:
bool: True if replay was successful, False if the strategy raised an exception.
Expand Down Expand Up @@ -66,8 +69,30 @@ def replay(
strategy = strategy_class(recording)
logger.info(f"{strategy=}")

strategy.run()
return True
handler = None
rval = True
if record:
capture.start(audio=False, camera=False)
# TODO: handle this more robustly
sleep(1)
file_name = f"log-{strategy_name}-{recording.timestamp}.log"
# TODO: make configurable
dir_name = "captures"
file_path = os.path.join(dir_name, file_name)
logger.info(f"{file_path=}")
handler = logger.add(open(file_path, "w"))
try:
strategy.run()
except Exception as e:
logger.exception(e)
rval = False

if record:
sleep(1)
capture.stop()
logger.remove(handler)

return rval


# Entry point
Expand Down
1 change: 0 additions & 1 deletion openadapt/window/_macos.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def get_active_window(window_meta: dict) -> ApplicationServices.AXUIElementRef |
return None
return window


def get_window_data(window_meta: dict) -> dict:
"""Get the data of the window.

Expand Down
Loading