diff --git a/.gitignore b/.gitignore index 330d167..b2aca03 100644 --- a/.gitignore +++ b/.gitignore @@ -3,15 +3,15 @@ # gitignore contributors: remember to update Global/Xcode.gitignore, Objective-C.gitignore & Swift.gitignore ## User settings -xcuserdata/ +macOS/xcuserdata/ ## compatibility with Xcode 8 and earlier (ignoring not required starting Xcode 9) *.xcscmblueprint *.xccheckout ## compatibility with Xcode 3 and earlier (ignoring not required starting Xcode 4) -build/ -DerivedData/ +macOS/build/ +macOS/DerivedData/ *.moved-aside *.pbxuser !default.pbxuser @@ -31,7 +31,7 @@ DerivedData/ *.dSYM ## Playgrounds -timeline.xctimeline +preprocessor/timeline.xctimeline playground.xcworkspace # Swift Package Manager @@ -54,10 +54,8 @@ playground.xcworkspace # you should judge for yourself, the pros and cons are mentioned at: # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control # -# Pods/ -# -# Add this line if you want to avoid checking in source code from the Xcode workspace -# *.xcworkspace +macOS/Pods/ +macOS/*.xcworkspace # Carthage # @@ -88,3 +86,4 @@ fastlane/test_output # https://github.com/johnno1962/injectionforxcode iOSInjectionProject/ +.DS_Store diff --git a/README.md b/README.md index b16027e..faa9497 100644 --- a/README.md +++ b/README.md @@ -1 +1,54 @@ -# sign-language-recognition-trainer \ No newline at end of file
+# Pose Data Annotator
+
+**Pose Data Annotator** is a handy desktop app for seamless Vision analysis of body, hand, and face landmarks for applied tasks such as sign language recognition, action classification, and more. The app makes it easy to annotate both single videos and large datasets and to output the data in a standardized CSV file for further work.
+
+![App screenshot 1: Annotate video](http://data.matsworld.io/body-pose-annotator/screenshot_1.png)
+![App screenshot 2: Annotate dataset](http://data.matsworld.io/body-pose-annotator/screenshot_2.png)
+
+### Backbone
+
+The backbone of the body, hand, and face pose analysis is built on top of the [Vision](https://developer.apple.com/documentation/vision) framework by Apple. We have found it to work significantly better and more efficiently than other publicly available frameworks.
+
+### Data format
+
+The data is saved in a custom format to a `.csv` file. More information regarding the formatting system can be found [here](data_format.md). The app also supports the following data annotation formats:
+
+- ...
+
+## Installation
+
+### Download the app
+
+The latest build (as well as all previous stable builds) can be downloaded as a `.dmg` here.
+
+#### Requirements
+
+- macOS 11.0+ (Big Sur and above)
+
+### Build from source
+
+You can also build the app directly on your machine. To do that, download the source code by cloning the repository: `git clone https://github.com/thanhdolong/sign-language-recognition-trainer.git`.
+
+Then open the BodyPoseAnnotator.xcodeproj and hit **Build and Run**. Make sure that the BodyPoseAnnotator scheme is selected.
+
+#### Requirements
+
+- macOS 11.0+ (Big Sur and above)
+- Xcode 12.0+
+- Swift 5
+
+## Usage
+
+The app offers two annotation modes – single videos and full datasets. Either way, please ensure that all of your videos are in the `.mp4` format.
+
+### Single video
+
+To annotate a single video, simply select the **Annotate video** section in the left navigation bar. Then either drag and drop your file or select it using the **Load Video** button, and start the analysis using the **Start processing** button. Once the analysis is finished, you will be prompted with an alert view to save the resulting `.csv` file.
+
+### Full dataset
+
+To annotate a full dataset, ensure that you have a dataset folder that contains one subfolder per label, with the corresponding videos inside it (see the sketch below). The folder should not contain any other files irrelevant to the analysis. Then select the **Annotate dataset** section in the left navigation bar and either drag and drop your folder or select it using the **Load Dataset** button. Start the analysis using the **Start processing** button. Once the analysis is finished, you will be prompted with an alert view to save the resulting `.csv` file.
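+
+For illustration, a dataset folder might be laid out as follows (the label and file names here are only placeholders):
+
+```
+Dataset/
+├── hello/
+│   ├── hello_001.mp4
+│   └── hello_002.mp4
+└── thank_you/
+    ├── thank_you_001.mp4
+    └── thank_you_002.mp4
+```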
+
+## Contribution
+
+Any contribution is highly encouraged and welcome! You can take a look at the [Issues](https://github.com/thanhdolong/sign-language-recognition-trainer/issues) section to see what you can do. If you spot a bug, please file a Bug Report; if you have an idea of your own, please submit a Feature Request. Please use the corresponding template for each and provide as much information and context as possible.
diff --git a/macOS/DataAnnotation.xcodeproj/project.pbxproj b/macOS/DataAnnotation.xcodeproj/project.pbxproj new file mode 100644 index 0000000..f8cd0f7 --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/project.pbxproj @@ -0,0 +1,674 @@
+// !$*UTF8*$!
+{
+ archiveVersion = 1;
+ classes = {
+ };
+ objectVersion = 50;
+ objects = {
+
+/* Begin PBXBuildFile section */
+ 6B6CD9F325B4658400691F2F /* AsyncOperation.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6B6CD9F225B4658400691F2F /* AsyncOperation.swift */; };
+ 6B6CD9F925B4660500691F2F /* VideoAnalysisOperation.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6B6CD9F825B4660500691F2F /* VideoAnalysisOperation.swift */; };
+ 6F3FB5CB2595652100972A4C /* URL+.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F3FB5CA2595652100972A4C /* URL+.swift */; };
+ 6F97A82F2644363D007229F8 /* CardViewModifier.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F97A82E2644363D007229F8 /* CardViewModifier.swift */; };
+ 6F9FA25B2589627700672FDA /* AppDelegate.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA25A2589627700672FDA /* AppDelegate.swift */; };
+ 6F9FA25F2589627800672FDA /* Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 6F9FA25E2589627800672FDA /* Assets.xcassets */; };
+ 6F9FA2622589627800672FDA /* Preview Assets.xcassets in Resources */ = {isa = PBXBuildFile; fileRef = 6F9FA2612589627800672FDA /* Preview Assets.xcassets */; };
+ 6F9FA26E2589627800672FDA /* DataAnnotationTests.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA26D2589627800672FDA /* DataAnnotationTests.swift */; };
+ 6F9FA290258962A900672FDA /* DatasetManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA28C258962A900672FDA /* DatasetManager.swift */; };
+ 6F9FA291258962A900672FDA /* VideoProcessingManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA28D258962A900672FDA /* VideoProcessingManager.swift */; };
+ 6F9FA292258962A900672FDA /* VisionAnalysisManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA28E258962A900672FDA /* VisionAnalysisManager.swift */; };
+ 6F9FA293258962A900672FDA /* DataStructuringManager.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA28F258962A900672FDA /* DataStructuringManager.swift */; };
+ 6F9FA29A258962C300672FDA /* 
Dictionary+.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA297258962C300672FDA /* Dictionary+.swift */; }; + 6F9FA29B258962C300672FDA /* FilePanel+.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA298258962C300672FDA /* FilePanel+.swift */; }; + 6F9FA29C258962C400672FDA /* VNRecognizedPointsObservation+.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA299258962C300672FDA /* VNRecognizedPointsObservation+.swift */; }; + 6F9FA2A8258962F500672FDA /* ObservationConfiguration.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2A5258962F500672FDA /* ObservationConfiguration.swift */; }; + 6F9FA2A9258962F500672FDA /* ObservationTerminology.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2A6258962F500672FDA /* ObservationTerminology.swift */; }; + 6F9FA2AA258962F500672FDA /* MachineLearningConfiguration.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2A7258962F500672FDA /* MachineLearningConfiguration.swift */; }; + 6F9FA2BC258963DF00672FDA /* ContentView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2B9258963DF00672FDA /* ContentView.swift */; }; + 6F9FA2BD258963DF00672FDA /* AnotateVideoView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2BA258963DF00672FDA /* AnotateVideoView.swift */; }; + 6F9FA2BE258963DF00672FDA /* AnotateVideoViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6F9FA2BB258963DF00672FDA /* AnotateVideoViewModel.swift */; }; + 6FF2A4352594F7E200AD5C44 /* Constants.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FF2A4342594F7E200AD5C44 /* Constants.swift */; }; + 6FF2A43D25952BC300AD5C44 /* Array+.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FF2A43C25952BC300AD5C44 /* Array+.swift */; }; + 6FF2A4432595309E00AD5C44 /* AnotateDatasetView.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FF2A4422595309E00AD5C44 /* AnotateDatasetView.swift */; }; + 6FF2A447259530AB00AD5C44 /* AnotateDatasetViewModel.swift in Sources */ = {isa = PBXBuildFile; fileRef = 6FF2A446259530AB00AD5C44 /* AnotateDatasetViewModel.swift */; }; + F5C4C4AC25A88D94009E31DD /* AnalysisSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C4C4A825A88D94009E31DD /* AnalysisSettingsView.swift */; }; + F5C4C4AD25A88D94009E31DD /* GeneralSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C4C4A925A88D94009E31DD /* GeneralSettingsView.swift */; }; + F5C4C4AE25A88D94009E31DD /* SettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C4C4AA25A88D94009E31DD /* SettingsView.swift */; }; + F5C4C4AF25A88D94009E31DD /* VideoSettingsView.swift in Sources */ = {isa = PBXBuildFile; fileRef = F5C4C4AB25A88D94009E31DD /* VideoSettingsView.swift */; }; + F5EF5C4B25C2F04D00964DE0 /* Icons.icns in Resources */ = {isa = PBXBuildFile; fileRef = F5EF5C4A25C2F04D00964DE0 /* Icons.icns */; }; +/* End PBXBuildFile section */ + +/* Begin PBXContainerItemProxy section */ + 6F9FA26A2589627800672FDA /* PBXContainerItemProxy */ = { + isa = PBXContainerItemProxy; + containerPortal = 6F9FA24F2589627700672FDA /* Project object */; + proxyType = 1; + remoteGlobalIDString = 6F9FA2562589627700672FDA; + remoteInfo = DataAnnotation; + }; +/* End PBXContainerItemProxy section */ + +/* Begin PBXFileReference section */ + 6B6CD9F225B4658400691F2F /* AsyncOperation.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AsyncOperation.swift; sourceTree = ""; }; + 6B6CD9F825B4660500691F2F /* VideoAnalysisOperation.swift */ = {isa = PBXFileReference; lastKnownFileType = 
sourcecode.swift; path = VideoAnalysisOperation.swift; sourceTree = ""; }; + 6F3FB5CA2595652100972A4C /* URL+.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "URL+.swift"; sourceTree = ""; }; + 6F97A82E2644363D007229F8 /* CardViewModifier.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = CardViewModifier.swift; sourceTree = ""; }; + 6F9FA2572589627700672FDA /* Pose Data Annotator.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = "Pose Data Annotator.app"; sourceTree = BUILT_PRODUCTS_DIR; }; + 6F9FA25A2589627700672FDA /* AppDelegate.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AppDelegate.swift; sourceTree = ""; }; + 6F9FA25E2589627800672FDA /* Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = Assets.xcassets; sourceTree = ""; }; + 6F9FA2612589627800672FDA /* Preview Assets.xcassets */ = {isa = PBXFileReference; lastKnownFileType = folder.assetcatalog; path = "Preview Assets.xcassets"; sourceTree = ""; }; + 6F9FA2632589627800672FDA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 6F9FA2642589627800672FDA /* DataAnnotation.entitlements */ = {isa = PBXFileReference; lastKnownFileType = text.plist.entitlements; path = DataAnnotation.entitlements; sourceTree = ""; }; + 6F9FA2692589627800672FDA /* DataAnnotationTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = DataAnnotationTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + 6F9FA26D2589627800672FDA /* DataAnnotationTests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DataAnnotationTests.swift; sourceTree = ""; }; + 6F9FA26F2589627800672FDA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 6F9FA2782589627800672FDA /* DataAnnotationUITests.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = DataAnnotationUITests.swift; sourceTree = ""; }; + 6F9FA27A2589627800672FDA /* Info.plist */ = {isa = PBXFileReference; lastKnownFileType = text.plist.xml; path = Info.plist; sourceTree = ""; }; + 6F9FA28C258962A900672FDA /* DatasetManager.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DatasetManager.swift; sourceTree = ""; }; + 6F9FA28D258962A900672FDA /* VideoProcessingManager.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoProcessingManager.swift; sourceTree = ""; }; + 6F9FA28E258962A900672FDA /* VisionAnalysisManager.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VisionAnalysisManager.swift; sourceTree = ""; }; + 6F9FA28F258962A900672FDA /* DataStructuringManager.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = DataStructuringManager.swift; sourceTree = ""; }; + 6F9FA297258962C300672FDA /* Dictionary+.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "Dictionary+.swift"; sourceTree = ""; }; + 6F9FA298258962C300672FDA /* FilePanel+.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "FilePanel+.swift"; sourceTree = ""; }; + 6F9FA299258962C300672FDA /* VNRecognizedPointsObservation+.swift */ = {isa = 
PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "VNRecognizedPointsObservation+.swift"; sourceTree = ""; }; + 6F9FA2A5258962F500672FDA /* ObservationConfiguration.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ObservationConfiguration.swift; sourceTree = ""; }; + 6F9FA2A6258962F500672FDA /* ObservationTerminology.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ObservationTerminology.swift; sourceTree = ""; }; + 6F9FA2A7258962F500672FDA /* MachineLearningConfiguration.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = MachineLearningConfiguration.swift; sourceTree = ""; }; + 6F9FA2B9258963DF00672FDA /* ContentView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = ContentView.swift; sourceTree = ""; }; + 6F9FA2BA258963DF00672FDA /* AnotateVideoView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnotateVideoView.swift; sourceTree = ""; }; + 6F9FA2BB258963DF00672FDA /* AnotateVideoViewModel.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnotateVideoViewModel.swift; sourceTree = ""; }; + 6FF2A4342594F7E200AD5C44 /* Constants.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = Constants.swift; sourceTree = ""; }; + 6FF2A43C25952BC300AD5C44 /* Array+.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = "Array+.swift"; sourceTree = ""; }; + 6FF2A4422595309E00AD5C44 /* AnotateDatasetView.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AnotateDatasetView.swift; sourceTree = ""; }; + 6FF2A446259530AB00AD5C44 /* AnotateDatasetViewModel.swift */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.swift; path = AnotateDatasetViewModel.swift; sourceTree = ""; }; + F5C4C4A825A88D94009E31DD /* AnalysisSettingsView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = AnalysisSettingsView.swift; sourceTree = ""; }; + F5C4C4A925A88D94009E31DD /* GeneralSettingsView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = GeneralSettingsView.swift; sourceTree = ""; }; + F5C4C4AA25A88D94009E31DD /* SettingsView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = SettingsView.swift; sourceTree = ""; }; + F5C4C4AB25A88D94009E31DD /* VideoSettingsView.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = VideoSettingsView.swift; sourceTree = ""; }; + F5EF5C4A25C2F04D00964DE0 /* Icons.icns */ = {isa = PBXFileReference; lastKnownFileType = image.icns; name = Icons.icns; path = Assets.xcassets/Icons.icns; sourceTree = ""; }; +/* End PBXFileReference section */ + +/* Begin PBXFrameworksBuildPhase section */ + 6F9FA2542589627700672FDA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 6F9FA2662589627800672FDA /* Frameworks */ = { + isa = PBXFrameworksBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXFrameworksBuildPhase section */ + +/* Begin PBXGroup section */ + 6B6CD9F125B4656F00691F2F /* Operations */ = { + isa = PBXGroup; + children = ( + 
6B6CD9F225B4658400691F2F /* AsyncOperation.swift */, + 6B6CD9F825B4660500691F2F /* VideoAnalysisOperation.swift */, + ); + path = Operations; + sourceTree = ""; + }; + 6F97A82D264435E0007229F8 /* Modifiers */ = { + isa = PBXGroup; + children = ( + 6F97A82E2644363D007229F8 /* CardViewModifier.swift */, + ); + path = Modifiers; + sourceTree = ""; + }; + 6F9FA24E2589627700672FDA = { + isa = PBXGroup; + children = ( + 6F9FA2592589627700672FDA /* DataAnnotation */, + 6F9FA26C2589627800672FDA /* DataAnnotationTests */, + 6F9FA2772589627800672FDA /* DataAnnotationUITests */, + 6F9FA2582589627700672FDA /* Products */, + ); + sourceTree = ""; + }; + 6F9FA2582589627700672FDA /* Products */ = { + isa = PBXGroup; + children = ( + 6F9FA2572589627700672FDA /* Pose Data Annotator.app */, + 6F9FA2692589627800672FDA /* DataAnnotationTests.xctest */, + ); + name = Products; + sourceTree = ""; + }; + 6F9FA2592589627700672FDA /* DataAnnotation */ = { + isa = PBXGroup; + children = ( + 6B6CD9F125B4656F00691F2F /* Operations */, + 6F9FA29F258962CF00672FDA /* Application */, + 6F9FA2A4258962EA00672FDA /* Configuration */, + 6F9FA296258962B800672FDA /* Extensions */, + 6F9FA28B2589629E00672FDA /* Managers */, + 6F9FA2AD2589630000672FDA /* Resources */, + 6F9FA2B22589633000672FDA /* Screens */, + ); + path = DataAnnotation; + sourceTree = ""; + }; + 6F9FA26C2589627800672FDA /* DataAnnotationTests */ = { + isa = PBXGroup; + children = ( + 6F9FA26D2589627800672FDA /* DataAnnotationTests.swift */, + 6F9FA26F2589627800672FDA /* Info.plist */, + ); + path = DataAnnotationTests; + sourceTree = ""; + }; + 6F9FA2772589627800672FDA /* DataAnnotationUITests */ = { + isa = PBXGroup; + children = ( + 6F9FA2782589627800672FDA /* DataAnnotationUITests.swift */, + 6F9FA27A2589627800672FDA /* Info.plist */, + ); + path = DataAnnotationUITests; + sourceTree = ""; + }; + 6F9FA28B2589629E00672FDA /* Managers */ = { + isa = PBXGroup; + children = ( + 6F9FA28C258962A900672FDA /* DatasetManager.swift */, + 6F9FA28F258962A900672FDA /* DataStructuringManager.swift */, + 6F9FA28D258962A900672FDA /* VideoProcessingManager.swift */, + 6F9FA28E258962A900672FDA /* VisionAnalysisManager.swift */, + ); + path = Managers; + sourceTree = ""; + }; + 6F9FA296258962B800672FDA /* Extensions */ = { + isa = PBXGroup; + children = ( + 6F9FA297258962C300672FDA /* Dictionary+.swift */, + 6F9FA298258962C300672FDA /* FilePanel+.swift */, + 6F9FA299258962C300672FDA /* VNRecognizedPointsObservation+.swift */, + 6FF2A43C25952BC300AD5C44 /* Array+.swift */, + 6F3FB5CA2595652100972A4C /* URL+.swift */, + ); + path = Extensions; + sourceTree = ""; + }; + 6F9FA29F258962CF00672FDA /* Application */ = { + isa = PBXGroup; + children = ( + 6F9FA25A2589627700672FDA /* AppDelegate.swift */, + 6FF2A4342594F7E200AD5C44 /* Constants.swift */, + ); + path = Application; + sourceTree = ""; + }; + 6F9FA2A4258962EA00672FDA /* Configuration */ = { + isa = PBXGroup; + children = ( + 6F9FA2A7258962F500672FDA /* MachineLearningConfiguration.swift */, + 6F9FA2A5258962F500672FDA /* ObservationConfiguration.swift */, + 6F9FA2A6258962F500672FDA /* ObservationTerminology.swift */, + ); + path = Configuration; + sourceTree = ""; + }; + 6F9FA2AD2589630000672FDA /* Resources */ = { + isa = PBXGroup; + children = ( + F5EF5C4A25C2F04D00964DE0 /* Icons.icns */, + 6F9FA25E2589627800672FDA /* Assets.xcassets */, + 6F9FA2612589627800672FDA /* Preview Assets.xcassets */, + 6F9FA2632589627800672FDA /* Info.plist */, + 6F9FA2642589627800672FDA /* DataAnnotation.entitlements */, + ); + path = 
Resources; + sourceTree = ""; + }; + 6F9FA2B22589633000672FDA /* Screens */ = { + isa = PBXGroup; + children = ( + 6F97A82D264435E0007229F8 /* Modifiers */, + F5C4C4A725A88D6C009E31DD /* Settings */, + 6FF2A4412595308900AD5C44 /* Anotate Dataset */, + 6FF2A4402595308300AD5C44 /* Anotate Video */, + 6F9FA2B9258963DF00672FDA /* ContentView.swift */, + ); + path = Screens; + sourceTree = ""; + }; + 6FF2A4402595308300AD5C44 /* Anotate Video */ = { + isa = PBXGroup; + children = ( + 6F9FA2BA258963DF00672FDA /* AnotateVideoView.swift */, + 6F9FA2BB258963DF00672FDA /* AnotateVideoViewModel.swift */, + ); + path = "Anotate Video"; + sourceTree = ""; + }; + 6FF2A4412595308900AD5C44 /* Anotate Dataset */ = { + isa = PBXGroup; + children = ( + 6FF2A4422595309E00AD5C44 /* AnotateDatasetView.swift */, + 6FF2A446259530AB00AD5C44 /* AnotateDatasetViewModel.swift */, + ); + path = "Anotate Dataset"; + sourceTree = ""; + }; + F5C4C4A725A88D6C009E31DD /* Settings */ = { + isa = PBXGroup; + children = ( + F5C4C4A825A88D94009E31DD /* AnalysisSettingsView.swift */, + F5C4C4A925A88D94009E31DD /* GeneralSettingsView.swift */, + F5C4C4AA25A88D94009E31DD /* SettingsView.swift */, + F5C4C4AB25A88D94009E31DD /* VideoSettingsView.swift */, + ); + path = Settings; + sourceTree = ""; + }; +/* End PBXGroup section */ + +/* Begin PBXNativeTarget section */ + 6F9FA2562589627700672FDA /* DataAnnotation */ = { + isa = PBXNativeTarget; + buildConfigurationList = 6F9FA27D2589627900672FDA /* Build configuration list for PBXNativeTarget "DataAnnotation" */; + buildPhases = ( + 6F9FA2532589627700672FDA /* Sources */, + 6F9FA2542589627700672FDA /* Frameworks */, + 6F9FA2552589627700672FDA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + ); + name = DataAnnotation; + productName = DataAnnotation; + productReference = 6F9FA2572589627700672FDA /* Pose Data Annotator.app */; + productType = "com.apple.product-type.application"; + }; + 6F9FA2682589627800672FDA /* DataAnnotationTests */ = { + isa = PBXNativeTarget; + buildConfigurationList = 6F9FA2802589627900672FDA /* Build configuration list for PBXNativeTarget "DataAnnotationTests" */; + buildPhases = ( + 6F9FA2652589627800672FDA /* Sources */, + 6F9FA2662589627800672FDA /* Frameworks */, + 6F9FA2672589627800672FDA /* Resources */, + ); + buildRules = ( + ); + dependencies = ( + 6F9FA26B2589627800672FDA /* PBXTargetDependency */, + ); + name = DataAnnotationTests; + productName = DataAnnotationTests; + productReference = 6F9FA2692589627800672FDA /* DataAnnotationTests.xctest */; + productType = "com.apple.product-type.bundle.unit-test"; + }; +/* End PBXNativeTarget section */ + +/* Begin PBXProject section */ + 6F9FA24F2589627700672FDA /* Project object */ = { + isa = PBXProject; + attributes = { + LastSwiftUpdateCheck = 1230; + LastUpgradeCheck = 1240; + TargetAttributes = { + 6F9FA2562589627700672FDA = { + CreatedOnToolsVersion = 12.3; + }; + 6F9FA2682589627800672FDA = { + CreatedOnToolsVersion = 12.3; + TestTargetID = 6F9FA2562589627700672FDA; + }; + }; + }; + buildConfigurationList = 6F9FA2522589627700672FDA /* Build configuration list for PBXProject "DataAnnotation" */; + compatibilityVersion = "Xcode 9.3"; + developmentRegion = en; + hasScannedForEncodings = 0; + knownRegions = ( + en, + Base, + de, + es, + it, + sk, + cs, + hu, + pl, + fr, + hr, + ); + mainGroup = 6F9FA24E2589627700672FDA; + productRefGroup = 6F9FA2582589627700672FDA /* Products */; + projectDirPath = ""; + projectRoot = ""; + targets = ( + 6F9FA2562589627700672FDA /* DataAnnotation */, + 
6F9FA2682589627800672FDA /* DataAnnotationTests */, + ); + }; +/* End PBXProject section */ + +/* Begin PBXResourcesBuildPhase section */ + 6F9FA2552589627700672FDA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 6F9FA2622589627800672FDA /* Preview Assets.xcassets in Resources */, + F5EF5C4B25C2F04D00964DE0 /* Icons.icns in Resources */, + 6F9FA25F2589627800672FDA /* Assets.xcassets in Resources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 6F9FA2672589627800672FDA /* Resources */ = { + isa = PBXResourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXResourcesBuildPhase section */ + +/* Begin PBXSourcesBuildPhase section */ + 6F9FA2532589627700672FDA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 6F9FA292258962A900672FDA /* VisionAnalysisManager.swift in Sources */, + 6FF2A447259530AB00AD5C44 /* AnotateDatasetViewModel.swift in Sources */, + 6F9FA29C258962C400672FDA /* VNRecognizedPointsObservation+.swift in Sources */, + 6F97A82F2644363D007229F8 /* CardViewModifier.swift in Sources */, + 6B6CD9F925B4660500691F2F /* VideoAnalysisOperation.swift in Sources */, + 6F9FA290258962A900672FDA /* DatasetManager.swift in Sources */, + 6F9FA2AA258962F500672FDA /* MachineLearningConfiguration.swift in Sources */, + 6F9FA2BE258963DF00672FDA /* AnotateVideoViewModel.swift in Sources */, + 6FF2A43D25952BC300AD5C44 /* Array+.swift in Sources */, + F5C4C4AF25A88D94009E31DD /* VideoSettingsView.swift in Sources */, + 6FF2A4352594F7E200AD5C44 /* Constants.swift in Sources */, + 6F9FA2A9258962F500672FDA /* ObservationTerminology.swift in Sources */, + F5C4C4AC25A88D94009E31DD /* AnalysisSettingsView.swift in Sources */, + 6F9FA2BD258963DF00672FDA /* AnotateVideoView.swift in Sources */, + 6F9FA291258962A900672FDA /* VideoProcessingManager.swift in Sources */, + 6F9FA29A258962C300672FDA /* Dictionary+.swift in Sources */, + 6F9FA293258962A900672FDA /* DataStructuringManager.swift in Sources */, + 6F9FA29B258962C300672FDA /* FilePanel+.swift in Sources */, + 6F9FA2BC258963DF00672FDA /* ContentView.swift in Sources */, + 6F9FA2A8258962F500672FDA /* ObservationConfiguration.swift in Sources */, + F5C4C4AD25A88D94009E31DD /* GeneralSettingsView.swift in Sources */, + 6FF2A4432595309E00AD5C44 /* AnotateDatasetView.swift in Sources */, + 6F3FB5CB2595652100972A4C /* URL+.swift in Sources */, + 6F9FA25B2589627700672FDA /* AppDelegate.swift in Sources */, + 6B6CD9F325B4658400691F2F /* AsyncOperation.swift in Sources */, + F5C4C4AE25A88D94009E31DD /* SettingsView.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; + 6F9FA2652589627800672FDA /* Sources */ = { + isa = PBXSourcesBuildPhase; + buildActionMask = 2147483647; + files = ( + 6F9FA26E2589627800672FDA /* DataAnnotationTests.swift in Sources */, + ); + runOnlyForDeploymentPostprocessing = 0; + }; +/* End PBXSourcesBuildPhase section */ + +/* Begin PBXTargetDependency section */ + 6F9FA26B2589627800672FDA /* PBXTargetDependency */ = { + isa = PBXTargetDependency; + target = 6F9FA2562589627700672FDA /* DataAnnotation */; + targetProxy = 6F9FA26A2589627800672FDA /* PBXContainerItemProxy */; + }; +/* End PBXTargetDependency section */ + +/* Begin XCBuildConfiguration section */ + 6F9FA27B2589627900672FDA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + 
CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = dwarf; + ENABLE_STRICT_OBJC_MSGSEND = YES; + ENABLE_TESTABILITY = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_DYNAMIC_NO_PIC = NO; + GCC_NO_COMMON_BLOCKS = YES; + GCC_OPTIMIZATION_LEVEL = 0; + GCC_PREPROCESSOR_DEFINITIONS = ( + "DEBUG=1", + "$(inherited)", + ); + GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 11.1; + MTL_ENABLE_DEBUG_INFO = INCLUDE_SOURCE; + MTL_FAST_MATH = YES; + ONLY_ACTIVE_ARCH = YES; + SDKROOT = macosx; + SWIFT_ACTIVE_COMPILATION_CONDITIONS = DEBUG; + SWIFT_OPTIMIZATION_LEVEL = "-Onone"; + }; + name = Debug; + }; + 6F9FA27C2589627900672FDA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_SEARCH_USER_PATHS = NO; + CLANG_ANALYZER_LOCALIZABILITY_NONLOCALIZED = YES; + CLANG_ANALYZER_NONNULL = YES; + CLANG_ANALYZER_NUMBER_OBJECT_CONVERSION = YES_AGGRESSIVE; + CLANG_CXX_LANGUAGE_STANDARD = "gnu++14"; + CLANG_CXX_LIBRARY = "libc++"; + CLANG_ENABLE_MODULES = YES; + CLANG_ENABLE_OBJC_ARC = YES; + CLANG_ENABLE_OBJC_WEAK = YES; + CLANG_WARN_BLOCK_CAPTURE_AUTORELEASING = YES; + CLANG_WARN_BOOL_CONVERSION = YES; + CLANG_WARN_COMMA = YES; + CLANG_WARN_CONSTANT_CONVERSION = YES; + CLANG_WARN_DEPRECATED_OBJC_IMPLEMENTATIONS = YES; + CLANG_WARN_DIRECT_OBJC_ISA_USAGE = YES_ERROR; + CLANG_WARN_DOCUMENTATION_COMMENTS = YES; + CLANG_WARN_EMPTY_BODY = YES; + CLANG_WARN_ENUM_CONVERSION = YES; + CLANG_WARN_INFINITE_RECURSION = YES; + CLANG_WARN_INT_CONVERSION = YES; + CLANG_WARN_NON_LITERAL_NULL_CONVERSION = YES; + CLANG_WARN_OBJC_IMPLICIT_RETAIN_SELF = YES; + CLANG_WARN_OBJC_LITERAL_CONVERSION = YES; + CLANG_WARN_OBJC_ROOT_CLASS = YES_ERROR; + CLANG_WARN_QUOTED_INCLUDE_IN_FRAMEWORK_HEADER = YES; + CLANG_WARN_RANGE_LOOP_ANALYSIS = YES; + CLANG_WARN_STRICT_PROTOTYPES = YES; + CLANG_WARN_SUSPICIOUS_MOVE = YES; + CLANG_WARN_UNGUARDED_AVAILABILITY = YES_AGGRESSIVE; + CLANG_WARN_UNREACHABLE_CODE = YES; + CLANG_WARN__DUPLICATE_METHOD_MATCH = YES; + COPY_PHASE_STRIP = NO; + DEBUG_INFORMATION_FORMAT = "dwarf-with-dsym"; + ENABLE_NS_ASSERTIONS = NO; + ENABLE_STRICT_OBJC_MSGSEND = YES; + GCC_C_LANGUAGE_STANDARD = gnu11; + GCC_NO_COMMON_BLOCKS = YES; + 
GCC_WARN_64_TO_32_BIT_CONVERSION = YES; + GCC_WARN_ABOUT_RETURN_TYPE = YES_ERROR; + GCC_WARN_UNDECLARED_SELECTOR = YES; + GCC_WARN_UNINITIALIZED_AUTOS = YES_AGGRESSIVE; + GCC_WARN_UNUSED_FUNCTION = YES; + GCC_WARN_UNUSED_VARIABLE = YES; + MACOSX_DEPLOYMENT_TARGET = 11.1; + MTL_ENABLE_DEBUG_INFO = NO; + MTL_FAST_MATH = YES; + SDKROOT = macosx; + SWIFT_COMPILATION_MODE = wholemodule; + SWIFT_OPTIMIZATION_LEVEL = "-O"; + }; + name = Release; + }; + 6F9FA27E2589627900672FDA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_ENTITLEMENTS = DataAnnotation/Resources/DataAnnotation.entitlements; + CODE_SIGN_IDENTITY = "-"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 0; + DEVELOPMENT_ASSET_PATHS = "\"DataAnnotation/Resources\""; + DEVELOPMENT_TEAM = KW278547Y4; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + INFOPLIST_FILE = DataAnnotation/Resources/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 11.0; + PRODUCT_BUNDLE_IDENTIFIER = com.signlanguagerecognition.DataAnnotation; + PRODUCT_NAME = "Pose Data Annotator"; + SWIFT_VERSION = 5.0; + }; + name = Debug; + }; + 6F9FA27F2589627900672FDA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ASSETCATALOG_COMPILER_GLOBAL_ACCENT_COLOR_NAME = AccentColor; + CODE_SIGN_ENTITLEMENTS = DataAnnotation/Resources/DataAnnotation.entitlements; + CODE_SIGN_IDENTITY = "-"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + CURRENT_PROJECT_VERSION = 0; + DEVELOPMENT_ASSET_PATHS = "\"DataAnnotation/Resources\""; + DEVELOPMENT_TEAM = KW278547Y4; + ENABLE_HARDENED_RUNTIME = YES; + ENABLE_PREVIEWS = YES; + INFOPLIST_FILE = DataAnnotation/Resources/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 11.0; + PRODUCT_BUNDLE_IDENTIFIER = com.signlanguagerecognition.DataAnnotation; + PRODUCT_NAME = "Pose Data Annotator"; + SWIFT_VERSION = 5.0; + }; + name = Release; + }; + 6F9FA2812589627900672FDA /* Debug */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = KW278547Y4; + INFOPLIST_FILE = DataAnnotationTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 11.0; + PRODUCT_BUNDLE_IDENTIFIER = com.thanhdolong.DataAnnotationTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TEST_HOST = "$(BUILT_PRODUCTS_DIR)/DataAnnotation.app/Contents/MacOS/DataAnnotation"; + }; + name = Debug; + }; + 6F9FA2822589627900672FDA /* Release */ = { + isa = XCBuildConfiguration; + buildSettings = { + ALWAYS_EMBED_SWIFT_STANDARD_LIBRARIES = YES; + BUNDLE_LOADER = "$(TEST_HOST)"; + CODE_SIGN_STYLE = Automatic; + COMBINE_HIDPI_IMAGES = YES; + DEVELOPMENT_TEAM = KW278547Y4; + INFOPLIST_FILE = DataAnnotationTests/Info.plist; + LD_RUNPATH_SEARCH_PATHS = ( + "$(inherited)", + "@executable_path/../Frameworks", + "@loader_path/../Frameworks", + ); + MACOSX_DEPLOYMENT_TARGET = 11.0; + PRODUCT_BUNDLE_IDENTIFIER = com.thanhdolong.DataAnnotationTests; + PRODUCT_NAME = "$(TARGET_NAME)"; + SWIFT_VERSION = 5.0; + TEST_HOST = 
"$(BUILT_PRODUCTS_DIR)/DataAnnotation.app/Contents/MacOS/DataAnnotation"; + }; + name = Release; + }; +/* End XCBuildConfiguration section */ + +/* Begin XCConfigurationList section */ + 6F9FA2522589627700672FDA /* Build configuration list for PBXProject "DataAnnotation" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 6F9FA27B2589627900672FDA /* Debug */, + 6F9FA27C2589627900672FDA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 6F9FA27D2589627900672FDA /* Build configuration list for PBXNativeTarget "DataAnnotation" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 6F9FA27E2589627900672FDA /* Debug */, + 6F9FA27F2589627900672FDA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; + 6F9FA2802589627900672FDA /* Build configuration list for PBXNativeTarget "DataAnnotationTests" */ = { + isa = XCConfigurationList; + buildConfigurations = ( + 6F9FA2812589627900672FDA /* Debug */, + 6F9FA2822589627900672FDA /* Release */, + ); + defaultConfigurationIsVisible = 0; + defaultConfigurationName = Release; + }; +/* End XCConfigurationList section */ + }; + rootObject = 6F9FA24F2589627700672FDA /* Project object */; +} diff --git a/macOS/DataAnnotation.xcodeproj/project.xcworkspace/contents.xcworkspacedata b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/contents.xcworkspacedata new file mode 100644 index 0000000..919434a --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/contents.xcworkspacedata @@ -0,0 +1,7 @@ + + + + + diff --git a/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist new file mode 100644 index 0000000..18d9810 --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcshareddata/IDEWorkspaceChecks.plist @@ -0,0 +1,8 @@ + + + + + IDEDidComputeMac32BitWarning + + + diff --git a/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/matyasbohacek.xcuserdatad/UserInterfaceState.xcuserstate b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/matyasbohacek.xcuserdatad/UserInterfaceState.xcuserstate new file mode 100644 index 0000000..3b2422f Binary files /dev/null and b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/matyasbohacek.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/thanhdolong.xcuserdatad/UserInterfaceState.xcuserstate b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/thanhdolong.xcuserdatad/UserInterfaceState.xcuserstate new file mode 100644 index 0000000..510d1b1 Binary files /dev/null and b/macOS/DataAnnotation.xcodeproj/project.xcworkspace/xcuserdata/thanhdolong.xcuserdatad/UserInterfaceState.xcuserstate differ diff --git a/macOS/DataAnnotation.xcodeproj/xcuserdata/matyasbohacek.xcuserdatad/xcschemes/xcschememanagement.plist b/macOS/DataAnnotation.xcodeproj/xcuserdata/matyasbohacek.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 0000000..40e7734 --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/xcuserdata/matyasbohacek.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + DataAnnotation.xcscheme_^#shared#^_ + + orderHint + 0 + + + + diff --git a/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist 
b/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist new file mode 100644 index 0000000..e8abd6a --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -0,0 +1,136 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist b/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 0000000..40e7734 --- /dev/null +++ b/macOS/DataAnnotation.xcodeproj/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,14 @@ + + + + + SchemeUserState + + DataAnnotation.xcscheme_^#shared#^_ + + orderHint + 0 + + + + diff --git a/macOS/DataAnnotation/Application/AppDelegate.swift b/macOS/DataAnnotation/Application/AppDelegate.swift new file mode 100644 index 0000000..c643203 --- /dev/null +++ b/macOS/DataAnnotation/Application/AppDelegate.swift @@ -0,0 +1,28 @@ +// +// DataAnnotationApp.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 15.12.2020. +// + +import SwiftUI + +@main +struct DataAnnotationApp: App { + var body: some Scene { + WindowGroup { + ContentView() + .onAppear(perform: { + ObservationType.allCases.forEach { observationType in + if UserDefaults.standard.object(forKey: observationType.rawValue) == nil { + UserDefaults.standard.setValue(true, forKey: observationType.rawValue) + } + } + }) + } + + Settings { + SettingsView() + } + } +} diff --git a/macOS/DataAnnotation/Application/Constants.swift b/macOS/DataAnnotation/Application/Constants.swift new file mode 100644 index 0000000..a1b5a8d --- /dev/null +++ b/macOS/DataAnnotation/Application/Constants.swift @@ -0,0 +1,11 @@ +// +// Constants.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 24.12.2020. +// + + +struct Constant { + static let allowedFileTypes = ["mp4", "mov"] +} diff --git a/macOS/DataAnnotation/Configuration/MachineLearningConfiguration.swift b/macOS/DataAnnotation/Configuration/MachineLearningConfiguration.swift new file mode 100644 index 0000000..3bec881 --- /dev/null +++ b/macOS/DataAnnotation/Configuration/MachineLearningConfiguration.swift @@ -0,0 +1,27 @@ +// +// MachineLearningConfiguration.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 13.12.2020. +// + +import Foundation + +struct MachineLearningConfiguration { + + /// + /// Threshold for the hand pose detection using the Vision framework. + /// + static var handPoseDetectionThreshold: Float = UserDefaults.standard.float(forKey: "handPoseDetectionThreshold") + + /// + /// Threshold for the body pose detection using the Vision framework. + /// + static let bodyPoseDetectionThreshold: Float = UserDefaults.standard.float(forKey: "bodyPoseDetectionThreshold") + + /// + /// Frames per seconds to be used to process frames from videos. + /// + static let framesPerSecond: Int = UserDefaults.standard.integer(forKey: "fps") + +} diff --git a/macOS/DataAnnotation/Configuration/ObservationConfiguration.swift b/macOS/DataAnnotation/Configuration/ObservationConfiguration.swift new file mode 100644 index 0000000..036996b --- /dev/null +++ b/macOS/DataAnnotation/Configuration/ObservationConfiguration.swift @@ -0,0 +1,68 @@ +// +// ObservationConfiguration.swift +// SLR data annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. 
+// + +import Foundation +import Vision + +enum ObservationType: String, CaseIterable { + case bodyLandmarks + case handLandmarks + case faceLandmarks +} + +struct ObservationConfiguration { + /// + /// List of all the data annotations to be analyzed using Vision. + /// + static var desiredDataAnnotations: [ObservationType] { + get { + var result = [ObservationType]() + + ObservationType.allCases.forEach { observationType in + if UserDefaults.standard.bool(forKey: observationType.rawValue) { + result.append(observationType) + } + } + + return result + } + } + + /// + /// List of requested recognized body landmarks key in order to filter out any redundant. + /// + /// - Warning: If empty, all body landmarks are requested + /// + static let requestedBodyLandmarks: [VNHumanBodyPoseObservation.JointName] = [ + .nose, .root, .neck, + .rightEye, .leftEye, + .rightEar, .leftEar, + .rightShoulder, .leftShoulder, + .rightElbow, .leftElbow, + .rightWrist, .leftWrist + ] + + /// + /// List of requested recognized hand landmarks key in order to filter out any redundant. + /// + /// - Warning: If empty, all hand landmarks are requested + /// + static let requestedHandLandmarks: [VNHumanHandPoseObservation.JointName] = [ + .wrist, .thumbCMC, + .thumbMP, .thumbIP, + .thumbTip, .indexMCP, + .indexPIP, .indexDIP, + .indexTip, .middleMCP, + .middlePIP, .middleDIP, + .middleTip, .ringMCP, + .ringPIP, .ringDIP, + .ringTip, .littleMCP, + .littlePIP, .littleDIP, + .littleTip + ] +} diff --git a/macOS/DataAnnotation/Configuration/ObservationTerminology.swift b/macOS/DataAnnotation/Configuration/ObservationTerminology.swift new file mode 100644 index 0000000..aa0e04a --- /dev/null +++ b/macOS/DataAnnotation/Configuration/ObservationTerminology.swift @@ -0,0 +1,58 @@ +// +// ObservationTerminology.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 13.12.2020. +// + +import Vision + +struct ObservationTerminology { + /// + /// Dictionary for conversion bewteen the `VNHumanBodyPoseObservation.JointName` and custom methodology + /// `String` identifiers for the body landmarks. + /// + static let bodyLandmarksKeysToLabels: [VNHumanBodyPoseObservation.JointName: String] = [ + .nose: "nose", + .root: "root", + .neck: "neck", + .rightEye: "rightEye", + .leftEye: "leftEye", + .rightEar: "rightEar", + .leftEar: "leftEar", + .rightShoulder: "rightShoulder", + .leftShoulder: "leftShoulder", + .rightElbow: "rightElbow", + .leftElbow: "leftElbow", + .rightWrist: "rightWrist", + .leftWrist: "leftWrist" + ] + + /// + /// Dictionary for conversion bewteen the `VNHumanHandPoseObservation.JointName` and custom methodology + /// `String` identifiers for the hand landmarks. 
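+ /// For example, `.indexTip` is stored under the `String` key `"indexTip"`.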
+ /// + static let handLandmarksKeysToLabels: [VNHumanHandPoseObservation.JointName: String] = [ + .wrist: "wrist", + .indexTip: "indexTip", + .indexDIP: "indexDIP", + .indexPIP: "indexPIP", + .indexMCP: "indexMCP", + .middleTip: "middleTip", + .middleDIP: "middleDIP", + .middlePIP: "middlePIP", + .middleMCP: "middleMCP", + .ringTip: "ringTip", + .ringDIP: "ringDIP", + .ringPIP: "ringPIP", + .ringMCP: "ringMCP", + .littleTip: "littleTip", + .littleDIP: "littleDIP", + .littlePIP: "littlePIP", + .littleMCP: "littleMCP", + .thumbTip: "thumbTip", + .thumbIP: "thumbIP", + .thumbMP: "thumbMP", + .thumbCMC: "thumbCMC" + ] +} diff --git a/macOS/DataAnnotation/Extensions/Array+.swift b/macOS/DataAnnotation/Extensions/Array+.swift new file mode 100644 index 0000000..3eff58b --- /dev/null +++ b/macOS/DataAnnotation/Extensions/Array+.swift @@ -0,0 +1,14 @@ +// +// Array+.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 24.12.2020. +// + +import Foundation + +extension Array { + var elementBeforeLast: Element? { + return dropLast().last + } +} diff --git a/macOS/DataAnnotation/Extensions/Dictionary+.swift b/macOS/DataAnnotation/Extensions/Dictionary+.swift new file mode 100644 index 0000000..e585efe --- /dev/null +++ b/macOS/DataAnnotation/Extensions/Dictionary+.swift @@ -0,0 +1,14 @@ +// +// Array+.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 13.12.2020. +// + +import Foundation + +extension Dictionary { + mutating func add(_ element: Element, toArrayOn key: Key) where Value == [Element] { + self[key] == nil ? self[key] = [element] : self[key]?.append(element) + } +} diff --git a/macOS/DataAnnotation/Extensions/FilePanel+.swift b/macOS/DataAnnotation/Extensions/FilePanel+.swift new file mode 100644 index 0000000..3f4db8b --- /dev/null +++ b/macOS/DataAnnotation/Extensions/FilePanel+.swift @@ -0,0 +1,48 @@ +// +// FilePanel+.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 12.12.2020. +// + +import Cocoa +import Vision + +extension NSOpenPanel { + static func openVideo(completion: @escaping (_ result: Result) -> Void) { + let panel = NSOpenPanel() + panel.allowsMultipleSelection = false + panel.canChooseFiles = true + panel.canChooseDirectories = false + panel.allowedFileTypes = Constant.allowedFileTypes + panel.begin { (result) in + guard result == .OK, let url = panel.urls.first else { + return completion(.failure( + NSError(domain: "", code: 0, userInfo: [NSLocalizedDescriptionKey: "Failed to get file location"]) + )) + } + + completion(.success(url)) + } + } + + static func openFolder(completion: @escaping (_ result: Result) -> Void) { + let panel = NSOpenPanel() + panel.allowsMultipleSelection = false + panel.canChooseFiles = false + panel.canChooseDirectories = true + panel.allowedFileTypes = Constant.allowedFileTypes + panel.begin { (result) in + guard result == .OK, let url = panel.urls.first else { + return completion(.failure( + NSError(domain: "", code: 0, userInfo: [NSLocalizedDescriptionKey: "Failed to get file location"]) + )) + } + + completion(.success(url)) + } + } +} + +extension NSSavePanel { +} diff --git a/macOS/DataAnnotation/Extensions/URL+.swift b/macOS/DataAnnotation/Extensions/URL+.swift new file mode 100644 index 0000000..78256b1 --- /dev/null +++ b/macOS/DataAnnotation/Extensions/URL+.swift @@ -0,0 +1,15 @@ +// +// URL+.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 25.12.2020. +// + +import Foundation + +extension URL { + var isDirectory: Bool { + let values = try? 
resourceValues(forKeys: [.isDirectoryKey])
+ return values?.isDirectory ?? false
+ }
+}
diff --git a/macOS/DataAnnotation/Extensions/VNRecognizedPointsObservation+.swift b/macOS/DataAnnotation/Extensions/VNRecognizedPointsObservation+.swift new file mode 100644 index 0000000..62c46e0 --- /dev/null +++ b/macOS/DataAnnotation/Extensions/VNRecognizedPointsObservation+.swift @@ -0,0 +1,22 @@
+//
+// VNRecognizedPointsObservation+.swift
+// SLR data annotation
+//
+// Created by Thành Đỗ Long on 13.12.2020.
+//
+
+import Vision
+
+extension VNHumanBodyPoseObservation.JointName {
+ func stringValue() -> String {
+ guard let label = ObservationTerminology.bodyLandmarksKeysToLabels[self] else { fatalError("Cannot convert landmark") }
+ return label
+ }
+}
+
+extension VNHumanHandPoseObservation.JointName {
+ func stringValue() -> String {
+ guard let label = ObservationTerminology.handLandmarksKeysToLabels[self] else { fatalError("Cannot convert landmark") }
+ return label
+ }
+}
diff --git a/macOS/DataAnnotation/Managers/DataStructuringManager.swift b/macOS/DataAnnotation/Managers/DataStructuringManager.swift new file mode 100644 index 0000000..d6b17b7 --- /dev/null +++ b/macOS/DataAnnotation/Managers/DataStructuringManager.swift @@ -0,0 +1,203 @@
+//
+// DataStructuringManager.swift
+// SLR data annotation
+//
+// Created by Matyáš Boháček on 01/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import CreateML
+import Vision
+
+class DataStructuringManager {
+
+ ///
+ /// Representations of the possible errors occurring in this context
+ ///
+ enum OutputProcessingError: Error {
+ case invalidData
+ case structuringData
+ }
+
+ lazy var queue = OperationQueue()
+
+ ///
+ /// Converts the data from the hand landmarks observations to landmark keys, for further data
+ /// structuring.
+ ///
+ /// - Parameters:
+ /// - recognizedLandmarks: Array of arrays of dictionaries with data from Vision analysis
+ ///
+ /// - Returns: Dictionary of Strings to arrays of Doubles for further processing
+ ///
+ func convertHandLandmarksToMLData(recognizedLandmarks: KeyHandLandmarks) -> [String: [Double]] {
+ // Prepare the dictionary for all of the possible landmark keys to be added
+ var converted = [String: [Double]]()
+
+ for (observationIndex, observation) in recognizedLandmarks.enumerated() {
+ // Ensure that at most two hands are analyzed
+ var maxObservationIndex = 2
+
+ if maxObservationIndex > observation.count {
+ maxObservationIndex = observation.count
+ }
+
+ // Structure the data with the new keys
+ for (handIndex, data) in observation[0..
[String: [Double]] { + // Prepare the dictionary for all of the possible landmarks keys to be added + var converted = [String: [Double]]() + + for (observationIndex, observation) in recognizedLandmarks.enumerated() { + if !observation.isEmpty { + // Structure the data with the new keys + for (landmarkKey, value) in observation[0] { + converted.add(Double(value.location.x), toArrayOn: "\(landmarkKey.stringValue())_X") + converted.add(Double(value.location.y), toArrayOn: "\(landmarkKey.stringValue())_Y") + } + + // Fill in the values for all potential landmarks that were not captured + for landmarkKey in ObservationConfiguration.requestedBodyLandmarks where converted["\(landmarkKey.stringValue())_X"]?.count != observationIndex + 1 { + converted.add(0, toArrayOn: "\(landmarkKey.stringValue())_X") + converted.add(0, toArrayOn: "\(landmarkKey.stringValue())_Y") + } + } else { + for landmarkKey in ObservationConfiguration.requestedBodyLandmarks { + converted.add(0, toArrayOn: "\(landmarkKey.stringValue())_X") + converted.add(0, toArrayOn: "\(landmarkKey.stringValue())_Y") + } + } + } + + return converted + } + + /// + /// Converts the data from the face landmarks observations to landmark keys, for further data + /// structuring. + /// + /// - Parameters: + /// - recognizedLandmarks: Array of arrays of dictionaries with data from Vision analysis + /// + /// - Returns: Dictionary of Strings to arrays of Doubles for further processing + /// + func convertFaceLandmarksToMLData(recognizedLandmarks: KeyFaceLandmarks) -> [String: [Double]] { + // Prepare the dictionary for all of the possible landmarks keys to be added + var converted = [String: [Double]]() + + for (observationIndex, observation) in recognizedLandmarks.enumerated() { + if let observation = observation.first { + // Structure the data with the new keys + for (landmarkIndex, landmark) in observation.enumerated() { + converted.add(Double(landmark.x), toArrayOn: "\(landmarkIndex)_X") + converted.add(Double(landmark.y), toArrayOn: "\(landmarkIndex)_Y") + } + } + + // Fill in the values for all potential landmarks that were not captured + converted.forEach { key, _ in + if converted[key]?.count != observationIndex + 1 { + converted[key]?.append(0.0) + } + } + } + + return converted + } + + /// + /// Combine the data from multiple VisionAnalysisManagers into a MLDataTable. 
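+ /// The `labels` and `results` arrays are expected to be index-aligned (labels[i] describes results[i]); if their counts differ, `OutputProcessingError.invalidData` is thrown.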
+ ///
+ /// - Parameters:
+ /// - labels: Array of String labels of the individual signs
+ /// - results: Array of the processed `VisionAnalysisResult` values
+ ///
+ /// - Returns: Newly constructed MLDataTable
+ ///
+ func combineData(labels: [String], results: [VisionAnalysisResult]) throws -> MLDataTable {
+ // Ensure that the data is equally long
+ guard labels.count == results.count else {
+ throw OutputProcessingError.invalidData
+ }
+
+ // Prepare the structured data in the MLDataTable-processable format
+ var convertedToMLData = [String: MLDataValueConvertible]()
+ convertedToMLData["labels"] = labels
+
+ // Stack the data from individual analyses to arrays
+ var stackedData = [String: [[Double]]]()
+ var videoMetadata = ["width": [Double](), "height": [Double](), "fps": [Double]()]
+
+ for analysis in results {
+ // Append data for body landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.bodyLandmarks) {
+ for (key, value) in
+ convertBodyLandmarksToMLData(recognizedLandmarks: analysis.keyLandmarks.body) {
+ stackedData.add(value, toArrayOn: key)
+ }
+ }
+
+ // Append data for hand landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.handLandmarks) {
+ for (key, value) in convertHandLandmarksToMLData(recognizedLandmarks: analysis.keyLandmarks.hand) {
+ stackedData.add(value, toArrayOn: key)
+ }
+ }
+
+ // Append data for face landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.faceLandmarks) {
+ for (key, value) in convertFaceLandmarksToMLData(recognizedLandmarks: analysis.keyLandmarks.face) {
+ stackedData.add(value, toArrayOn: key)
+ }
+ }
+
+ // Add video size information to the dataset
+ videoMetadata["width"]?.append(Double(analysis.videoSize.width))
+ videoMetadata["height"]?.append(Double(analysis.videoSize.height))
+ videoMetadata["fps"]?.append(Double(analysis.fps))
+ }
+
+ for (key, value) in stackedData {
+ convertedToMLData[key] = value
+ }
+
+ convertedToMLData["video_size_width"] = videoMetadata["width"]
+ convertedToMLData["video_size_height"] = videoMetadata["height"]
+ convertedToMLData["video_fps"] = videoMetadata["fps"]
+
+ do {
+ // Create an MLDataTable on top of the structured data
+ return try MLDataTable(dictionary: convertedToMLData)
+ } catch {
+ throw OutputProcessingError.structuringData
+ }
+ }
+}
diff --git a/macOS/DataAnnotation/Managers/DatasetManager.swift b/macOS/DataAnnotation/Managers/DatasetManager.swift new file mode 100644 index 0000000..84818db --- /dev/null +++ b/macOS/DataAnnotation/Managers/DatasetManager.swift @@ -0,0 +1,122 @@
+//
+// DatasetManager.swift
+// SLR data annotation
+//
+// Created by Matyáš Boháček on 07/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import Cocoa
+import CreateML
+
+class DatasetManager {
+
+ ///
+ /// Representations of the possible errors occurring in this context
+ ///
+ enum DatasetError: Error {
+ case invalidDirectoryContents
+ case unsupportedFormat
+ }
+
+ // MARK: Properties
+
+ private let directoryPath: String
+ private let fps: Int
+ private let fileManager: FileManager
+
+ lazy var queue: OperationQueue = {
+ var queue = OperationQueue()
+ queue.name = "DatasetManager"
+ queue.maxConcurrentOperationCount = 3
+ return queue
+ }()
+
+ // MARK: Methods
+
+ ///
+ /// Initializes the DatasetManager for easy data processing and annotation of complete dataset directories.
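+ /// A hypothetical call (the path and frame rate are placeholder values) might look like `DatasetManager(directoryPath: "/Users/me/Dataset", fps: 4)`.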
+ /// + /// - Parameters: + /// - directoryPath: String path of the dataset directory + /// - fps: Frames per second to be annotated by the individual videos + /// + init(directoryPath: String, + fps: Int, + fileManager: FileManager = .default) { + self.directoryPath = directoryPath + self.fps = fps + self.fileManager = fileManager + } + + /// + /// Annotates the entire associated dataset and returns the data in the form of an MLDataTable. + /// + /// - Parameters: + /// - completion: Called with the generated MLDataTable, or with the corresponding `DatasetError` or + /// `OutputProcessingError` if the data processing or the annotations fail + /// + func generateMLTable(_ completion: @escaping (Result<MLDataTable, Error>) -> ()) { + var foundSubdirectories = [String]() + var labels = [String]() + var analysisResult = [VisionAnalysisResult]() + var operations: [Operation] = [] + do { + // Load all of the labels present in the dataset + foundSubdirectories = try fileManager.contentsOfDirectory(atPath: self.directoryPath) + } catch { + completion(.failure(DatasetError.invalidDirectoryContents)) + return + } + + let videoAnalysisFinishedOp = BlockOperation { + do { + // Structure the data into a MLDataTable + let outputDataStructuringManager = DataStructuringManager() + let output = try outputDataStructuringManager.combineData(labels: labels, results: analysisResult) + completion(.success(output)) + } catch { + completion(.failure(error)) + } + } + // Create annotation managers for each of the labels + do { + for subdirectory in foundSubdirectories where subdirectory.contains(".") == false { + // Construct the URL path for each of the labels (items of the repository) + let currentLabelPath = self.directoryPath.appending("/" + subdirectory + "/") + + try fileManager.contentsOfDirectory(atPath: currentLabelPath) + .filter({ $0.starts(with: ".") == false }) + .forEach { item in + // Skip any non-video formats + guard item.contains(".mp4") else { + throw DatasetError.unsupportedFormat + } + // Load and process the annotations for each of the videos + let currentItemAnalysisManager = VisionAnalysisManager( + videoUrl: URL(fileURLWithPath: currentLabelPath.appending(item)), + fps: fps) + + let videoAnalysisOp = VideoAnalysisOperation(visionAnalysisManager: currentItemAnalysisManager) { result in + analysisResult.append(result) + } + + operations.append(videoAnalysisOp) + + labels.append(subdirectory) + } + } + } catch { + completion(.failure(error)) + return + } + + if let lastOp = operations.last { + videoAnalysisFinishedOp.addDependency(lastOp) + } + + operations.insert(videoAnalysisFinishedOp, at: 0) + queue.addOperations(operations, waitUntilFinished: false) + } +} diff --git a/macOS/DataAnnotation/Managers/VideoProcessingManager.swift b/macOS/DataAnnotation/Managers/VideoProcessingManager.swift new file mode 100644 index 0000000..7883f11 --- /dev/null +++ b/macOS/DataAnnotation/Managers/VideoProcessingManager.swift @@ -0,0 +1,95 @@ +// +// VideoProcessingManager.swift +// SLR data annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. +// + +import Foundation +import AVFoundation + +final class VideoProcessingManager { + /// + /// Processes all of the frames from the given video as a list of CGImages.
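// A sketch of driving generateMLTable(_:) from a call site, assuming the dataset layout
// described in the README (one sub-folder per label with .mp4 files inside). The paths are
// illustrative only, in the spirit of the playground further down in this change set.
import Foundation
import CreateML

let datasetManager = DatasetManager(directoryPath: "/path/to/dataset", fps: 3)
datasetManager.generateMLTable { result in
    switch result {
    case .success(let table):
        // One row per video: label, landmark columns, video size and fps.
        try? table.writeCSV(to: URL(fileURLWithPath: "/path/to/dataset/output.csv"))
    case .failure(let error):
        // DatasetError.invalidDirectoryContents / .unsupportedFormat, or an error
        // bubbled up from DataStructuringManager.
        print(error)
    }
}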
+ /// + /// - Parameters: + /// - videoUrl: URL of the video to be annotated + /// - fps: Frames per second to be annotated + /// + /// - Returns: Array of frames as CGImages + /// + static func getAllFrames(videoUrl: URL, fps: Int) -> [CGImage] { + // Import the video into AVFoundation + let asset = AVURLAsset(url: videoUrl) + let duration = CMTimeGetSeconds(asset.duration) + + let generator = AVAssetImageGenerator(asset: asset) + generator.appliesPreferredTrackTransform = true + generator.apertureMode = AVAssetImageGenerator.ApertureMode.encodedPixels + generator.requestedTimeToleranceBefore = CMTimeMake(value: 1, timescale: 100) + + var frames = [CGImage]() + + // Process the frames for given frames per second rate at every second + for secondsIndex in 0 ..< Int(ceil(duration)) { + for frameIndex in 0 ..< fps { + let timeForFrame = Double(secondsIndex) + Double(frameIndex) * (1.0 / Double(fps)) + if timeForFrame < duration, let frame = self.getFrame(fromTime: Float64(timeForFrame), generator: generator) { + frames.append(frame) + } + } + } + + // Prevent additional crashes with the AVFoundation processing + generator.cancelAllCGImageGeneration() + return frames + + } + + /// + /// Converts the frame from the given AVAssetImageGenerator at the given time within + /// the encoded video. + /// + /// - Parameters: + /// - fromTime: Float64 of the time to extract the frame from + /// - generator: AVAssetImageGenerator with the video already encoded + /// + /// - Returns: Desired frame as CGImage + /// + static func getFrame(fromTime: Float64, generator: AVAssetImageGenerator) -> CGImage? { + let image: CGImage + + // Convert the time to the supported CMTime + let time = CMTimeMakeWithSeconds(fromTime, preferredTimescale: 60) + + do { + // Convert the image at the given time + try image = generator.copyCGImage(at: time, actualTime: nil) + } catch { + return nil + } + + return image + } + + /// + /// Calculates the given video's size. + /// + /// - Parameters: + /// - videoUrl: URL of the video to be annotated + /// + /// - Returns: CGSize of the given video + /// + static func getVideoSize(videoUrl: URL) -> CGSize { + // Import the video into AVFoundation + let asset = AVAsset(url: videoUrl) + guard let track = asset.tracks(withMediaType: AVMediaType.video).first else { return CGSize() } + + // Calculate the size using the transformation from the track + let size = track.naturalSize.applying(track.preferredTransform) + + // Convert the data into CGSize + return CGSize(width: abs(size.width), height: abs(size.height)) + } +} diff --git a/macOS/DataAnnotation/Managers/VisionAnalysisManager.swift b/macOS/DataAnnotation/Managers/VisionAnalysisManager.swift new file mode 100644 index 0000000..dba4a82 --- /dev/null +++ b/macOS/DataAnnotation/Managers/VisionAnalysisManager.swift @@ -0,0 +1,106 @@ +// +// VisionAnalysisManager.swift +// SLR data annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. 
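// A small illustration of the sampling grid that getAllFrames(videoUrl:fps:) above walks:
// for every whole second of the clip it requests `fps` evenly spaced frames, skipping any
// timestamp that falls beyond the actual duration. A 2.5 s clip at fps = 3 is therefore
// sampled at 0.0, 0.33, 0.67, 1.0, 1.33, 1.67, 2.0 and 2.33 seconds.
import Foundation

func sampleTimes(duration: Double, fps: Int) -> [Double] {
    var times = [Double]()
    for second in 0 ..< Int(ceil(duration)) {
        for frame in 0 ..< fps {
            let time = Double(second) + Double(frame) * (1.0 / Double(fps))
            if time < duration { times.append(time) }
        }
    }
    return times
}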
+// + +import Foundation +import Cocoa +import Vision + +typealias KeyBodyLandmarks = [[[VNHumanBodyPoseObservation.JointName: VNPoint]]] +typealias KeyHandLandmarks = [[[VNHumanHandPoseObservation.JointName: VNPoint]]] +typealias KeyFaceLandmarks = [[[CGPoint]]] + +final class KeyLandmarks { + var body = KeyBodyLandmarks() + var hand = KeyHandLandmarks() + var face = KeyFaceLandmarks() +} + +struct VisionAnalysisResult { + let keyLandmarks : KeyLandmarks + let videoSize: CGSize + let fps: Int +} + +final class VisionAnalysisManager { + + // MARK: Properties + + private let videoUrl: URL + private var frames = [CGImage]() + + private(set) var fps: Int + private(set) var videoSize = CGSize() + + var keyLandmarks = KeyLandmarks() + var operations: [Operation] = [] + + lazy var queue: OperationQueue = { + var queue = OperationQueue() + queue.name = "VisionAnalysisManager" + queue.maxConcurrentOperationCount = .max + return queue + }() + + // MARK: Methods + + /// + /// Initiates the VisionAnalysisManager which is responsible for the Vision analysis and annotation of any + /// given video. + /// + /// - Parameters: + /// - videoUrl: URL of the video to be annotated + /// - fps: Frames per second to be annotated + /// + init(videoUrl: URL, + fps: Int = UserDefaults.standard.integer(forKey: "fps")) { + self.videoUrl = videoUrl + self.fps = fps + } + + /// + /// Starts the asynchronous process of annotation with the data associated to this VisionAnalysisManager. + /// + public func annotate(_ completion: @escaping () -> ()) { + // Generate the individual frames from the vido + frames = VideoProcessingManager.getAllFrames(videoUrl: self.videoUrl, fps: self.fps) + + // Calculate the size of the video + videoSize = VideoProcessingManager.getVideoSize(videoUrl: self.videoUrl) + + let finishedAnnotationOp = BlockOperation { + self.operations.removeAll() + completion() + } + + frames.forEach { frame in + let videoAnnotateOp = VideoAnnotateOperation( + frame: frame, keyLandmarks: self.keyLandmarks) + operations.append(videoAnnotateOp) + } + + if let lastOp = operations.last { + finishedAnnotationOp.addDependency(lastOp) + } + + operations.insert(finishedAnnotationOp, at: 0) + queue.addOperations(operations, waitUntilFinished: false) + } + + /// + /// Returns all of the data analyzed and structured within this from this VisionAnalysisManager. + /// + /// - Returns: Tuple of arrays of arrays of individual dictionaries. The data is structured in the order: + /// body, hands, face. + /// + /// - Warning: If the data annotations still are not finished, empty arrays will be returned. Check + /// `VisionAnalysisManager.isAnnotated()` to find out the current status. + /// + public func getData() -> (KeyBodyLandmarks, KeyHandLandmarks, KeyFaceLandmarks) { + (self.keyLandmarks.body, self.keyLandmarks.hand, self.keyLandmarks.face) + } +} diff --git a/macOS/DataAnnotation/Operations/AsyncOperation.swift b/macOS/DataAnnotation/Operations/AsyncOperation.swift new file mode 100644 index 0000000..b45739e --- /dev/null +++ b/macOS/DataAnnotation/Operations/AsyncOperation.swift @@ -0,0 +1,70 @@ +// +// AsyncOperation.swift +// DataAnnotation +// +// Created by Rastislav Červenák on 17.01.2021. 
+// + +import Foundation + +class AsyncOperation: Operation { + enum State: String { + case Ready, Executing, Finished, Error + + fileprivate var keyPath: String { + return "is" + rawValue + } + } + + var state = State.Ready { + willSet { + willChangeValue(forKey: newValue.keyPath) + willChangeValue(forKey: state.keyPath) + } + didSet { + didChangeValue(forKey: oldValue.keyPath) + didChangeValue(forKey: state.keyPath) + } + } + + var finishedWithError: Bool = false + + var errorMessage: String? { + didSet { + self.finishedWithError = true + } + } +} + +extension AsyncOperation { + //: NSOperation Overrides + override var isReady: Bool { + return super.isReady && state == .Ready + } + + override var isExecuting: Bool { + return state == .Executing + } + + override var isFinished: Bool { + return state == .Finished + } + + override var isAsynchronous: Bool { + return true + } + + override func start() { + if isCancelled { + state = .Finished + return + } + + main() + state = .Executing + } + + override func cancel() { + state = .Finished + } +} diff --git a/macOS/DataAnnotation/Operations/VideoAnalysisOperation.swift b/macOS/DataAnnotation/Operations/VideoAnalysisOperation.swift new file mode 100644 index 0000000..2890b1f --- /dev/null +++ b/macOS/DataAnnotation/Operations/VideoAnalysisOperation.swift @@ -0,0 +1,252 @@ +// +// VideoAnalysisOperation.swift +// DataAnnotation +// +// Created by Rastislav Červenák on 17.01.2021. +// + +import Foundation +import Vision + +class VideoAnalysisOperation: AsyncOperation { + let visionAnalysisManager: VisionAnalysisManager + let completion: ((VisionAnalysisResult) -> ())? + + init(visionAnalysisManager: VisionAnalysisManager, + completion: ((VisionAnalysisResult) -> ())? = nil) { + self.visionAnalysisManager = visionAnalysisManager + self.completion = completion + } + + override func main() { + self.visionAnalysisManager.annotate { + let result = VisionAnalysisResult( + keyLandmarks: self.visionAnalysisManager.keyLandmarks, + videoSize: self.visionAnalysisManager.videoSize, + fps: self.visionAnalysisManager.fps) + self.completion?(result) + self.state = .Finished + } + } +} + +class VideoAnnotateOperation: AsyncOperation { + var keyLandmarks: KeyLandmarks + let handler: VNImageRequestHandler + let queue: OperationQueue = .init() + let completion: (() -> ())? + + init(frame: CGImage, + keyLandmarks: KeyLandmarks, + options: [VNImageOption : Any] = [:], + completion: (() -> ())? = nil) { + self.completion = completion + self.keyLandmarks = keyLandmarks + self.handler = VNImageRequestHandler(cgImage: frame, options: options) + } + + override func main() { + DispatchQueue.global(qos: .default).async { + self.invokeBodyPoseDetection(handler: self.handler) + self.invokeHandPoseDetection(handler: self.handler) + self.invokeFaceLandmarksDetection(handler: self.handler) + self.completion?() + self.state = .Finished + } + } + + // MARK: Body landmarks detection + + /// + /// Runs a ML model for detecting body pose within the scene using the Vision framework. The analysis + /// is performed in the background thread. 
+ /// + /// - Parameters: + /// - handler: VNImageRequestHandler to be used to analyse the body pose + /// + func invokeBodyPoseDetection(handler: VNImageRequestHandler) { +// DispatchQueue.global(qos: .userInitiated).async { + do { + // Setup the request + let bodyDetectionRequest = VNDetectHumanBodyPoseRequest(completionHandler: self.retrieveBodyPoseDetectionResults) + + // Perform the request + try handler.perform([bodyDetectionRequest]) + } catch { + print("! \(error)") + } +// } + } + + /// Retrieves results from ML analysis of body pose detection, including the relevant joints and their + /// probabilities. + /// + /// - Parameters: + /// - request: Initial request updated with the results + /// - error: Possible error occuring during the analysis + /// + func retrieveBodyPoseDetectionResults(request: VNRequest, error: Error?) { + guard let observations = + request.results?.first as? VNHumanBodyPoseObservation else { + // Prevent from crashing once face is visible only for certain parts of the record + // TODO: Consider other filling options than just zeros + return self.keyLandmarks.body.append([[VNHumanBodyPoseObservation.JointName: VNPoint]]()) + } + + // Process each observation to find the recognized body landmarks + var result = [[VNHumanBodyPoseObservation.JointName: VNPoint]]() + result.append(processBodyPoseObservation(observations)) + + self.keyLandmarks.body.append(result) + } + + func processBodyPoseObservation(_ observation: VNHumanBodyPoseObservation) -> [VNHumanBodyPoseObservation.JointName: VNPoint] { + // Retrieve all points + guard let recognizedPoints = try? observation.recognizedPoints(.all) else { + return [:] + } + + var keyBodyLandmarks = [VNHumanBodyPoseObservation.JointName: VNPoint]() + let requestedBodyLandmarks = ObservationConfiguration.requestedBodyLandmarks + + // Process all of the recognized landmarks + for (key, point) in recognizedPoints where point.confidence > MachineLearningConfiguration.bodyPoseDetectionThreshold { + // Keep the point for further analysis if relevant + if (requestedBodyLandmarks.contains(key)) || requestedBodyLandmarks.isEmpty { + keyBodyLandmarks[key] = point + } + } + + // Ensure that all landmark keys are present, otherwise fill in a zero + // TODO: Consider other filling options than just zeros + for key in requestedBodyLandmarks where keyBodyLandmarks[key] == nil { + keyBodyLandmarks[key] = VNPoint() + } + + return keyBodyLandmarks + } + + // MARK: Hand landmarks detection + + /// + /// Runs a ML model for detecting hand pose within the scene using the Vision framework. The analysis + /// is performed in the background thread. + /// + /// - Parameters: + /// - handler: VNImageRequestHandler to be used to analyse the hand pose + /// + func invokeHandPoseDetection(handler: VNImageRequestHandler) { +// DispatchQueue.global(qos: .userInitiated).async { + do { + // Setup the request + let handDetectionRequest = VNDetectHumanHandPoseRequest(completionHandler: self.retrieveHandPoseDetectionResults) + handDetectionRequest.maximumHandCount = 2 + + // Perform the request + try handler.perform([handDetectionRequest]) + } catch { + print("! \(error)") + } +// } + } + + /// Retrieves results from ML analysis of hand pose detection, including the relevant joints and their + /// probabilities. + /// + /// - Parameters: + /// - request: Initial request updated with the results + /// - error: Possible error occuring during the analysis + /// + func retrieveHandPoseDetectionResults(request: VNRequest, error: Error?) 
{ + guard let observations = request.results as? [VNHumanHandPoseObservation] else { + // Prevent from crashing once hands are visible only for certain parts of the record + // TODO: Consider other filling options than just zeros + return self.keyLandmarks.hand.append([[VNHumanHandPoseObservation.JointName: VNPoint]]()) + } + + // Process each observation to find the recognized hand landmarks + var result = [[VNHumanHandPoseObservation.JointName: VNPoint]]() + observations.forEach { observation in + result.append(processHandPoseObservation(observation)) + } + + self.keyLandmarks.hand.append(result) + } + + func processHandPoseObservation(_ observation: VNHumanHandPoseObservation) -> [VNHumanHandPoseObservation.JointName: VNPoint] { + // Retrieve all points. + guard let recognizedPoints = try? observation.recognizedPoints(.all) else { + return [:] + } + + var keyHandLandmarks = [VNHumanHandPoseObservation.JointName: VNPoint]() + let requestedHandLandmarks = ObservationConfiguration.requestedHandLandmarks + + // Process all of the recognized landmarks + for (key, point) in recognizedPoints where point.confidence > MachineLearningConfiguration.handPoseDetectionThreshold { + // Keep the point for further analysis if relevant + if (requestedHandLandmarks.contains(key)) || requestedHandLandmarks.isEmpty { + keyHandLandmarks[key] = point + } + } + + + // Ensure that all landmark keys are present, otherwise fill in a zero + // TODO: Consider other filling options than just zeros + for key in requestedHandLandmarks where keyHandLandmarks[key] == nil { + keyHandLandmarks[key] = VNPoint() + } + + + return keyHandLandmarks + } + + // MARK: Face landmarks detection + + /// + /// Runs a ML model for detecting face landmarks within the scene using the Vision framework. The analysis + /// is performed in the background thread. + /// + /// - Parameters: + /// - handler: VNImageRequestHandler to be used to analyse the face landmarks + /// + func invokeFaceLandmarksDetection(handler: VNImageRequestHandler) { +// DispatchQueue.global(qos: .userInitiated).async { + do { + // Setup the request + let faceLandmarksDetectionRequest = VNDetectFaceLandmarksRequest(completionHandler: self.retrieveFaceLandmarksDetectionResults) + // Perform the request + try handler.perform([faceLandmarksDetectionRequest]) + } catch { + print("! \(error)") + } +// } + } + + /// Retrieves results from ML analysis of face pose detection, including the relevant joints and their + /// probabilities. + /// + /// - Parameters: + /// - request: Initial request updated with the results + /// - error: Possible error occuring during the analysis + /// + func retrieveFaceLandmarksDetectionResults(request: VNRequest, error: Error?) { + guard let observations = request.results?.first as? VNFaceObservation else { + // Prevent from crashing once face is visible only for certain parts of the record + // TODO: Consider other filling options than just zeros + return self.keyLandmarks.face.append([[CGPoint]]()) + } + + // Process each observation to find the recognized face landmarks + var result = [[CGPoint]]() + result.append(processFaceLandmarksObservation(observations)) + + self.keyLandmarks.face.append(result) + } + + func processFaceLandmarksObservation(_ observation: VNFaceObservation) -> [CGPoint] { + // Retrieve all points + guard let recognizedLandmarks = observation.landmarks else { return [] } + return recognizedLandmarks.allPoints?.normalizedPoints ?? 
[] + } +} diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AccentColor.colorset/Contents.json b/macOS/DataAnnotation/Resources/Assets.xcassets/AccentColor.colorset/Contents.json new file mode 100644 index 0000000..eb87897 --- /dev/null +++ b/macOS/DataAnnotation/Resources/Assets.xcassets/AccentColor.colorset/Contents.json @@ -0,0 +1,11 @@ +{ + "colors" : [ + { + "idiom" : "universal" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/Contents.json b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/Contents.json new file mode 100644 index 0000000..64dc11e --- /dev/null +++ b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/Contents.json @@ -0,0 +1,68 @@ +{ + "images" : [ + { + "filename" : "icon_16x16.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "16x16" + }, + { + "filename" : "icon_16x16@2x.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "16x16" + }, + { + "filename" : "icon_32x32.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "32x32" + }, + { + "filename" : "icon_32x32@2x.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "32x32" + }, + { + "filename" : "icon_128x128.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "128x128" + }, + { + "filename" : "icon_128x128@2x.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "128x128" + }, + { + "filename" : "icon_256x256.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "256x256" + }, + { + "filename" : "icon_256x256@2x.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "256x256" + }, + { + "filename" : "icon_512x512.png", + "idiom" : "mac", + "scale" : "1x", + "size" : "512x512" + }, + { + "filename" : "icon_512x512@2x.png", + "idiom" : "mac", + "scale" : "2x", + "size" : "512x512" + } + ], + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128.png new file mode 100644 index 0000000..bbd5f2a Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128@2x.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128@2x.png new file mode 100644 index 0000000..690ba58 Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_128x128@2x.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16.png new file mode 100644 index 0000000..064e7e7 Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16@2x.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16@2x.png new file mode 100644 index 0000000..bc0c66b Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_16x16@2x.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256.png new file mode 100644 index 0000000..690ba58 Binary files /dev/null and 
b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256@2x.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256@2x.png new file mode 100644 index 0000000..fc73cce Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_256x256@2x.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32.png new file mode 100644 index 0000000..bc0c66b Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32@2x.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32@2x.png new file mode 100644 index 0000000..bab99cc Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_32x32@2x.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512.png new file mode 100644 index 0000000..fc73cce Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512@2x.png b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512@2x.png new file mode 100644 index 0000000..cfa7e84 Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/AppIcon.appiconset/icon_512x512@2x.png differ diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/Contents.json b/macOS/DataAnnotation/Resources/Assets.xcassets/Contents.json new file mode 100644 index 0000000..73c0059 --- /dev/null +++ b/macOS/DataAnnotation/Resources/Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/macOS/DataAnnotation/Resources/Assets.xcassets/Icons.icns b/macOS/DataAnnotation/Resources/Assets.xcassets/Icons.icns new file mode 100644 index 0000000..be75b5b Binary files /dev/null and b/macOS/DataAnnotation/Resources/Assets.xcassets/Icons.icns differ diff --git a/macOS/DataAnnotation/Resources/DataAnnotation.entitlements b/macOS/DataAnnotation/Resources/DataAnnotation.entitlements new file mode 100644 index 0000000..19afff1 --- /dev/null +++ b/macOS/DataAnnotation/Resources/DataAnnotation.entitlements @@ -0,0 +1,10 @@ + + + + + com.apple.security.app-sandbox + + com.apple.security.files.user-selected.read-write + + + diff --git a/macOS/DataAnnotation/Resources/Info.plist b/macOS/DataAnnotation/Resources/Info.plist new file mode 100644 index 0000000..355e6ae --- /dev/null +++ b/macOS/DataAnnotation/Resources/Info.plist @@ -0,0 +1,28 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + $(CURRENT_PROJECT_VERSION) + LSApplicationCategoryType + public.app-category.utilities + LSMinimumSystemVersion + $(MACOSX_DEPLOYMENT_TARGET) + CFBundleIconFile + Resources/Icons.icns + 
+ diff --git a/macOS/DataAnnotation/Resources/Preview Assets.xcassets/Contents.json b/macOS/DataAnnotation/Resources/Preview Assets.xcassets/Contents.json new file mode 100644 index 0000000..73c0059 --- /dev/null +++ b/macOS/DataAnnotation/Resources/Preview Assets.xcassets/Contents.json @@ -0,0 +1,6 @@ +{ + "info" : { + "author" : "xcode", + "version" : 1 + } +} diff --git a/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetView.swift b/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetView.swift new file mode 100644 index 0000000..387e23e --- /dev/null +++ b/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetView.swift @@ -0,0 +1,63 @@ +// +// AnotateDatasetView.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 24.12.2020. +// + +import SwiftUI + +struct AnotateDatasetView: View { + @ObservedObject private(set) var viewModel: ViewModel + + var body: some View { + VStack { + Spacer() + + HStack { + if viewModel.isStartProcessingActive { + box(title: "Classes", subtitle: "\(viewModel.subdirectories)") + box(title: "Items", subtitle: "\(viewModel.files)") + } else { + Text("Drag and drop dataset") + } + } + .modifier(CardViewModifier()) + .onTapGesture { viewModel.selectFile() } + .onDrop(of: ["public.file-url"], + isTargeted: nil, + perform: viewModel.handleOnDrop(providers:)) + + Spacer() + + HStack(alignment: .bottom, spacing: 32.0) { + Button(action: viewModel.selectFile) { + Text("Load Dataset") + } + + if viewModel.isStartProcessingActive { + Button(action: viewModel.startAnnotate) { + Text("Start Processing") + } + } + } + .padding() + } + } + + func box(title: String, subtitle: String) -> some View { + VStack { + Text(subtitle) + .font(.title) + .fontWeight(.semibold) + + Text(title) + } + .padding() + } +} + +struct ComplexView_Previews: PreviewProvider { + static var previews: some View { + AnotateDatasetView(viewModel: .init()) } +} diff --git a/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetViewModel.swift b/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetViewModel.swift new file mode 100644 index 0000000..e3764ef --- /dev/null +++ b/macOS/DataAnnotation/Screens/Anotate Dataset/AnotateDatasetViewModel.swift @@ -0,0 +1,114 @@ +// +// AnotateDatasetViewModel.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 24.12.2020. +// + +import SwiftUI +import CreateML + +extension AnotateDatasetView { + class ViewModel: ObservableObject { + private let fileManager: FileManager + private let dataStructuringManager: DataStructuringManager + private let observationConfiguration: ObservationConfiguration + private var analysisManager: VisionAnalysisManager! + + @Published var selectedFolderUrl: URL? 
+ @Published var subdirectories: Int = 0 + @Published var files: Int = 0 + + var isStartProcessingActive: Bool { subdirectories > 0 && files > 0} + + init(fileManager: FileManager = .default, + dataStructuringManager: DataStructuringManager = .init(), + observationConfiguration: ObservationConfiguration = .init()) { + self.dataStructuringManager = dataStructuringManager + self.observationConfiguration = observationConfiguration + self.fileManager = fileManager + } + + func selectFile() { + NSOpenPanel.openFolder { [unowned self] result in + if case let .success(videoUrl) = result { + selectedFolderUrl = videoUrl + subdirectories = numberOfFolders(from: videoUrl) + files = numberOfFiles(from: videoUrl) + } + } + } + + func startAnnotate() { + guard let selectedFolderUrl = selectedFolderUrl else { fatalError("URL cannot be empty")} + let datasetManager = DatasetManager(directoryPath: selectedFolderUrl.path, fps: MachineLearningConfiguration.framesPerSecond) + + datasetManager.generateMLTable { [weak self] result in + guard let self = self else { return } + + switch result { + case .success(let data): + self.saveCVS(data: data) + case .failure(let error): + print(error) + } + } + } + + func saveCVS(data: MLDataTable) { + DispatchQueue.main.async { + let savePanel = NSSavePanel() + savePanel.canCreateDirectories = true + savePanel.nameFieldStringValue = "result.csv" + savePanel.showsTagField = false + savePanel.level = NSWindow.Level(rawValue: Int(CGWindowLevelForKey(.modalPanelWindow))) + savePanel.begin { result in + guard result == .OK, let url = savePanel.url else { + print("Failed to get file location") + return + } + + do { + try data.writeCSV(to: url) + } catch { + print(error) + } + } + } + } + + func handleOnDrop(providers: [NSItemProvider]) -> Bool { + guard let item = providers.first else { return false } + item.loadItem(forTypeIdentifier: "public.file-url", options: nil) { (urlData, error) in + DispatchQueue.main.async { + guard let urlData = urlData as? Data else { return } + let url = NSURL(absoluteURLWithDataRepresentation: urlData, relativeTo: nil) as URL + + guard url.isDirectory else { return } + self.selectedFolderUrl = url + self.subdirectories = self.numberOfFolders(from: url) + self.files = self.numberOfFiles(from: url) + } + } + return true + } + + private func numberOfFolders(from videoUrl: URL) -> Int { + let contentsOfDirectory = try? fileManager.contentsOfDirectory(atPath: videoUrl.path) + return contentsOfDirectory?.filter({ $0.starts(with: ".") == false }).count ?? 0 + } + + private func numberOfFiles(from url: URL) -> Int { + let contentsOfDirectory = try? fileManager.contentsOfDirectory(atPath: url.path) + return contentsOfDirectory? + .filter({ $0.starts(with: ".") == false }) + .compactMap({ subdirectory -> Int? in + let currentLabelPath = url.path.appending("/" + subdirectory + "/") + return try? fileManager.contentsOfDirectory(atPath: currentLabelPath) + .filter({ $0.starts(with: ".") == false }) + .count + }) + .reduce(0, +) ?? 0 + } + } +} diff --git a/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoView.swift b/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoView.swift new file mode 100644 index 0000000..d29df11 --- /dev/null +++ b/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoView.swift @@ -0,0 +1,59 @@ +// +// AnotateVideoView.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 15.12.2020. 
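// The view models call NSOpenPanel.openFolder and NSOpenPanel.openVideo, small helpers defined
// elsewhere in this change set. A rough sketch of the assumed shape, based on how the Result is
// consumed above; the real helper presumably also surfaces cancellation as a failure:
import AppKit

extension NSOpenPanel {
    static func openFolder(completion: @escaping (Result<URL, Error>) -> Void) {
        let panel = NSOpenPanel()
        panel.canChooseDirectories = true
        panel.canChooseFiles = false
        panel.allowsMultipleSelection = false
        panel.begin { response in
            guard response == .OK, let url = panel.url else { return }
            completion(.success(url))
        }
    }
}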
+// + +import SwiftUI +import AVKit + +struct AnotateVideoView: View { + @ObservedObject private(set) var viewModel: ViewModel + + var body: some View { + VStack { + + Spacer() + + ZStack { + if let url = viewModel.selectedVideoUrl { + VideoPlayer(player: AVPlayer(url: url)) + } else { + Text("Drag and drop video file") + } + } + .onTapGesture { viewModel.selectFile() } + .onDrop(of: ["public.file-url"], + isTargeted: nil, + perform: viewModel.handleOnDrop(providers:)) + .modifier(CardViewModifier()) + + if let nameVideoUrl = viewModel.nameVideoUrl { + Text(nameVideoUrl) + } + + Spacer() + + HStack(alignment: .bottom, spacing: 32.0) { + Button(action: viewModel.selectFile) { + Text("Load Video") + } + + if viewModel.isStartProcessingActive { + Button(action: viewModel.startAnnotate) { + Text("Start Processing") + } + } + } + .padding() + } + } +} + +struct SimpleView_Previews: PreviewProvider { + static var previews: some View { + AnotateVideoView(viewModel: AnotateVideoView.ViewModel()) + .previewLayout(.device) + } +} diff --git a/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoViewModel.swift b/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoViewModel.swift new file mode 100644 index 0000000..69b23a3 --- /dev/null +++ b/macOS/DataAnnotation/Screens/Anotate Video/AnotateVideoViewModel.swift @@ -0,0 +1,101 @@ +// +// AnotateVideoViewModel.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 13.12.2020. +// + +import SwiftUI +import CreateML + +extension AnotateVideoView { + class ViewModel: ObservableObject { + private let dataStructuringManager: DataStructuringManager + private let observationConfiguration: ObservationConfiguration + private var analysisManager: VisionAnalysisManager! + private let operationsQueue = OperationQueue() + + @Published var selectedVideoUrl: URL? + @Published var nameVideoUrl: String? 
+ + var isStartProcessingActive: Bool { selectedVideoUrl != nil } + + init(dataStructuringManager: DataStructuringManager = .init(), + observationConfiguration: ObservationConfiguration = .init()) { + self.dataStructuringManager = dataStructuringManager + self.observationConfiguration = observationConfiguration + } + + func selectFile() { + NSOpenPanel.openVideo { [unowned self] result in + if case let .success(videoUrl) = result { + self.selectedVideoUrl = videoUrl + self.nameVideoUrl = videoUrl.pathComponents.elementBeforeLast + } + } + } + + func startAnnotate() { + guard let selectedVideoUrl = selectedVideoUrl, + let nameVideoUrl = nameVideoUrl else { fatalError("URL cannot be empty")} + + self.analysisManager = VisionAnalysisManager(videoUrl: selectedVideoUrl, + fps: UserDefaults.standard.integer(forKey: "fps")) + + let videoAnalysisOp = VideoAnalysisOperation(visionAnalysisManager: self.analysisManager) { _ in + do { + let result = VisionAnalysisResult( + keyLandmarks: self.analysisManager.keyLandmarks, + videoSize: self.analysisManager.videoSize, + fps: self.analysisManager.fps) + let data = try self.dataStructuringManager.combineData( + labels: [nameVideoUrl], + results: [result]) + + self.saveCVS(data: data) + } catch { + print(error) + } + } + + operationsQueue.addOperations([videoAnalysisOp], waitUntilFinished: false) + } + + func saveCVS(data: MLDataTable) { + DispatchQueue.main.async { + let savePanel = NSSavePanel() + savePanel.canCreateDirectories = true + savePanel.nameFieldStringValue = "result.csv" + savePanel.showsTagField = false + savePanel.level = NSWindow.Level(rawValue: Int(CGWindowLevelForKey(.modalPanelWindow))) + savePanel.begin { result in + guard result == .OK, let url = savePanel.url else { + print("Failed to get file location") + return + } + + do { + try data.writeCSV(to: url) + } catch { + print(error) + } + } + } + } + + func handleOnDrop(providers: [NSItemProvider]) -> Bool { + guard let item = providers.first else { return false } + item.loadItem(forTypeIdentifier: "public.file-url", options: nil) { (urlData, error) in + DispatchQueue.main.async { + guard let urlData = urlData as? Data else { return } + let url = NSURL(absoluteURLWithDataRepresentation: urlData, relativeTo: nil) as URL + + guard Constant.allowedFileTypes.contains(url.pathExtension) else { return } + self.selectedVideoUrl = url + self.nameVideoUrl = url.pathComponents.elementBeforeLast + } + } + return true + } + } +} diff --git a/macOS/DataAnnotation/Screens/ContentView.swift b/macOS/DataAnnotation/Screens/ContentView.swift new file mode 100644 index 0000000..81daf39 --- /dev/null +++ b/macOS/DataAnnotation/Screens/ContentView.swift @@ -0,0 +1,41 @@ +// +// ContentView.swift +// SLR data annotation +// +// Created by Thành Đỗ Long on 12.12.2020. 
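// handleOnDrop(providers:) and selectFile() above lean on a few helpers defined elsewhere in this
// change set (URL.isDirectory, Array.elementBeforeLast and Constant.allowedFileTypes). Sketches of
// the assumed shapes, for orientation only:
import Foundation

extension URL {
    // Whether the URL points at a directory on disk.
    var isDirectory: Bool {
        (try? resourceValues(forKeys: [.isDirectoryKey]))?.isDirectory ?? false
    }
}

extension Array {
    // The second-to-last element, e.g. the enclosing folder name in a URL's pathComponents.
    var elementBeforeLast: Element? {
        dropLast().last
    }
}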
+// + +import SwiftUI + +struct ContentView: View { + var body: some View { + NavigationView { + content + .frame(minWidth: 200, idealWidth: 250, maxWidth: 300) + AnotateVideoView(viewModel: .init()) + } + .frame(minWidth: 1000, minHeight: 600) + .navigationTitle("Pose Data Annotator") + } + + var content: some View { + List { + NavigationLink(destination: AnotateVideoView(viewModel: .init())) { + Label("Anotate video", systemImage: "book.closed") + } + NavigationLink(destination: AnotateDatasetView(viewModel: .init())) { + Label("Anotate dataset", systemImage: "list.bullet.rectangle") + } + } + .animation(.spring()) + .listStyle(SidebarListStyle()) + } +} + +#if DEBUG +struct ContentView_Previews: PreviewProvider { + static var previews: some View { + ContentView() + } +} +#endif diff --git a/macOS/DataAnnotation/Screens/Modifiers/CardViewModifier.swift b/macOS/DataAnnotation/Screens/Modifiers/CardViewModifier.swift new file mode 100644 index 0000000..29daa20 --- /dev/null +++ b/macOS/DataAnnotation/Screens/Modifiers/CardViewModifier.swift @@ -0,0 +1,21 @@ +// +// CardViewModifier.swift +// DataAnnotation +// +// Created by Thành Đỗ Long on 06.05.2021. +// + +import SwiftUI + +struct CardViewModifier: ViewModifier { + func body(content: Content) -> some View { + content + .font(.title2) + .foregroundColor(.white) + .frame(width: 480, height: 480, alignment: .center) + .background(Color.black.opacity(0.8)) + .cornerRadius(8) + .shadow(color: .black, + radius: 10) + } +} diff --git a/macOS/DataAnnotation/Screens/Settings/AnalysisSettingsView.swift b/macOS/DataAnnotation/Screens/Settings/AnalysisSettingsView.swift new file mode 100644 index 0000000..f3db7eb --- /dev/null +++ b/macOS/DataAnnotation/Screens/Settings/AnalysisSettingsView.swift @@ -0,0 +1,25 @@ +// +// AnalysisSettingsView.swift +// DataAnnotation +// +// Created by Matyáš Boháček on 07.01.2021. +// + +import SwiftUI + +struct AnalysisSettingsView: View { + @AppStorage("handPoseDetectionThreshold") private var handPoseDetectionThreshold = 0.1 + @AppStorage("bodyPoseDetectionThreshold") private var bodyPoseDetectionThreshold = 0.01 + + var body: some View { + Form { + Slider(value: $handPoseDetectionThreshold, in: 0.01...0.3) { + Text("Hand Pose Detection Threshold: \(handPoseDetectionThreshold, specifier: "%.3f")") + } + Slider(value: $bodyPoseDetectionThreshold, in: 0.01...0.3) { + Text("Body Pose Detection Threshold: \(bodyPoseDetectionThreshold, specifier: "%.3f")") + } + } + .padding() + } +} diff --git a/macOS/DataAnnotation/Screens/Settings/GeneralSettingsView.swift b/macOS/DataAnnotation/Screens/Settings/GeneralSettingsView.swift new file mode 100644 index 0000000..3f77aaa --- /dev/null +++ b/macOS/DataAnnotation/Screens/Settings/GeneralSettingsView.swift @@ -0,0 +1,30 @@ +// +// GeneralSettingsView.swift +// DataAnnotation +// +// Created by Matyáš Boháček on 07.01.2021. 
+// + +import SwiftUI + +struct GeneralSettingsView: View { + @AppStorage(ObservationType.handLandmarks.rawValue) private var outputHandsLandmarks: Bool = true + @AppStorage(ObservationType.bodyLandmarks.rawValue) private var outputBodyLandmarks: Bool = true + @AppStorage(ObservationType.faceLandmarks.rawValue) private var outputFaceLandmarks: Bool = true + + var body: some View { + Form { + Text("Included body parts in output:") + + VStack(alignment: .leading) { + Toggle("Hands landmarks", isOn: $outputHandsLandmarks) + Toggle("Body landmarks", isOn: $outputBodyLandmarks) + Toggle("Face landmarks", isOn: $outputFaceLandmarks) + } + .padding() + + } + .padding() + } +} + diff --git a/macOS/DataAnnotation/Screens/Settings/SettingsView.swift b/macOS/DataAnnotation/Screens/Settings/SettingsView.swift new file mode 100644 index 0000000..28fbc9a --- /dev/null +++ b/macOS/DataAnnotation/Screens/Settings/SettingsView.swift @@ -0,0 +1,37 @@ +// +// SettingsView.swift +// DataAnnotation +// +// Created by Matyáš Boháček on 07.01.2021. +// + +import SwiftUI + +struct SettingsView: View { + private enum Tabs: Hashable { + case general, video, analysis + } + + var body: some View { + TabView { + GeneralSettingsView() + .tabItem { + Label("General", systemImage: "gear") + } + .tag(Tabs.general) + VideoSettingsView() + .tabItem { + Label("Video", systemImage: "video.fill") + } + .tag(Tabs.video) + AnalysisSettingsView() + .tabItem { + Label("Analysis", systemImage: "figure.wave") + } + .tag(Tabs.analysis) + } + .padding(20) + .frame(width: 450, height: 200) + .navigationTitle("Preferences") + } +} diff --git a/macOS/DataAnnotation/Screens/Settings/VideoSettingsView.swift b/macOS/DataAnnotation/Screens/Settings/VideoSettingsView.swift new file mode 100644 index 0000000..eb35e16 --- /dev/null +++ b/macOS/DataAnnotation/Screens/Settings/VideoSettingsView.swift @@ -0,0 +1,22 @@ +// +// VideoSettingsView.swift +// DataAnnotation +// +// Created by Matyáš Boháček on 07.01.2021. +// + +import SwiftUI + +struct VideoSettingsView: View { + @AppStorage("fps") private var fps: Int = 1 + + var body: some View { + Form { + HStack { + Stepper("Frames per second", value: $fps, in: 1...10) + Text("\(fps)") + } + } + .padding() + } +} diff --git a/macOS/DataAnnotationTests/DataAnnotationTests.swift b/macOS/DataAnnotationTests/DataAnnotationTests.swift new file mode 100644 index 0000000..9da435c --- /dev/null +++ b/macOS/DataAnnotationTests/DataAnnotationTests.swift @@ -0,0 +1,33 @@ +// +// DataAnnotationTests.swift +// DataAnnotationTests +// +// Created by Thành Đỗ Long on 15.12.2020. +// + +import XCTest +@testable import DataAnnotation + +class DataAnnotationTests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. + } + + func testExample() throws { + // This is an example of a functional test case. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testPerformanceExample() throws { + // This is an example of a performance test case. + self.measure { + // Put the code you want to measure the time of here. 
+ } + } + +} diff --git a/macOS/DataAnnotationTests/Info.plist b/macOS/DataAnnotationTests/Info.plist new file mode 100644 index 0000000..64d65ca --- /dev/null +++ b/macOS/DataAnnotationTests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/macOS/DataAnnotationUITests/DataAnnotationUITests.swift b/macOS/DataAnnotationUITests/DataAnnotationUITests.swift new file mode 100644 index 0000000..64f2dc1 --- /dev/null +++ b/macOS/DataAnnotationUITests/DataAnnotationUITests.swift @@ -0,0 +1,42 @@ +// +// DataAnnotationUITests.swift +// DataAnnotationUITests +// +// Created by Thành Đỗ Long on 15.12.2020. +// + +import XCTest + +class DataAnnotationUITests: XCTestCase { + + override func setUpWithError() throws { + // Put setup code here. This method is called before the invocation of each test method in the class. + + // In UI tests it is usually best to stop immediately when a failure occurs. + continueAfterFailure = false + + // In UI tests it’s important to set the initial state - such as interface orientation - required for your tests before they run. The setUp method is a good place to do this. + } + + override func tearDownWithError() throws { + // Put teardown code here. This method is called after the invocation of each test method in the class. + } + + func testExample() throws { + // UI tests must launch the application that they test. + let app = XCUIApplication() + app.launch() + + // Use recording to get started writing UI tests. + // Use XCTAssert and related functions to verify your tests produce the correct results. + } + + func testLaunchPerformance() throws { + if #available(macOS 10.15, iOS 13.0, tvOS 13.0, *) { + // This measures how long it takes to launch your application. + measure(metrics: [XCTApplicationLaunchMetric()]) { + XCUIApplication().launch() + } + } + } +} diff --git a/macOS/DataAnnotationUITests/Info.plist b/macOS/DataAnnotationUITests/Info.plist new file mode 100644 index 0000000..64d65ca --- /dev/null +++ b/macOS/DataAnnotationUITests/Info.plist @@ -0,0 +1,22 @@ + + + + + CFBundleDevelopmentRegion + $(DEVELOPMENT_LANGUAGE) + CFBundleExecutable + $(EXECUTABLE_NAME) + CFBundleIdentifier + $(PRODUCT_BUNDLE_IDENTIFIER) + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + $(PRODUCT_NAME) + CFBundlePackageType + $(PRODUCT_BUNDLE_PACKAGE_TYPE) + CFBundleShortVersionString + 1.0 + CFBundleVersion + 1 + + diff --git a/preprocessor/SLR_Data_Annotation.playground/Contents.swift b/preprocessor/SLR_Data_Annotation.playground/Contents.swift new file mode 100644 index 0000000..07949d3 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/Contents.swift @@ -0,0 +1,56 @@ +import Cocoa +import Foundation +import CreateML + + +// +// Demonstration of the annotations script. The video is analyzed for all of the relevant +// key landmarks and this data is further structured into a CSV file. +// +// TODO: Update the Vision keys to new, non-deprecated values +// + + +// Path to dataset +let fileManager = FileManager.default +var datasetPath = ((NSSearchPathForDirectoriesInDomains(.desktopDirectory, .userDomainMask, true) as [String]).first ?? 
"").appending("/dictio_cislovky") + + +/* +// OPTION 1: PROCESSING ONE BY ONE + + +// Experimental label and URL to one of the videos +let labels = ["test"] +let videoUrl = URL(fileURLWithPath: "/Users/matyasbohacek/Desktop/dictio_cislovky/1/A_11.mp4") + +// Load the video into the VisionAnalysisManager +let analysisManager = VisionAnalysisManager(videoUrl: videoUrl, fps: 3) + +// Annotate for the necessary elements +analysisManager?.annotate() + +do { + // Structure the data into a MLDataTable + let dataTable = try OutputDataStructuringManager.combineData(labels: labels, visionAnalyses: [analysisManager!]) + + // Save it to Desktop folder in CSV + try dataTable.writeCSV(to: URL(fileURLWithPath: datasetPath).appendingPathComponent("individual_test.csv")) +} catch {} +*/ + +// OPTION 2: PROCESSING THE ENTIRE DATASET + + +// Create a dataset manager to process the entire dataset +let datasetManager = DatasetManager(directoryPath: datasetPath, fps: 3) + +do { + // Structure the data into a MLDataTable + let dataTable = try datasetManager.generateMLTable() + + // Save it to Desktop folder in CSV + try dataTable.writeCSV(to: URL(fileURLWithPath: datasetPath).appendingPathComponent("dataset_test.csv")) +} catch { + print(error) +} diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/MachineLearningConfiguration.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/MachineLearningConfiguration.swift new file mode 100644 index 0000000..81c98d6 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/MachineLearningConfiguration.swift @@ -0,0 +1,17 @@ +import Foundation + + +public class MachineLearningConfiguration { + + /// + /// Threshold for the hand pose detection using the Vision framework. + /// + public static let handPoseDetectionThreshold: Float = 0.1 + + /// + /// Threshold for the body pose detection using the Vision framework. + /// + public static let bodyPoseDetectionThreshold: Float = 0.01 + + +} diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationConfiguration.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationConfiguration.swift new file mode 100644 index 0000000..3990b2e --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationConfiguration.swift @@ -0,0 +1,41 @@ +// +// ObservationConfiguration.swift +// SLR_Data_Annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. +// + +import Foundation +import Vision + + +public class ObservationConfiguration { + + /// + /// List of all the data annotations to be analyzed using Vision. + /// + public static let desiredDataAnnotations: [ObservationType] = [.bodyLandmarks, .handLandmarks] + + /// + /// List of requested recognized body landmarks key in order to filter out any redundant. + /// + /// - Warning: If empty, all body landmarks are requested + /// + public static let requestedBodyLandmarks: [VNHumanBodyPoseObservation.JointName] = [ + .nose, + .rightEye, .leftEye, + .rightEar, .leftEar, + .rightShoulder, .leftShoulder, + .rightElbow, .leftElbow, + .rightWrist, .leftWrist + ] + + /// + /// List of requested recognized hand landmarks key in order to filter out any redundant. 
+ /// + /// - Warning: If empty, all hand landmarks are requested + /// + public static let requestedHandLandmarks: [VNHumanHandPoseObservation.JointName] = [] + +} diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationTerminology.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationTerminology.swift new file mode 100644 index 0000000..39d984a --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationTerminology.swift @@ -0,0 +1,75 @@ +// +// ObservationTerminology.swift +// SLR_Data_Annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. +// + +import Foundation +import Vision + +public class ObservationTerminology { + + /// + /// Dictionary for conversion bewteen the `VNRecognizedPointKey` and custom methodology + /// `String` identifiers for the body landmarks. + /// + public static let bodyLandmarksKeysToLabels: [VNHumanBodyPoseObservation.JointName: String] = [ + .nose : "nose", + .rightEye : "rightEye", + .leftEye: "leftEye", + .rightEar: "rightEar", + .leftEar: "leftEar", + .rightShoulder: "rightShoulder", + .leftShoulder: "leftShoulder", + .rightElbow: "rightElbow", + .leftElbow: "leftElbow", + .rightWrist: "rightWrist", + .leftWrist: "leftWrist" + ] + + /// + /// Dictionary for conversion bewteen the `VNRecognizedPointKey` and custom methodology + /// `String` identifiers for the hand landmarks. + /// + public static let handLandmarksKeysToLabels: [VNHumanHandPoseObservation.JointName: String] = [ + .wrist: "wrist", + .indexTip: "indexTip", + .indexDIP: "indexDIP", + .indexPIP: "indexPIP", + .indexMCP: "indexMCP", + .middleTip: "middleTip", + .middleDIP: "middleDIP", + .middlePIP: "middlePIP", + .middleMCP: "middleMCP", + .ringTip: "ringTip", + .ringDIP: "ringDIP", + .ringPIP: "ringPIP", + .ringMCP: "ringMCP", + .littleTip: "littleTip", + .littleDIP: "littleDIP", + .littlePIP: "littlePIP", + .littleMCP: "littleMCP", + .thumbTip: "thumbTip", + .thumbIP: "thumbIP", + .thumbMP: "thumbMP", + .thumbCMC: "thumbCMC" + ] + + /// + /// Order of the hand landmarks key order in the String CSV format for the training data. + /// + public static let handLandmarksKeyOrder = ["indexDip0X", "indexDip0Y", "indexDip1X", "indexDip1Y", "indexMcp0X", "indexMcp0Y", "indexMcp1X", "indexMcp1Y", "indexPip0X", "indexPip0Y", "indexPip1X", "indexPip1Y", "indexTip0X", "indexTip0Y", "indexTip1X", "indexTip1Y", "littleDip0X", "littleDip0Y", "littleDip1X", "littleDip1Y", "littleMcp0X", "littleMcp0Y", "littleMcp1X", "littleMcp1Y", "littlePip0X", "littlePip0Y", "littlePip1X", "littlePip1Y", "littleTip0X", "littleTip0Y", "littleTip1X", "littleTip1Y", "middleDip0X", "middleDip0Y", "middleDip1X", "middleDip1Y", "middleMcp0X", "middleMcp0Y", "middleMcp1X", "middleMcp1Y", "middlePip0X", "middlePip0Y", "middlePip1X", "middlePip1Y", "middleTip0X", "middleTip0Y", "middleTip1X", "middleTip1Y", "ringDip0X", "ringDip0Y", "ringDip1X", "ringDip1Y", "ringMcp0X", "ringMcp0Y", "ringMcp1X", "ringMcp1Y", "ringPip0X", "ringPip0Y", "ringPip1X", "ringPip1Y", "ringTip0X", "ringTip0Y", "ringTip1X", "ringTip1Y", "thumbCmc0X", "thumbCmc0Y", "thumbCmc1X", "thumbCmc1Y", "thumbIp0X", "thumbIp0Y", "thumbIp1X", "thumbIp1Y", "thumbMp0X", "thumbMp0Y", "thumbMp1X", "thumbMp1Y", "thumbTip0X", "thumbTip0Y", "thumbTip1X", "thumbTip1Y", "wrist0X", "wrist0Y", "wrist1X", "wrist1Y"] + + /// + /// Order of the body landmarks key order in the String CSV format for the training data. 
+ /// + public static let bodyLandmarksKeyOrder = ["leftEarX", "leftEarY", "leftElbowX", "leftElbowY", "leftEyeX", "leftEyeY", "leftShoulderX", "leftShoulderY", "leftWristX", "leftWristY", "noseX", "noseY", "rightEarX", "rightEarY", "rightElbowX", "rightElbowY", "rightEyeX", "rightEyeY", "rightShoulderX", "rightShoulderY", "rightWristX", "rightWristY"] + + /// + /// Order of the face landmarks key order in the String CSV format for the training data. + /// + public static let faceLandmarksKeyOrder = ["landmark0X", "landmark0Y", "landmark10X", "landmark10Y", "landmark11X", "landmark11Y", "landmark12X", "landmark12Y", "landmark13X", "landmark13Y", "landmark14X", "landmark14Y", "landmark15X", "landmark15Y", "landmark16X", "landmark16Y", "landmark17X", "landmark17Y", "landmark18X", "landmark18Y", "landmark19X", "landmark19Y", "landmark1X", "landmark1Y", "landmark20X", "landmark20Y", "landmark21X", "landmark21Y", "landmark22X", "landmark22Y", "landmark23X", "landmark23Y", "landmark24X", "landmark24Y", "landmark25X", "landmark25Y", "landmark26X", "landmark26Y", "landmark27X", "landmark27Y", "landmark28X", "landmark28Y", "landmark29X", "landmark29Y", "landmark2X", "landmark2Y", "landmark30X", "landmark30Y", "landmark31X", "landmark31Y", "landmark32X", "landmark32Y", "landmark33X", "landmark33Y", "landmark34X", "landmark34Y", "landmark35X", "landmark35Y", "landmark36X", "landmark36Y", "landmark37X", "landmark37Y", "landmark38X", "landmark38Y", "landmark39X", "landmark39Y", "landmark3X", "landmark3Y", "landmark40X", "landmark40Y", "landmark41X", "landmark41Y", "landmark42X", "landmark42Y", "landmark43X", "landmark43Y", "landmark44X", "landmark44Y", "landmark45X", "landmark45Y", "landmark46X", "landmark46Y", "landmark47X", "landmark47Y", "landmark48X", "landmark48Y", "landmark49X", "landmark49Y", "landmark4X", "landmark4Y", "landmark50X", "landmark50Y", "landmark51X", "landmark51Y", "landmark52X", "landmark52Y", "landmark53X", "landmark53Y", "landmark54X", "landmark54Y", "landmark55X", "landmark55Y", "landmark56X", "landmark56Y", "landmark57X", "landmark57Y", "landmark58X", "landmark58Y", "landmark59X", "landmark59Y", "landmark5X", "landmark5Y", "landmark60X", "landmark60Y", "landmark61X", "landmark61Y", "landmark62X", "landmark62Y", "landmark63X", "landmark63Y", "landmark64X", "landmark64Y", "landmark65X", "landmark65Y", "landmark66X", "landmark66Y", "landmark67X", "landmark67Y", "landmark68X", "landmark68Y", "landmark69X", "landmark69Y", "landmark6X", "landmark6Y", "landmark70X", "landmark70Y", "landmark71X", "landmark71Y", "landmark72X", "landmark72Y", "landmark73X", "landmark73Y", "landmark74X", "landmark74Y", "landmark75X", "landmark75Y", "landmark76X", "landmark76Y", "landmark7X", "landmark7Y", "landmark8X", "landmark8Y", "landmark9X", "landmark9Y"] + +} diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationType.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationType.swift new file mode 100644 index 0000000..b1b02f7 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Configuration/ObservationType.swift @@ -0,0 +1,15 @@ +// +// ObservationType.swift +// SLR_Data_Annotation +// +// Created by Matyáš Boháček on 01/12/2020. +// Copyright © 2020 Matyáš Boháček. All rights reserved. 
+//
+
+import Foundation
+
+public enum ObservationType {
+ case bodyLandmarks
+ case handLandmarks
+ case faceLandmarks
+}
diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DataStructuringManager.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DataStructuringManager.swift
new file mode 100644
index 0000000..ee48990
--- /dev/null
+++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DataStructuringManager.swift
@@ -0,0 +1,130 @@
+//
+// DataStructuringManager.swift
+// SLR_Data_Annotation
+//
+// Created by Matyáš Boháček on 01/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import Vision
+
+
+public class DataStructuringManager {
+
+ ///
+ /// Converts the data from the hand landmarks observations to landmark keys, for further data
+ /// structuring.
+ ///
+ /// - Parameters:
+ /// - recognizedLandmarks: Array of arrays of dictionaries with data from Vision analysis
+ ///
+ /// - Returns: Dictionary of Strings to arrays of Doubles for further processing
+ ///
+ public static func convertHandLandmarksToMLData(recognizedLandmarks: [[[VNHumanHandPoseObservation.JointName: VNPoint]]]) -> [String: [Double]] {
+ // Prepare the dictionary for all of the possible landmarks keys to be added
+ var converted = [String: [Double]]()
+ for keyOrdered in ObservationTerminology.handLandmarksKeyOrder {
+ converted[keyOrdered] = []
+ }
+
+ for (observationIndex, observation) in recognizedLandmarks.enumerated() {
+ // Ensure that at most two hands are analyzed
+ var maxObservationIndex = 2
+ if maxObservationIndex > observation.count {
+ maxObservationIndex = observation.count
+ }
+
+ // Structure the data with the new keys
+ for (handIndex, data) in observation[0..<maxObservationIndex].enumerated() {
+ for (landmarkKey, value) in data {
+ converted[ObservationTerminology.handLandmarksKeysToLabels[landmarkKey]! + "\(handIndex)X"]?.append(Double(value.location.x))
+ converted[ObservationTerminology.handLandmarksKeysToLabels[landmarkKey]! + "\(handIndex)Y"]?.append(Double(value.location.y))
+ }
+ }
+
+ // Fill in the values for all potential landmarks that were not captured
+ for keyOrdered in ObservationTerminology.handLandmarksKeyOrder {
+ if converted[keyOrdered]?.count != observationIndex + 1 {
+ converted[keyOrdered]?.append(0.0)
+ }
+ }
+ }
+
+ return converted
+ }
+
+ ///
+ /// Converts the data from the body landmarks observations to landmark keys, for further data
+ /// structuring.
+ ///
+ /// - Parameters:
+ /// - recognizedLandmarks: Array of arrays of dictionaries with data from Vision analysis
+ ///
+ /// - Returns: Dictionary of Strings to arrays of Doubles for further processing
+ ///
+ public static func convertBodyLandmarksToMLData(recognizedLandmarks: [[[VNHumanBodyPoseObservation.JointName: VNPoint]]]) -> [String: [Double]] {
+ // Prepare the dictionary for all of the possible landmarks keys to be added
+ var converted = [String: [Double]]()
+ for keyOrdered in ObservationTerminology.bodyLandmarksKeyOrder {
+ converted[keyOrdered] = []
+ }
+
+ for (observationIndex, observation) in recognizedLandmarks.enumerated() {
+ if !observation.isEmpty {
+ // Structure the data with the new keys
+ for (landmarkKey, value) in observation[0] {
+ converted[ObservationTerminology.bodyLandmarksKeysToLabels[landmarkKey]! + "X"]?.append(Double(value.location.x))
+ converted[ObservationTerminology.bodyLandmarksKeysToLabels[landmarkKey]! + "Y"]?.append(Double(value.location.y))
+ }
+ }
+
+ // Fill in the values for all potential landmarks that were not captured
+ for keyOrdered in ObservationTerminology.bodyLandmarksKeyOrder {
+ if converted[keyOrdered]?.count != observationIndex + 1 {
+ converted[keyOrdered]?.append(0.0)
+ }
+ }
+ }
+
+ return converted
+ }
+
+ ///
+ /// Converts the data from the face landmarks observations to landmark keys, for further data
+ /// structuring.
+ ///
+ /// - Parameters:
+ /// - recognizedLandmarks: Array of arrays of dictionaries with data from Vision analysis
+ ///
+ /// - Returns: Dictionary of Strings to arrays of Doubles for further processing
+ ///
+ public static func convertFaceLandmarksToMLData(recognizedLandmarks: [[[CGPoint]]]) -> [String: [Double]] {
+ // Prepare the dictionary for all of the possible landmarks keys to be added
+ var converted = [String: [Double]]()
+ for keyOrdered in ObservationTerminology.faceLandmarksKeyOrder {
+ converted[keyOrdered] = []
+ }
+
+ for (observationIndex, observation) in recognizedLandmarks.enumerated() {
+ if !observation.isEmpty {
+ // Structure the data with the new keys
+ for (landmarkIndex, landmark) in observation[0].enumerated() {
+ converted["landmark\(landmarkIndex)X"]?.append(Double(landmark.x))
+ converted["landmark\(landmarkIndex)Y"]?.append(Double(landmark.y))
+ }
+ }
+
+ // Fill in the values for all potential landmarks that were not captured
+ for keyOrdered in ObservationTerminology.faceLandmarksKeyOrder {
+ if converted[keyOrdered]?.count != observationIndex + 1 {
+ converted[keyOrdered]?.append(0.0)
+ }
+ }
+ }
+
+ return converted
+ }
+
+}
diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DatasetManager.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DatasetManager.swift
new file mode 100644
index 0000000..592d651
--- /dev/null
+++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/DatasetManager.swift
@@ -0,0 +1,100 @@
+//
+// DatasetManager.swift
+// SLR_Data_Annotation
+//
+// Created by Matyáš Boháček on 07/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import Cocoa
+import CreateML
+
+public class DatasetManager {
+
+ ///
+ /// Representations of the possible errors occurring in this context
+ ///
+ enum DatasetError: Error {
+ case invalidDirectoryContents
+ }
+
+
+ // MARK: Properties
+ private let directoryPath: String
+ private let fps: Int
+
+ private static let fileManager = FileManager.default
+
+ // MARK: Methods
+
+ ///
+ /// Initiates the DatasetManager for easy data processing and annotations of complete dataset directories.
+ ///
+ /// - Parameters:
+ /// - directoryPath: String path of the dataset directory
+ /// - fps: Frames per second to be annotated for the individual videos
+ ///
+ public init(directoryPath: String, fps: Int) {
+ self.directoryPath = directoryPath
+ self.fps = fps
+ }
+
+ ///
+ /// Annotates the entire associated dataset and returns the data in the form of an MLDataTable.
+ ///
+ /// - Returns: MLDataTable generated from the data
+ ///
+ /// - Throws: Corresponding `DatasetError` or `OutputProcessingError`, based on any
+ /// errors occurring during the data processing or the annotations
+ ///
+ public func generateMLTable() throws -> MLDataTable {
+ var foundSubdirectories = [String]()
+ var labels = [String]()
+ var analysesManagers = [VisionAnalysisManager]()
+
+ do {
+ // Load all of the labels present in the dataset
+ foundSubdirectories = try DatasetManager.fileManager.contentsOfDirectory(atPath: self.directoryPath)
+ } catch {
+ throw DatasetError.invalidDirectoryContents
+ }
+
+ // Create annotations managers for each of the labels
+ do {
+ for subdirectory in foundSubdirectories {
+ if subdirectory.starts(with: ".") {
+ continue
+ }
+
+ // Construct the URL path for each of the labels (items of the repository)
+ let currentLabelPath = self.directoryPath.appending("/" + subdirectory + "/")
+
+ for item in try DatasetManager.fileManager.contentsOfDirectory(atPath: currentLabelPath) {
+ // Skip non-video formats
+ if !item.contains(".mp4") {
+ // TODO: Throw
+ continue
+ }
+
+ // Load and process the annotations for each of the videos
+ guard let currentItemAnalysisManager = VisionAnalysisManager(videoUrl: URL(fileURLWithPath: currentLabelPath.appending(item)), fps: self.fps) else {
+ continue
+ }
+
+ currentItemAnalysisManager.annotate()
+ analysesManagers.append(currentItemAnalysisManager)
+
+ // Keep one label per analyzed video so that the labels stay aligned with the analyses
+ labels.append(subdirectory)
+ }
+ }
+ } catch {
+ throw error
+ }
+
+ do {
+ // Structure the data into an MLDataTable
+ return try OutputDataStructuringManager.combineData(labels: labels, visionAnalyses: analysesManagers)
+ } catch {
+ throw error
+ }
+ }
+
+}
diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/OutputDataStructuringManager.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/OutputDataStructuringManager.swift
new file mode 100644
index 0000000..73306c4
--- /dev/null
+++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/OutputDataStructuringManager.swift
@@ -0,0 +1,99 @@
+//
+// OutputDataStructuringManager.swift
+// SLR_Data_Annotation
+//
+// Created by Matyáš Boháček on 01/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import CreateML
+
+public class OutputDataStructuringManager {
+
+ ///
+ /// Representations of the possible errors occurring in this context
+ ///
+ enum OutputProcessingError: Error {
+ case invalidData
+ case structuringData
+ }
+
+ ///
+ /// Combines the data from multiple VisionAnalysisManagers into an MLDataTable.
+ ///
+ /// - Parameters:
+ /// - labels: Array of String labels of the individual signs
+ /// - visionAnalyses: Array of the processed and annotated VisionAnalysisManagers
+ ///
+ /// - Returns: Newly constructed MLDataTable
+ ///
+ public static func combineData(labels: [String], visionAnalyses: [VisionAnalysisManager]) throws -> MLDataTable {
+ // Ensure that the data is equally long
+ guard labels.count == visionAnalyses.count else {
+ throw OutputProcessingError.invalidData
+ }
+
+ // Prepare the structured data in the MLDataTable-processable format
+ var convertedToMLData = [String: MLDataValueConvertible]()
+ convertedToMLData["labels"] = labels
+
+ // Stack the data from individual analyses to arrays
+ var stackedData = [String: [[Double]]]()
+ var videoMetadata = ["width": [Double](), "height": [Double](), "fps": [Double]()]
+
+ for key in ObservationTerminology.bodyLandmarksKeyOrder + ObservationTerminology.handLandmarksKeyOrder + ObservationTerminology.faceLandmarksKeyOrder {
+ stackedData[key] = []
+ }
+
+ for analysis in visionAnalyses {
+ let analyzedData = analysis.getData()
+
+ // Append data for body landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.bodyLandmarks) {
+ for (key, value) in DataStructuringManager.convertBodyLandmarksToMLData(recognizedLandmarks: analyzedData.0) {
+ stackedData[key]?.append(value)
+ }
+ }
+
+ // Append data for hand landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.handLandmarks) {
+ for (key, value) in DataStructuringManager.convertHandLandmarksToMLData(recognizedLandmarks: analyzedData.1) {
+ stackedData[key]?.append(value)
+ }
+ }
+
+ // Append data for face landmarks
+ if ObservationConfiguration.desiredDataAnnotations.contains(.faceLandmarks) {
+ for (key, value) in DataStructuringManager.convertFaceLandmarksToMLData(recognizedLandmarks: analyzedData.2) {
+ stackedData[key]?.append(value)
+ }
+ }
+
+ // Add video size information to the dataset
+ videoMetadata["width"]?.append(Double(analysis.videoSize.width))
+ videoMetadata["height"]?.append(Double(analysis.videoSize.height))
+ videoMetadata["fps"]?.append(Double(analysis.fps))
+ }
+
+ for (key, value) in stackedData {
+ if !value.isEmpty {
+ convertedToMLData[key] = value
+ }
+ }
+
+ convertedToMLData["video_size_width"] = videoMetadata["width"]
+ convertedToMLData["video_size_height"] = videoMetadata["height"]
+ convertedToMLData["video_fps"] = videoMetadata["fps"]
+
+ do {
+ // Create an MLDataTable on top of the structured data
+ return try MLDataTable(dictionary: convertedToMLData)
+ } catch {
+ throw OutputProcessingError.structuringData
+ }
+ }
+
+}
diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/VideoProcessingManager.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/VideoProcessingManager.swift
new file mode 100644
index 0000000..9b06f7a
--- /dev/null
+++ b/preprocessor/SLR_Data_Annotation.playground/Sources/Managers/VideoProcessingManager.swift
@@ -0,0 +1,96 @@
+//
+// VideoProcessingManager.swift
+// SLR_Data_Annotation
+//
+// Created by Matyáš Boháček on 01/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import AVFoundation
+
+
+public class VideoProcessingManager {
+
+ ///
+ /// Processes all of the frames from the given video as a list of CGImages.
+ ///
+ /// - Parameters:
+ /// - videoUrl: URL of the video to be annotated
+ /// - fps: Frames per second to be annotated
+ ///
+ /// - Returns: Array of frames as CGImages
+ ///
+ public static func getAllFrames(videoUrl: URL, fps: Int) -> [CGImage] {
+ // Import the video into AVFoundation
+ let asset = AVAsset(url: videoUrl)
+ let duration = CMTimeGetSeconds(asset.duration)
+
+ let generator = AVAssetImageGenerator(asset: asset)
+ generator.appliesPreferredTrackTransform = true
+
+ var frames = [CGImage]()
+
+ // Process the frames for given frames per second rate at every second
+ for secondsIndex in 0 ..< Int(ceil(duration)) {
+ for frameIndex in 0 ..< fps {
+ let timeForFrame = Double(secondsIndex) + Double(frameIndex) * (1.0 / Double(fps))
+ // Skip frames past the end of the video or frames that could not be generated
+ if timeForFrame < duration, let frame = getFrame(fromTime: Float64(timeForFrame), generator: generator) {
+ frames.append(frame)
+ }
+ }
+ }
+
+ // Prevent additional crashes with the AVFoundation processing
+ generator.cancelAllCGImageGeneration()
+
+ return frames
+ }
+
+ ///
+ /// Converts the frame from the given AVAssetImageGenerator at the given time within
+ /// the encoded video.
+ ///
+ /// - Parameters:
+ /// - fromTime: Float64 of the time to extract the frame from
+ /// - generator: AVAssetImageGenerator with the video already encoded
+ ///
+ /// - Returns: Desired frame as CGImage
+ ///
+ private static func getFrame(fromTime: Float64, generator: AVAssetImageGenerator) -> CGImage? {
+ let image: CGImage
+
+ // Convert the time to the supported CMTime
+ let time = CMTimeMakeWithSeconds(fromTime, preferredTimescale: 600)
+
+ do {
+ // Convert the image at the given time
+ try image = generator.copyCGImage(at: time, actualTime: nil)
+ } catch {
+ return nil
+ }
+
+ return image
+ }
+
+ ///
+ /// Calculates the given video's size.
+ ///
+ /// - Parameters:
+ /// - videoUrl: URL of the video to be annotated
+ ///
+ /// - Returns: CGSize of the given video
+ ///
+ public static func getVideoSize(videoUrl: URL) -> CGSize {
+ // Import the video into AVFoundation
+ let asset = AVAsset(url: videoUrl)
+ guard let track = asset.tracks(withMediaType: AVMediaType.video).first else { return CGSize() }
+
+ // Calculate the size using the transformation from the track
+ let size = track.naturalSize.applying(track.preferredTransform)
+
+ // Convert the data into CGSize
+ return CGSize(width: abs(size.width), height: abs(size.height))
+ }
+
+}
diff --git a/preprocessor/SLR_Data_Annotation.playground/Sources/VisionAnalysisManager.swift b/preprocessor/SLR_Data_Annotation.playground/Sources/VisionAnalysisManager.swift
new file mode 100644
index 0000000..01477bf
--- /dev/null
+++ b/preprocessor/SLR_Data_Annotation.playground/Sources/VisionAnalysisManager.swift
@@ -0,0 +1,284 @@
+//
+// VisionAnalysisManager.swift
+// SLR_Data_Annotation
+//
+// Created by Matyáš Boháček on 01/12/2020.
+// Copyright © 2020 Matyáš Boháček. All rights reserved.
+//
+
+import Foundation
+import Cocoa
+import Vision
+
+
+public class VisionAnalysisManager {
+
+ // MARK: Properties
+ var videoUrl: URL
+ var fps: Int = 4
+
+ private var framesAnnotated = [[String: Bool]]()
+ public var frames = [CGImage]()
+
+ public var videoSize = CGSize()
+
+ private var keyBodyLandmarks = [[[VNHumanBodyPoseObservation.JointName: VNPoint]]]()
+ private var keyHandLandmarks = [[[VNHumanHandPoseObservation.JointName: VNPoint]]]()
+ private var keyFaceLandmarks = [[[CGPoint]]]()
+
+
+ // MARK: Methods
+
+ ///
+ /// Initiates the VisionAnalysisManager which is responsible for the Vision analysis and annotation of any
+ /// given video.
+ ///
+ /// - Parameters:
+ /// - videoUrl: URL of the video to be annotated
+ /// - fps: Frames per second to be annotated
+ ///
+ public init?(videoUrl: URL, fps: Int) {
+ self.videoUrl = videoUrl
+ self.fps = fps
+ }
+
+ ///
+ /// Starts the annotation process for the data associated with this VisionAnalysisManager.
+ ///
+ public func annotate() {
+ // Generate the individual frames from the video
+ self.frames = VideoProcessingManager.getAllFrames(videoUrl: self.videoUrl, fps: self.fps)
+ self.framesAnnotated = Array.init(repeating: ["body": false, "hands": false, "face": false], count: self.frames.count)
+
+ // Calculate the size of the video
+ self.videoSize = VideoProcessingManager.getVideoSize(videoUrl: self.videoUrl)
+
+ for frame in frames {
+ // Create a VNImageRequestHandler for each of the desired frames
+ let handler = VNImageRequestHandler(cgImage: frame, options: [:])
+
+ // Process the Vision data for all of the requested observation types
+ invokeBodyPoseDetection(handler: handler)
+ invokeHandPoseDetection(handler: handler)
+ invokeFaceLandmarksDetection(handler: handler)
+ }
+ }
+
+ ///
+ /// Returns all of the data analyzed and structured within this VisionAnalysisManager.
+ ///
+ /// - Returns: Tuple of arrays of arrays of individual dictionaries. The data is structured in the order:
+ /// body, hands, face.
+ ///
+ /// - Warning: If the data annotations are not yet finished, empty arrays will be returned. Check
+ /// `VisionAnalysisManager.isAnnotated()` to find out the current status.
+ ///
+ public func getData() -> ([[[VNHumanBodyPoseObservation.JointName: VNPoint]]], [[[VNHumanHandPoseObservation.JointName: VNPoint]]], [[[CGPoint]]]) {
+ if self.isAnnotated() {
+ return (self.keyBodyLandmarks, self.keyHandLandmarks, self.keyFaceLandmarks)
+ } else {
+ return ([], [], [])
+ }
+ }
+
+ ///
+ /// Determines whether the data associated with this VisionAnalysisManager is already processed and
+ /// annotated.
+ ///
+ /// - Returns: Bool representation of whether the data is already annotated
+ ///
+ public func isAnnotated() -> Bool {
+ for frameStatus in self.framesAnnotated {
+ for (_, value) in frameStatus {
+ if value == false {
+ return false
+ }
+ }
+ }
+
+ return true
+ }
+
+
+ // MARK: Body landmarks detection
+
+ ///
+ /// Runs an ML model for detecting body pose within the scene using the Vision framework. The analysis
+ /// is performed on a background thread.
+ ///
+ /// - Parameters:
+ /// - handler: VNImageRequestHandler to be used to analyse the body pose
+ ///
+ func invokeBodyPoseDetection(handler: VNImageRequestHandler) {
+ // Run the ML processes on a background queue to prevent lag
+ DispatchQueue.global(qos: .background).sync {
+ do {
+ // Set up the request
+ let bodyDetectionRequest = VNDetectHumanBodyPoseRequest(completionHandler: retrieveBodyPoseDetectionResults)
+
+ // Perform the request
+ try handler.perform([bodyDetectionRequest])
+ } catch {
+ print("! \(error)")
+ }
+ }
+ }
+
+ /// Retrieves results from ML analysis of body pose detection, including the relevant joints and their
+ /// probabilities.
+ ///
+ /// - Parameters:
+ /// - request: Initial request updated with the results
+ /// - error: Possible error occurring during the analysis
+ ///
+ func retrieveBodyPoseDetectionResults(request: VNRequest, error: Error?) {
+ guard let observations =
+ request.results as? [VNHumanBodyPoseObservation] else { return }
+
+ // Process each observation to find the recognized body landmarks
+ var result = [[VNHumanBodyPoseObservation.JointName: VNPoint]]()
+ observations.forEach { result.append(processBodyPoseObservation($0)) }
+
+ self.keyBodyLandmarks.append(result)
+ self.framesAnnotated[self.keyBodyLandmarks.count - 1]["body"] = true
+ }
+
+ func processBodyPoseObservation(_ observation: VNHumanBodyPoseObservation) -> [VNHumanBodyPoseObservation.JointName: VNPoint] {
+ // Retrieve all points
+ guard let recognizedPoints = try? observation.recognizedPoints(.all) else {
+ return [:]
+ }
+
+ var keyBodyLandmarks = [VNHumanBodyPoseObservation.JointName: VNPoint]()
+
+ // Process all of the recognized landmarks
+ for (key, point) in recognizedPoints {
+ if point.confidence > MachineLearningConfiguration.bodyPoseDetectionThreshold {
+ // Keep the point for further analysis if relevant
+ if (!ObservationConfiguration.requestedBodyLandmarks.isEmpty &&
+ ObservationConfiguration.requestedBodyLandmarks.contains(key)) ||
+ ObservationConfiguration.requestedBodyLandmarks.isEmpty {
+ keyBodyLandmarks[key] = point
+ }
+ }
+ }
+
+ return keyBodyLandmarks
+ }
+
+ // MARK: Hand landmarks detection
+
+ ///
+ /// Runs an ML model for detecting hand pose within the scene using the Vision framework. The analysis
+ /// is performed on a background thread.
+ ///
+ /// - Parameters:
+ /// - handler: VNImageRequestHandler to be used to analyse the hand pose
+ ///
+ func invokeHandPoseDetection(handler: VNImageRequestHandler) {
+ // Run the ML processes on a background queue to prevent lag
+ DispatchQueue.global(qos: .background).sync {
+ do {
+ // Set up the request
+ let handDetectionRequest = VNDetectHumanHandPoseRequest(completionHandler: retrieveHandPoseDetectionResults)
+ handDetectionRequest.maximumHandCount = 2
+
+ // Perform the request
+ try handler.perform([handDetectionRequest])
+ } catch {
+ print("! \(error)")
+ }
+ }
+ }
+
+ /// Retrieves results from ML analysis of hand pose detection, including the relevant joints and their
+ /// probabilities.
+ ///
+ /// - Parameters:
+ /// - request: Initial request updated with the results
+ /// - error: Possible error occurring during the analysis
+ ///
+ func retrieveHandPoseDetectionResults(request: VNRequest, error: Error?) {
+ guard let observations = request.results as? [VNHumanHandPoseObservation] else { return }
+
+ // Process each observation to find the recognized hand landmarks
+ var result = [[VNHumanHandPoseObservation.JointName: VNPoint]]()
+ observations.forEach { result.append(processHandPoseObservation($0)) }
+
+ self.keyHandLandmarks.append(result)
+ self.framesAnnotated[self.keyHandLandmarks.count - 1]["hands"] = true
+ }
+
+ func processHandPoseObservation(_ observation: VNHumanHandPoseObservation) -> [VNHumanHandPoseObservation.JointName : VNPoint] {
+ // Retrieve all points
+ guard let recognizedPoints = try? observation.recognizedPoints(.all) else {
+ return [:]
+ }
+
+ var keyHandLandmarks = [VNHumanHandPoseObservation.JointName : VNPoint]()
+
+ // Process all of the recognized landmarks
+ for (key, point) in recognizedPoints {
+ if point.confidence > MachineLearningConfiguration.handPoseDetectionThreshold {
+ // Keep the point for further analysis if relevant
+ if (!ObservationConfiguration.requestedHandLandmarks.isEmpty && ObservationConfiguration.requestedHandLandmarks.contains(key)) || ObservationConfiguration.requestedHandLandmarks.isEmpty {
+ keyHandLandmarks[key] = point
+ }
+ }
+ }
+
+ return keyHandLandmarks
+ }
+
+ // MARK: Face landmarks detection
+
+ ///
+ /// Runs an ML model for detecting face landmarks within the scene using the Vision framework. The analysis
+ /// is performed on a background thread.
+ ///
+ /// - Parameters:
+ /// - handler: VNImageRequestHandler to be used to analyse the face landmarks
+ ///
+ func invokeFaceLandmarksDetection(handler: VNImageRequestHandler) {
+ // Run the ML processes on a background queue to prevent lag
+ DispatchQueue.global(qos: .background).sync {
+ do {
+ // Set up the request
+ let faceLandmarksDetectionRequest = VNDetectFaceLandmarksRequest(completionHandler: retrieveFaceLandmarksDetectionResults)
+ // Perform the request
+ try handler.perform([faceLandmarksDetectionRequest])
+ } catch {
+ print("! \(error)")
+ }
+ }
+ }
+
+ /// Retrieves results from ML analysis of face landmarks detection, including the relevant landmark
+ /// points.
+ ///
+ /// - Parameters:
+ /// - request: Initial request updated with the results
+ /// - error: Possible error occurring during the analysis
+ ///
+ func retrieveFaceLandmarksDetectionResults(request: VNRequest, error: Error?) {
+ guard let observations = request.results as? [VNFaceObservation] else { return }
+
+ // Process each observation to find the recognized face landmarks
+ var result = [[CGPoint]]()
+ observations.forEach { result.append(processFaceLandmarksObservation($0)) }
+
+ self.keyFaceLandmarks.append(result)
+ self.framesAnnotated[self.keyFaceLandmarks.count - 1]["face"] = true
+ }
+
+ func processFaceLandmarksObservation(_ observation: VNFaceObservation) -> [CGPoint] {
+ // Retrieve all points
+ guard let recognizedLandmarks = observation.landmarks else {
+ return []
+ }
+
+ return recognizedLandmarks.allPoints?.normalizedPoints ?? 
[] + } + +} diff --git a/preprocessor/SLR_Data_Annotation.playground/contents.xcplayground b/preprocessor/SLR_Data_Annotation.playground/contents.xcplayground new file mode 100644 index 0000000..a8211e5 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/contents.xcplayground @@ -0,0 +1,4 @@ + + + + \ No newline at end of file diff --git a/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist b/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist new file mode 100644 index 0000000..dd433a8 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcdebugger/Breakpoints_v2.xcbkptlist @@ -0,0 +1,24 @@ + + + + + + + + + diff --git a/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist b/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist new file mode 100644 index 0000000..7da1b76 --- /dev/null +++ b/preprocessor/SLR_Data_Annotation.playground/xcuserdata/thanhdolong.xcuserdatad/xcschemes/xcschememanagement.plist @@ -0,0 +1,16 @@ + + + + + SchemeUserState + + SLR_Data_Annotation (Playground).xcscheme + + isShown + + orderHint + 0 + + + +
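
For orientation, the playground sources added above compose as follows: `DatasetManager` walks a labeled dataset directory, runs a `VisionAnalysisManager` over every `.mp4`, and `OutputDataStructuringManager` stacks the resulting landmarks into an `MLDataTable`. The snippet below is a minimal, hypothetical usage sketch, not part of the diff: the dataset and output paths are placeholders, and the CSV export assumes CreateML's `MLDataTable.writeCSV(to:)`.

```swift
import Foundation
import CreateML

// Hypothetical paths, substitute a real dataset directory and output file.
let datasetPath = "/path/to/dataset"
let outputUrl = URL(fileURLWithPath: "/path/to/annotations.csv")

do {
    // Annotate every labeled subdirectory at 4 frames per second.
    let datasetManager = DatasetManager(directoryPath: datasetPath, fps: 4)
    let table = try datasetManager.generateMLTable()

    // Persist the combined body, hand, and face landmarks for training.
    try table.writeCSV(to: outputUrl)
} catch {
    print("Dataset annotation failed: \(error)")
}
```

For a single video, the same flow can be driven directly through `VisionAnalysisManager(videoUrl:fps:)`, calling `annotate()` and then reading the landmarks back with `getData()` once `isAnnotated()` returns true.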