georgesterpu · saamc · Mar 16, 2018 · Mar 16, 2018 · Mar 27, 2018 · Mar 27, 2018
diff --git a/__init__.py b/__init__.py
@@ -0,0 +1 @@
+__path__ = __import__('pkgutil').extend_path(__path__, __name__)
diff --git a/pyVSR/Learn/htk.py b/pyVSR/Learn/htk.py
@@ -1,5 +1,5 @@
 from os import path, makedirs, remove, listdir, environ, pathsep
-from subprocess import list2cmdline, run, Popen, PIPE
+from subprocess import list2cmdline, check_call, Popen, PIPE
 import numpy as np
 from ..utils import read_htk_header
 from ..tcdtimit.files import phoneme_file, phoneme_list, viseme_file, viseme_list, character_file, character_list
@@ -178,7 +178,7 @@ def _initialize_stats(self):
 
         cmd = ['HCompV', '-C', self._config, '-f', '0.01', '-m', '-S', self.trainscp, '-M', firstdir, self._hmm_proto]
         print(list2cmdline(cmd))
-        run(cmd, check=True)
+        check_call(cmd)
 
     def _increase_mixtures(self, nmix):
         scp = self._gen_edit_script_num_mixtures(nmix)
@@ -191,7 +191,7 @@ def _increase_mixtures(self, nmix):
         cmd = ['HHEd', '-H', prevdir + 'vFloors', '-H', prevdir + 'hmmdefs', '-M', nextdir, scp,
                self._viseme_list]
         print(list2cmdline(cmd))
-        run(cmd, check=True)
+        check_call(cmd)
 
     def _fix_silence_viseme(self):
         edit_script = self._gen_edit_script_silence_vis()
@@ -206,7 +206,7 @@ def _fix_silence_viseme(self):
                nextdir, edit_script, self._viseme_list]
 
         print(list2cmdline(cmd))
-        run(cmd, check=True)
+        check_call(cmd)
 
     def _gen_edit_script_silence_vis(self):
         fname = './run/sil.hed'
@@ -267,17 +267,17 @@ def _gen_wordnet(self, wdnet):
 
             cmd = ['HLStats -b ./run/bigrams -o ' + self._viseme_list + ' ' + self._labels]
             print(list2cmdline(cmd))
-            run(cmd, check=True, shell=True)
+            check_call(cmd, shell=True)
 
             cmd = ['HBuild -n ./run/bigrams ' + new_hmmlist + ' ' + wdnet]
             print(list2cmdline(cmd))
-            run(cmd, check=True, shell=True)
+            check_call(cmd, shell=True)
             self._word_net = wdnet
 
         else:
             cmd = ['HParse', self._grammar, wdnet]
             print(list2cmdline(cmd))
-            run(cmd, check=True)
+            check_call(cmd)
             self._word_net = wdnet
 
     def _replicate_proto(self):
@@ -392,7 +392,7 @@ def _embedded_reestimation(self, num_times, binary=False, pruning='off', stats=F
                   ['-H', previous_dir + 'vFloors', '-H', previous_dir + 'hmmdefs',
                    '-M', current_dir, '-p', '0', self._viseme_list] + acc_files
 
-            run(list2cmdline(cmd), shell=True, check=True)
+            check_call(list2cmdline(cmd), shell=True)  # check_call raises on non-zero exit by itself
 
             # cleanup folder (remove accs, scp.i)
             # cmd = ['rm ' + current_dir + '*.acc']
@@ -410,7 +410,7 @@ def print_results(self, nmix, case):
         cmd = ['HResults', '-I', self._labels, '-f', '-p', self._viseme_list, self.predicted_labels]
         print(list2cmdline(cmd))
         with open('./run/results_' + case + '_' + str(nmix)+'_mixtures.txt', 'w') as logfile:
-            run(cmd, check=True, stdout=logfile)
+            check_call(cmd, stdout=logfile)  # check_call raises on non-zero exit by itself
 
 
 # r"""these functions are not part of the class"""

diff --git a/pyVSR/__init__.py b/pyVSR/__init__.py
@@ -1,7 +1,5 @@
 from .avsr import AVSR
 from .avsr import run
 from . import utils
-
-
-
-
+from .avletters.files import request_files
+#from .ouluvs2 import files
diff --git a/pyVSR/avletters/files.py b/pyVSR/avletters/files.py
@@ -0,0 +1,121 @@
+from os import path
+try:
+      from pathlib import Path
+except ImportError:
+      from pathlib2 import Path  # python 2 backport
+from natsort import natsorted
+from sys import argv
+import pprint
+import re
+
+_current_path = path.abspath(path.dirname(__file__))
+# separators ('_' or '-') between the fields of a file stem, see get_speaker()
+split = re.compile("_|-") 
+
+def request_files(dataset_dir,
+                  protocol='speaker_independent',
+                  speaker_id=None, content="video", condition="none"):
+    """Return naturally sorted (train, dev, test) file lists for `protocol`.
+
+    `content`/`condition` go to get_files(); unknown protocols raise Exception.
+    """
+    files = get_files(dataset_dir, content, condition)
+    speakers = get_speakers(files)
+    if protocol == 'speaker_dependent':
+        train, dev, test = _preload_files_speaker_dependent(files, speaker_id)
+    elif protocol == 'speaker_independent':
+        train, dev, test = _preload_files_speaker_independent(files, speakers, content, condition)
+    else:
+        raise Exception('undefined dataset split protocol')
+    return natsorted(train), natsorted(dev), natsorted(test)
+
+def get_files(dataset_dir, content="video", condition=None):
+    """List the dataset files holding `content` ("video", "audio" or "label").
+
+    `condition` names the sub-directory of Audio/mfcc and is required for
+    audio only. Raises Exception for unknown content or a missing condition.
+    """
+    root = Path(dataset_dir)
+    if content == "video":
+        return list(root.joinpath("Lips").glob("*.mat"))
+    if content == "label":
+        # we don't look for the extension here, as it's not a given
+        return list(root.joinpath("Label").glob("[A-Z][1-3]_*.*"))
+    if content != "audio":
+        raise Exception("unknown content: " + content)
+    if condition is None:
+        raise Exception("audio content requires a condition")
+
+    # only mfcc in distribution, waveform dir empty
+    p = root.joinpath("Audio", "mfcc", condition)
+    print(p.as_posix())  # single-argument form prints the same on Python 2 and 3
+    if not p.is_dir():
+        raise Exception("unknown condition: " + condition + " in " + p.stem)
+    # glob() returns a one-shot generator, so hand back a reusable list
+    return list(p.glob("*.mfcc"))
+
+
+def get_speakers(files):
+    """Unique speaker ids in `files`, sorted so seeded splits are reproducible."""
+    return sorted({get_speaker(f) for f in files})
+
+def get_speaker(file_):
+    """Return the second '_'/'-' separated field of `file_.stem`, the speaker id."""
+    return split.split(file_.stem)[1]
+
+#speaker_dependent means: we have some speakers that we trained on in the dev/test sets
+def _preload_files_speaker_dependent(files, speaker_id):
+    """Placeholder for the speaker-dependent train/dev/test split.
+
+    Not implemented yet: always raises NotImplementedError (an Exception
+    subclass, so existing `except Exception` handlers keep working).
+
+    NEED TO BE CREATIVE HERE: we can basically split along repetitions, but
+    it's very little data. The draft idea was a 60/20/20 split over `files`
+    made of two nested sklearn train_test_split calls (test_size=0.20, then
+    test_size=0.25 on the remainder, both with random_state=0), returning
+    (train, dev, test).
+    """
+    raise NotImplementedError("speaker dependent protocol not implemented")
+
+def _preload_files_speaker_independent(files, speakers, content="video", condition=None):
+    """Split `files` into train/dev/test lists with disjoint speaker sets.
+
+    Speakers are split 60/20/20 by two nested, seeded train_test_split calls;
+    every file then follows its speaker. `content` and `condition` are
+    currently unused. Raises KeyError if a speaker has no file in `files`.
+    """
+    from sklearn.model_selection import train_test_split
+
+    # 60/20/20 split by recursive split over speakers
+    strain, stest = train_test_split(speakers, test_size=0.20, random_state=0)
+    strain, sdev = train_test_split(strain, test_size=0.25, random_state=0)
+
+    # map all files to their speaker; setdefault creates the list on first
+    # use, so no exception handling is needed for unseen speakers
+    speaker_files = {}
+    for file_ in files:
+        speaker_files.setdefault(get_speaker(file_), []).append(file_)
+
+    # collect the files of each speaker subset, in subset order
+    train_files, dev_files, test_files = [
+        [file_ for speaker in subset for file_ in speaker_files[speaker]]
+        for subset in (strain, sdev, stest)
+    ]
+    return train_files, dev_files, test_files
+
+if __name__ == "__main__":
+    # Manual smoke test; print() with one argument works on Python 2 and 3.
+    if len(argv) < 2:
+        raise SystemExit("usage: " + argv[0] + " <dataset_dir>")
+    print("{0} :  {1}".format(argv[0], argv[1]))
+    pp = pprint.PrettyPrinter(indent=4)
+
+    train, dev, test = request_files(argv[1], protocol='speaker_independent',
+                                     speaker_id=None, content="video",
+                                     condition="none")
+
+    for title, subset in (("train set:", train), ("dev set:", dev), ("test set:", test)):
+        print(title)
+        pp.pprint(subset)
+
diff --git a/pyVSR/avletters/import_data.sh b/pyVSR/avletters/import_data.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+#links the corpus into ./data; expansions are quoted, so whitespace is safe
+set -euo pipefail
+shopt -s nullglob  #an empty source directory means "no files", not a literal '*'
+src_dir=/data/corpora/audiovisual/avletters
+#import audio data as is with sub directories
+mkdir -p data/Lips data/Label
+ln -sfn "$src_dir/Audio" data/Audio
+#import video data with name change, dropping "-lips" from "xxx-lips.mat"
+for f in "$src_dir"/Lips/*.mat; do base=$(basename "$f"); ln -sf "$f" "data/Lips/${base/-lips/}"; done
+#make labels from the file name, this is just the first letter
+for f in data/Audio/mfcc/Clean/*.mfcc; do name=$(basename "$f" .mfcc); echo "$name" "${name/[0-9]_*}" > "data/Label/$name.mlf"; done