Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions ICECAN/corpus_reorganization_script.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@

# coding=utf-8
import os
import sys
import csv
Expand All @@ -8,8 +10,10 @@
from statistics import mean
from textgrid import TextGrid, IntervalTier

orig_dir = r'/media/share/corpora/ICE-Can'
output_dir = r'/media/share/corpora/ICE-Can/to-align'
# orig_dir = r'/media/share/corpora/ICE-Can'
# output_dir = r'/media/share/corpora/ICE-Can/to-align'
orig_dir = r"/Volumes/data/corpora/ICE-Can"
output_dir = r"/Volumes/data/corpora/ICE-Can/to-align"

os.makedirs(output_dir, exist_ok=True)

Expand Down Expand Up @@ -332,5 +336,5 @@ def convert_wavs():

if __name__ == '__main__':
reorganize_meta_file()
convert_wavs()
parse_transcripts()
# convert_wavs()
# parse_transcripts()
Binary file added ICECAN/sibilant_script/.DS_Store
Binary file not shown.
49 changes: 49 additions & 0 deletions ICECAN/sibilant_script/open_2.praat
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
# Open one TextGrid/wav pair for sibilant annotation.
# The form fields below are the script's arguments: the TextGrid path, the
# wav path, and the start and end of the stretch to inspect. They are read
# further down as tg_path$, wav_path$, start and end.
form Open a tgwav
sentence Tg_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.TextGrid
sentence Wav_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.wav
positive Start 1494.81
positive End 1494.91
endform

# load both files into the object list; tg and wav keep their object IDs
tg = Read from file: tg_path$
wav = Read from file: wav_path$

# select both objects so that their names can be captured; the names are
# what the "select TextGrid ..." / "select Sound ..." commands below use
selectObject: wav
plusObject: tg
grid$ = selected$ ("TextGrid")
sound$ = selected$ ("Sound")

# add annotation tier and boundaries
select TextGrid 'grid$'
numberOfTiers = Get number of tiers

# open a TextGrid editor only long enough to append a new interval tier
# called "sibann" after the last existing tier, then close it again
Edit
editor TextGrid 'grid$'
Add interval tier... numberOfTiers+1 sibann
Close
endeditor

# back outside the editor: mark the start and end of the token on the new tier
Insert boundary... numberOfTiers+1 start
Insert boundary... numberOfTiers+1 end

# add the Sound to the selection so that the editor shows both objects
plus Sound 'sound$'
# zoom in on focused part
View & Edit
editor TextGrid 'grid$'
Zoom: start, end
endeditor


# second window: the Sound on its own, zoomed to and selecting the same stretch
select Sound 'sound$'
Edit
editor Sound 'sound$'
Zoom: start, end
Select: start, end
endeditor

# disabled: info-window output (this version of the form has no cog field)
#writeInfoLine: "COG: ", cog
#appendInfoLine: "

19 changes: 19 additions & 0 deletions ICECAN/sibilant_script/open_tg.praat
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
# Load a TextGrid/wav pair, open the two together in an editor and zoom to
# the stretch between start and end (all four values are form arguments).
form Open a tgwav
sentence tg_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.TextGrid
sentence wav_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.wav
positive start 1494.81
positive end 1494.91
endform
# tg and wav keep the IDs of the objects that were read
tg = Read from file: tg_path$
wav = Read from file: wav_path$
# select both objects so that the editor opened below shows them together
selectObject: wav
plusObject: tg

View & Edit
# NOTE(review): the tier position is hard-coded to 5 and both arguments are
# wrapped in single quotes; this command is also issued while the Sound is
# still part of the selection. Confirm that it runs as intended.
Insert interval tier... '5' 'sib_ann'
editor: tg

# disabled: boundaries around the token on tier 5
#Insert boundary... '5' start
#Insert boundary... '5' end
Zoom: start, end
endeditor
17 changes: 17 additions & 0 deletions ICECAN/sibilant_script/plan
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
plan:
- open/read csv w/ python/pandas (done)
- take 1% weighted sample (done)
- for each sample:
get path/filename
get timing info
write to individual file
get COG, peak, slope, spread
write to another file (?)
- interactive script
in batches (to avoid opening 600 praat windows at once and crashing computer)
subprocess call to praat script
praat script opens corresponding file with path/filename and timing info (done)
praat script adds a tier where annotation can happen (done)
praat script opens textgrid and wav file, zooms in on time slice (done)
praat script opens long term spectral slice window
praat script opens info window with COG, peak, slope, spread measures
Binary file added ICECAN/sibilant_script/sendpraat
Binary file not shown.
20 changes: 20 additions & 0 deletions ICECAN/sibilant_script/sib_script
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# read the whole sibilant CSV into a single string
csv_raw$ = readFile$ ("testsibilants.csv")

# Split .str$ at every occurrence of .sep$.
# The pieces are stored in .array$[1], .array$[2], ... and .length holds how
# many pieces there are. A string that does not contain the separator yields
# exactly one piece (the string itself).
procedure split (.sep$, .str$)
.seplen = length(.sep$)
.length = 0
repeat
.strlen = length(.str$)
# position of the next separator, 0 when none is left
.sep = index(.str$, .sep$)
if .sep > 0
# cut off the piece in front of the separator and keep the remainder
.part$ = left$(.str$, .sep-1)
.str$ = mid$(.str$, .sep+.seplen, .strlen)
else
# no separator left: the remainder is the final piece
.part$ = .str$
endif
.length = .length+1
.array$[.length] = .part$
until .sep = 0
endproc

# NOTE(review): this path looks like the corpus root directory rather than a
# single file - confirm what is meant to be read here.
Read from file: "/Volumes/data/corpora" ; Mac
116 changes: 116 additions & 0 deletions ICECAN/sibilant_script/superscript.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
import os
import pandas as pd
import re
import argparse
import numpy as np
from subprocess import Popen, PIPE
import sys
import shlex
from pyraat import PraatAnalysisFunction

np.random.seed(1234)

PRAAT = "/Applications/Praat.app/Contents/MacOS/Praat"

def get_sample(path):
    """Draw a 1% random sample of sibilant tokens from every corpus.

    Args:
        path: Path to a CSV file that has a ``corpus`` column.

    Returns:
        A ``(sample_df, corpora)`` tuple. ``sample_df`` holds the sampled
        rows grouped by corpus, in sorted corpus order; ``corpora`` is the
        set of corpus names found in the file.
    """
    sib_df = pd.read_csv(path)
    all_corpora = sib_df['corpus']
    perc = .01

    samples = []
    # Walk the corpora in sorted order. Iterating a set of strings follows
    # hash order, which changes between interpreter runs and would make the
    # draw irreproducible even with a fixed NumPy seed.
    for corp, count in all_corpora.value_counts().sort_index().items():
        num_samples = int(np.rint(perc * float(count)))
        sub_frame = sib_df[sib_df.corpus == corp]
        # replace=False: the same token must not be drawn twice.
        chosen_idxs = np.random.choice(
            sub_frame.shape[0], size=num_samples, replace=False)
        samples.append(sub_frame.iloc[chosen_idxs])

    # pd.concat refuses an empty list, which happens for a file without rows.
    tot_df = pd.concat(samples) if samples else pd.DataFrame()
    return tot_df, set(all_corpora)

def input_taker(df, locations):
    """Interactively open each sampled sibilant in a running Praat instance.

    Each time enter is pressed, the next row of ``df`` is read, the paths of
    its TextGrid and wav file are built, and ``sendpraat`` is asked to run
    ``open_2.praat`` on them. Typing any non-blank text, or running out of
    rows, ends the session.

    Args:
        df: DataFrame with ``discourse``, ``corpus``, ``begin`` and ``end``
            columns, one row per sibilant token.
        locations: Mapping from lower-cased corpus name to that corpus's
            root directory.
    """
    print("Interactive script for sibilant checks:")
    enter = input("press enter to continue")
    row_idx = 0
    print(enter)

    # Use the Praat script that sits next to this file rather than a path
    # inside one developer's home directory.
    script_dir = os.path.split(os.path.abspath(__file__))[0]
    path_to_open = os.path.join(script_dir, "open_2.praat")

    # Compare with == (not `is`): string identity is an implementation
    # detail. Stop after the last row instead of letting df.iloc raise
    # IndexError.
    while enter.strip() == "" and row_idx < len(df):
        row = df.iloc[row_idx]
        filename = row["discourse"]
        corpus = row["corpus"].lower()
        print(corpus)
        # corpus was lower-cased above, so test against the lower-case name.
        if corpus == "sotc":
            # SOTC files live two directory levels down, named after the
            # first two and first three dash-separated parts of the filename.
            split_name = re.split("-", filename)
            outer_dir = "-".join(split_name[0:2])
            inner_dir = "-".join(split_name[0:3])
            tg_path = os.path.join(locations[corpus], outer_dir, inner_dir, filename + ".TextGrid")
            wav_path = os.path.join(locations[corpus], outer_dir, inner_dir, filename + ".wav")
        else:
            # Raleigh layout: one directory named after the first six
            # characters of the filename.
            outer_dir = filename[0:6]
            tg_path = os.path.join(locations[corpus], outer_dir, filename + ".TextGrid")
            wav_path = os.path.join(locations[corpus], outer_dir, filename + ".wav")

        zoom_start, zoom_end = row["begin"], row["end"]

        quote_str = "execute {} {} {} {} {}".format(
            path_to_open, tg_path, wav_path, zoom_start, zoom_end)
        cmd = ["./sendpraat", "praat", quote_str]
        print(quote_str)
        with Popen(cmd, stdout=PIPE, stderr=PIPE, stdin=PIPE) as p:
            # communicate() drains both pipes together, so a full stderr
            # pipe cannot stall the read of stdout.
            out, err = p.communicate()
        # latin-1 maps every byte to a character, so decoding cannot fail.
        print(out.decode('latin'), err.decode('latin'))

        enter = input("press enter to continue")
        row_idx += 1


def get_locations(corpora, location_file):
    """Read the corpus-location file into a ``{corpus: directory}`` mapping.

    Every non-blank line of ``location_file`` has the form
    ``<corpus_name>,<textgrid_location>``. Corpus names are matched
    case-insensitively; only the first comma separates the two fields, so a
    location may itself contain commas.

    Args:
        corpora: Iterable of the corpus names present in the sibilant data.
        location_file: Path to the location file.

    Returns:
        Dict keyed by lower-cased corpus name. A corpus without a line in
        the file keeps the value ``None``.

    Exits the process with status 1 when a line is malformed, points at a
    path that does not exist, or names a corpus that is not in ``corpora``.
    """
    location_dict = {x.lower(): None for x in corpora}
    with open(location_file) as f1:
        for line in f1:
            if not line.strip():
                # Tolerate blank lines, e.g. a trailing newline at the end.
                continue
            corpus, sep, location = line.partition(",")
            if not sep:
                print("Error: Line {!r} is not of the form <corpus_name>,<textgrid_location>".format(line.strip()))
                sys.exit(1)
            corpus = corpus.strip()
            location = location.strip()
            if not os.path.exists(location):
                print("Error: Location {} does not exist".format(location))
                sys.exit(1)
            # Explicit membership test: assigning to a dict key never raises
            # KeyError, so a try/except around the assignment cannot detect
            # an unknown corpus.
            if corpus.lower() not in location_dict:
                print("Error: Corpus {} is not in the sibilant dataset".format(corpus))
                sys.exit(1)
            location_dict[corpus.lower()] = location
    return location_dict



# Script entry: draw the per-corpus sample, keep only the Raleigh rows, load
# the corpus locations, then start the interactive Praat session.
# Both file names are relative, so they are looked up in the current
# working directory.
one_perc_df, corpora = get_sample("testsibilants.csv")
# Only the Raleigh tokens are stepped through here.
just_Ral = one_perc_df[one_perc_df.corpus == "Raleigh"]
loc_dict = get_locations(corpora, "locations.txt")
input_taker(just_Ral, loc_dict)


# Printed once the interactive session has ended: (rows, columns) of the
# full sample, not just the Raleigh subset.
print(one_perc_df.shape)
1 change: 1 addition & 0 deletions ICECAN/sibilant_script/temp.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
./sendpraat praat "execute /Users/esteng/SPADE/ICECAN/sibilant_script/open_2.praat /Volumes/data/corpora/Raleigh/ral128/ral1280d.TextGrid /Volumes/data/corpora/Raleigh/ral128/ral1280d.wav 1963.9436 1964.1331699999998"
Binary file added sibilant_script/.DS_Store
Binary file not shown.
31 changes: 31 additions & 0 deletions sibilant_script/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Instructions on how to use interactive script

## prerequisites
Python 3.5 and Praat are required to run this script. The current version assumes that it is running on macOS (OS X), but this can be changed. Using a virtualenv is recommended; an easy way to set up an environment is by installing and using [Miniconda](https://conda.io/miniconda.html).

To use the script, the libraries in requirements.txt need to be installed. This can be done by inputting
`pip install -r requirements.txt`

The testsibilants.csv file should be in the same directory as the script.

## current functionality
Right now, the script works for the following corpora: SB_West, SOTC, and Raleigh

## How to:
1. Edit the location file
- the format should be `<CORPUS_NAME>,<PATH_TO_CORPUS>`
- the path should be an absolute path
- examples can be found in the "location_example.txt" file
2. Open the Praat application
- praat needs to be running already for the script to work
3. run the script
- run `python superscript.py` from the command line
4. step through the interactive script
- in the command line, a prompt will appear to press enter. Each time you press enter in the command line, a new row will be read from the testsibilants.csv file, which corresponds to a new sibilant. Three Praat windows should open. Note that opening the Praat windows may take a few seconds.

## Adding new corpora
To add support for new corpora (in case you have them in textgrid format), two changes need to be made:
1. add their locations to the `locations.txt` file
2. add their names to `CORPUS_LIST` list at line 11 of `superscript.py`


57 changes: 57 additions & 0 deletions sibilant_script/open_2.praat
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Open one TextGrid/wav pair for sibilant annotation and show its spectral
# measures in the Info window.
# The form fields below are the script's arguments: the TextGrid path, the
# wav path, the start and end of the stretch to inspect, and the four
# measures (centre of gravity, peak, slope, spread) computed beforehand.
# They are read further down as tg_path$, wav_path$, start, end, cog, peak,
# slope and spread.
# The four measures are declared "real", not "positive": a positive field
# rejects zero and negative numbers, yet the defaults are 0.0 and a spectral
# slope is normally negative.
form Open a tgwav
sentence Tg_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.TextGrid
sentence Wav_path /Volumes/data/corpora/Raleigh/ral368/ral3680d.wav
positive Start 1494.81
positive End 1494.91
real Cog 0.0
real Peak 0.0
real Slope 0.0
real Spread 0.0
endform

# load both files into the object list; tg and wav keep their object IDs
tg = Read from file: tg_path$
wav = Read from file: wav_path$

# select both objects so that their names can be captured; the names are
# what the "select TextGrid ..." / "select Sound ..." commands below use
selectObject: wav
plusObject: tg
grid$ = selected$ ("TextGrid")
sound$ = selected$ ("Sound")

# add annotation tier and boundaries
select TextGrid 'grid$'
numberOfTiers = Get number of tiers

# open a TextGrid editor only long enough to append a new interval tier
# called "sibann" after the last existing tier, then close it again
Edit
editor TextGrid 'grid$'
Add interval tier... numberOfTiers+1 sibann
Close
endeditor

# back outside the editor: mark the start and end of the token on the new tier
Insert boundary... numberOfTiers+1 start
Insert boundary... numberOfTiers+1 end

# add the Sound to the selection so that the editor shows both objects
plus Sound 'sound$'
# zoom in on focused part
View & Edit
editor TextGrid 'grid$'
Zoom: start, end
endeditor


# second window: the Sound on its own, zoomed to and selecting the same stretch
select Sound 'sound$'
Edit
editor Sound 'sound$'
Zoom: start, end
Select: start, end
endeditor

# report the measures that were passed in
writeInfoLine: "COG: ", cog
appendInfoLine: "Peak: ", peak
appendInfoLine: "Slope: ", slope
appendInfoLine: "Spread: ", spread


17 changes: 17 additions & 0 deletions sibilant_script/plan
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
plan:
- open/read csv w/ python/pandas (done)
- take 1% weighted sample (done)
- for each sample:
get path/filename
get timing info
write to individual file
get COG, peak, slope, spread
write to another file (?)
- interactive script
in batches (to avoid opening 600 praat windows at once and crashing computer)
subprocess call to praat script
praat script opens corresponding file with path/filename and timing info (done)
praat script adds a tier where annotation can happen (done)
praat script opens textgrid and wav file, zooms in on time slice (done)
praat script opens long term spectral slice window
praat script opens info window with COG, peak, slope, spread measures
Binary file added sibilant_script/sendpraat
Binary file not shown.
Binary file added sibilant_script/sendpraat.exe
Binary file not shown.
Loading