-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtoolkits.py
48 lines (37 loc) · 1.85 KB
/
toolkits.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# Import relevant modules
import os # interface with the underlying OS
def get_raw_jobs():
    '''
    GET_RAW_JOBS returns a tuple (raw_path, raw_jobs), where raw_path is the path to the folder containing the raw Job
    Bulletins, and raw_jobs is the sorted list of all entries in that folder. Keep in mind that these are just file
    names, not full paths.

    `Vocational Worker DEPARTMENT OF PUBLIC WORKS.txt` is always left out of raw_jobs, as it doesn't share the same job
    description pattern of the City of LA.

    Raises:
        OSError: if the Job Bulletins folder cannot be listed (e.g. it does not exist).
        ValueError: if the excluded bulletin is not present in the folder.
        AssertionError: if the number of remaining entries is not the expected 682.
    '''
    # Define path to look at (relative to the current working directory)
    path = 'CityofLA/Job Bulletins/'
    excluded_job = 'Vocational Worker DEPARTMENT OF PUBLIC WORKS.txt'
    expected_count = 682  # this number comes from already trying this code individually

    # os.listdir returns every entry of the folder in arbitrary order,
    # so sort the names to get a reproducible listing
    raw_jobs = sorted(os.listdir(path))

    # Drop the bulletin that doesn't follow the common pattern
    raw_jobs.remove(excluded_job)

    # Sanity check: raised explicitly (instead of a bare `assert`) so that it
    # is not stripped when Python runs with -O, and so it reports what was found
    if len(raw_jobs) != expected_count:
        raise AssertionError(
            'expected {} job bulletins in {!r}, found {}'.format(expected_count, path, len(raw_jobs))
        )

    # Returns
    return (path, raw_jobs)
def get_cleaned_jobs():
    '''
    GET_CLEANED_JOBS returns a tuple (clean_path, clean_jobs), where clean_path is the path to the folder containing
    clean_jobs, i.e., JobBulletins_cleaned, and clean_jobs is the sorted list of all entries in the JobBulletins_cleaned
    folder. Keep in mind that these are just file names, not full paths.

    `Vocational Worker DEPARTMENT OF PUBLIC WORKS.txt` is always left out of clean_jobs, as it doesn't share the same
    job description pattern of the City of LA.

    Raises:
        OSError: if the JobBulletins_cleaned folder cannot be listed (e.g. it does not exist).
        ValueError: if the excluded bulletin is not present in the folder.
        AssertionError: if the number of remaining entries is not the expected 682.
    '''
    # Define path to look at (relative to the current working directory)
    path = 'CityofLA/JobBulletins_cleaned/'
    excluded_job = 'Vocational Worker DEPARTMENT OF PUBLIC WORKS.txt'
    expected_count = 682  # this number comes from already trying this code individually

    # os.listdir returns every entry of the folder in arbitrary order,
    # so sort the names to get a reproducible listing
    cleaned_jobs = sorted(os.listdir(path))

    # Drop the bulletin that doesn't follow the common pattern
    cleaned_jobs.remove(excluded_job)

    # Sanity check: raised explicitly (instead of a bare `assert`) so that it
    # is not stripped when Python runs with -O, and so it reports what was found
    if len(cleaned_jobs) != expected_count:
        raise AssertionError(
            'expected {} job bulletins in {!r}, found {}'.format(expected_count, path, len(cleaned_jobs))
        )

    # Returns
    return (path, cleaned_jobs)