@@ -13,6 +13,7 @@ import Random: shuffle, randstring, seed!, make_seed
13
13
import Memento
14
14
import DataStructures: OrderedDict
15
15
import REPL
16
+ import Parsers
16
17
using REPL. TerminalMenus
17
18
using DelimitedFiles
18
19
@@ -30,7 +31,14 @@ tracking identifier mappings.
30
31
"""
31
32
function deid_file! (dicts:: DeIdDicts , fc:: FileConfig , pc:: ProjectConfig , logger)
32
33
# Initiate new file
33
- infile = CSV. File (fc. filename, dateformat = fc. dateformat)
34
+ infile = try
35
+ CSV. File (fc. filename, dateformat = fc. dateformat)
36
+ catch ArgumentError
37
+ CSV. File (fc. filename)
38
+ end
39
+
40
+ dicts = DeIdDicts (dicts, fc. dateformat)
41
+
34
42
outfile = joinpath (pc. outdir, " deid_" * fc. name * " _" * getcurrentdate () * " .csv" )
35
43
36
44
ncol = length (infile. names)
@@ -75,52 +83,60 @@ function deid_file!(dicts::DeIdDicts, fc::FileConfig, pc::ProjectConfig, logger)
75
83
writedlm (io, reshape (header, 1 , length (header)), ' ,' )
76
84
77
85
# Process each row
78
- for row in infile
79
-
80
- val = getoutput (dicts, Hash, getproperty (row, pcol), 0 )
81
- pid = setrid (val, dicts)
82
-
83
- for col in infile. names
84
- colname = get (fc. rename_cols, col, col)
85
-
86
- action = get (fc. colmap, colname, Missing) :: Type
87
- # drop cols
88
- action == Drop && continue
89
-
90
- VAL = getproperty (row, col)
91
-
92
- # apply pre-processing transform
93
- if haskey (fc. preprocess, colname) && ! ismissing (VAL)
94
- transform = fc. preprocess[colname]
95
- transform = replace (transform, " VAL" => " \" $VAL \" " )
96
- expr = Meta. parse (transform)
97
- VAL = Core. eval (@__MODULE__ , expr)
98
- end
99
-
100
- VAL = getoutput (dicts, action, VAL, pid)
101
-
102
- if col == pcol
103
- VAL = pid
86
+ for (i, row) in Iterators. enumerate (infile)
87
+ try
88
+ val = getoutput (dicts, Hash, getproperty (row, pcol), 0 )
89
+ pid = setrid (val, dicts)
90
+ columns = Vector {String} ()
91
+
92
+ for col in infile. names
93
+ colname = get (fc. rename_cols, col, col)
94
+
95
+ action = get (fc. colmap, colname, Missing) :: Type
96
+
97
+ if action == Drop
98
+ continue
99
+ end
100
+
101
+ VAL = getproperty (row, col)
102
+
103
+ # apply pre-processing transform
104
+ if haskey (fc. preprocess, colname) && ! ismissing (VAL)
105
+ transform = fc. preprocess[colname]
106
+ transform = replace (transform, " VAL" => " \" $VAL \" " )
107
+ expr = Meta. parse (transform)
108
+ VAL = Core. eval (@__MODULE__ , expr)
109
+ end
110
+
111
+ VAL = getoutput (dicts, action, VAL, pid)
112
+
113
+ if col == pcol
114
+ VAL = pid
115
+ end
116
+
117
+ # apply post-processing transform
118
+ if haskey (fc. postprocess, colname) && ! ismissing (VAL)
119
+ transform = fc. postprocess[colname]
120
+ transform = replace (transform, " VAL" => " \" $VAL \" " )
121
+ expr = Meta. parse (transform)
122
+ VAL = Core. eval (@__MODULE__ , expr)
123
+ end
124
+
125
+ if eltype (VAL) <: String
126
+ VAL = replace (VAL, " \" " => " \\\" " )
127
+ end
128
+
129
+ if VAL != = nothing && ! ismissing (VAL)
130
+ push! (columns, string (VAL))
131
+ else
132
+ push! (columns, " " )
133
+ end
104
134
end
105
135
106
- # apply post-processing transform
107
- if haskey (fc. postprocess, colname) && ! ismissing (VAL)
108
- transform = fc. postprocess[colname]
109
- transform = replace (transform, " VAL" => " \" $VAL \" " )
110
- expr = Meta. parse (transform)
111
- VAL = Core. eval (@__MODULE__ , expr)
112
- end
113
-
114
- if eltype (VAL) <: String
115
- VAL = replace (VAL, " \" " => " \\\" " )
116
- end
117
-
118
- write (io, " \" $VAL \" " )
119
- if lastcol == col
120
- write (io, ' \n ' )
121
- else
122
- write (io, " ," )
123
- end
136
+ writedlm (io, reshape (columns, 1 , length (columns)), ' ,' )
137
+ catch e
138
+ Memento. error (logger, " $(Dates. now ()) Error occurred while processing row $i " )
139
+ rethrow (e)
124
140
end
125
141
end
126
142
@@ -129,8 +145,6 @@ function deid_file!(dicts::DeIdDicts, fc::FileConfig, pc::ProjectConfig, logger)
129
145
return nothing
130
146
end
131
147
132
-
133
-
134
148
"""
135
149
deidentify(cfg::ProjectConfig)
136
150
This is the constructor for the `DeIdentified` struct. We use this type to store
@@ -142,7 +156,7 @@ digest of the original primary ID to our new research IDs.
142
156
"""
143
157
function deidentify (cfg:: ProjectConfig )
144
158
num_files = length (cfg. file_configs)
145
- dicts = DeIdDicts (cfg. maxdays, cfg. shiftyears)
159
+ dicts = DeIdDicts (cfg. maxdays, cfg. shiftyears, cfg . dateformat )
146
160
147
161
if ! isdir (cfg. outdir)
148
162
# mkpath also creates any intermediate paths
0 commit comments