HDvis/StringUtils.h at master · JeffersonLab/HDvis · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
//
// Created by romanov on 4/28/16.
//

#ifndef RCDB_CPP_STRINGUTILS_H
#define RCDB_CPP_STRINGUTILS_H


#include <cstdlib>
#include <string>
#include <vector>

#include <algorithm>
#include <functional>
#include <cctype>
#include <locale>


#include <cstdio>
#include <cstdlib>
#include <cstdarg>
#include <ctime>
#include <iostream>
#include <sstream>


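// Evaluates to true if `character` is a blank (whitespace) character:
// space, '\n', '\t', '\v', '\r' or '\f'.
// NOTE: this is a function-like macro, so the argument is evaluated several times;
// do not pass an expression with side effects.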
#define RCDB_CHECK_CHAR_IS_BLANK(character) ((character)==' ' || (character)=='\n' || (character)=='\t' || (character)=='\v' || (character)=='\r' || (character)=='\f')

/**
 * Collection of stateless string helpers (escaping, time formatting, trimming,
 * search-and-replace, wildcard matching and tokenizing).
 *
 * All members are static; the class is never meant to be instantiated.
 */
class StringUtils {
public:

    /** Escapes a string so it can be embedded between double quotes in a JSON document.
     *
     * Backslash, double quote, forward slash, backspace, form feed, new line,
     * carriage return and tab get their two-character escapes. Any other control
     * character below 0x20 is written as \u00XX, because JSON does not allow raw
     * control characters inside strings. Every other byte is copied unchanged.
     *
     * @param input text to escape
     * @return escaped copy of @p input (without surrounding quotes)
     */
    static std::string EscapeJsonString(const std::string& input) {
        static const char kHexDigits[] = "0123456789abcdef";

        std::string escaped;
        escaped.reserve(input.size());

        for (const char ch : input) {
            switch (ch) {
                case '\\': escaped += "\\\\"; break;
                case '"':  escaped += "\\\""; break;
                case '/':  escaped += "\\/";  break;
                case '\b': escaped += "\\b";  break;
                case '\f': escaped += "\\f";  break;
                case '\n': escaped += "\\n";  break;
                case '\r': escaped += "\\r";  break;
                case '\t': escaped += "\\t";  break;
                default: {
                    const unsigned char byte = static_cast<unsigned char>(ch);
                    if (byte < 0x20) {
                        // Remaining control characters have no short form in JSON
                        escaped += "\\u00";
                        escaped += kHexDigits[(byte >> 4) & 0x0F];
                        escaped += kHexDigits[byte & 0x0F];
                    } else {
                        escaped += ch;
                    }
                    break;
                }
            }
        }
        return escaped;
    }

    /** Formats a broken-down time as "YYYY-MM-DD HH:MM:SS".
     *
     * @param time broken-down time to format
     * @return formatted text, or an empty string if strftime could not format the value
     */
    static inline std::string GetFormattedTime(std::tm time) {
        // Large enough even when the year does not fit in four digits
        char buff[64];
        const std::size_t written = std::strftime(buff, sizeof(buff), "%Y-%m-%d %H:%M:%S", &time);

        // strftime returns 0 on failure and then leaves the buffer unspecified,
        // so only the reported number of characters is used.
        return std::string(buff, written);
    }

    /** Formats a calendar time, converted with std::localtime, as "YYYY-MM-DD HH:MM:SS".
     *
     * @param time calendar time to format
     * @return formatted local time, or an empty string if the conversion failed
     */
    static inline std::string GetFormattedTime(std::time_t time) {
        const std::tm *local = std::localtime(&time);
        if (local == nullptr) {
            return std::string();
        }
        return GetFormattedTime(*local);
    }

    // trim from start (in place)
    static inline void ltrim(std::string &s) {
        // std::isspace requires a value representable as unsigned char, hence the parameter type
        s.erase(s.begin(),
                std::find_if(s.begin(), s.end(), [](unsigned char ch) { return !std::isspace(ch); }));
    }

    // trim from end (in place)
    static inline void rtrim(std::string &s) {
        // Search backwards for the last non-space character; base() points just past it
        s.erase(std::find_if(s.rbegin(), s.rend(), [](unsigned char ch) { return !std::isspace(ch); }).base(),
                s.end());
    }

    // trim from both ends (in place)
    static inline void trim(std::string &s) {
        ltrim(s);
        rtrim(s);
    }

    // trim from start (copying)
    static inline std::string ltrimmed(std::string s) {
        ltrim(s);
        return s;
    }

    // trim from end (copying)
    static inline std::string rtrimmed(std::string s) {
        rtrim(s);
        return s;
    }

    // trim from both ends (copying)
    static inline std::string trimmed(std::string s) {
        trim(s);
        return s;
    }

    /** Replaces every occurrence of @p pattern in @p source with @p replace.
     *
     * The search continues after each inserted replacement, so text introduced by
     * a replacement is never matched again.
     *
     * @param pattern text to look for; an empty pattern matches nothing
     * @param replace text to put in place of each match
     * @param source  original text (left untouched)
     * @param out     receives the resulting text
     * @return number of replacements made
     */
    static int Replace(const std::string& pattern, const std::string& replace, const std::string& source, std::string &out) {
        int matches = 0;
        out.assign(source);

        // An empty pattern is found at every position, which would never terminate
        if (pattern.empty()) {
            return matches;
        }

        // Replace all matches
        for (std::string::size_type start = out.find(pattern);
             start != std::string::npos;
             start = out.find(pattern, start + replace.size())) {   // jump over the replacement
            ++matches;
            out.replace(start, pattern.size(), replace);
        }
        return matches;
    }


    /** Replaces every occurrence of @p pattern in @p source with @p replace.
     *
     * @return copy of @p source with the replacements applied
     */
    static std::string Replace(const std::string& pattern, const std::string& replace, const std::string& source)
    {
        std::string out;
        Replace(pattern, replace, source, out);
        return out;
    }


    /** Matches a zero-terminated string against a wildcard pattern.
     *
     * '*' in the pattern matches any run of characters (including none) and
     * '?' matches exactly one character. Everything else must match literally.
     *
     * @param pattern zero-terminated wildcard pattern
     * @param source  zero-terminated string to test
     * @return true if the whole @p source matches @p pattern; false otherwise,
     *         including when either pointer is null
     */
    static bool WildCardCheck( const char* pattern, const char* source )
    {
        if (pattern == nullptr || source == nullptr) {
            return false;
        }

        // Where to resume after a failed attempt following the most recent '*'
        const char *retryPattern = nullptr;
        const char *retrySource = nullptr;

        // Literal prefix before the first '*'
        while (*source && *pattern != '*') {
            if (*pattern != *source && *pattern != '?') {
                return false;
            }
            ++pattern;
            ++source;
        }

        // Here the source is exhausted or the pattern stands on '*', so the retry
        // pointers are always assigned before the backtracking branch can run.
        while (*source) {
            if (*pattern == '*') {
                if (!*++pattern) {
                    return true;    // trailing '*' swallows the rest of the source
                }
                retryPattern = pattern;
                retrySource = source + 1;
            }
            else if (*pattern == *source || *pattern == '?') {
                ++pattern;
                ++source;
            }
            else {
                // Mismatch: let the last '*' consume one more character and try again
                pattern = retryPattern;
                source = retrySource++;
            }
        }

        // Source is exhausted; the pattern may only have '*' left
        while (*pattern == '*') {
            ++pattern;
        }

        return !*pattern;
    }


    /** Splits a string into tokens separated by any of the given delimiter characters.
     *
     * Runs of delimiters are treated as one separator, so no empty tokens are produced.
     *
     * @param s          text to split
     * @param delimiters set of delimiter characters
     * @return tokens in order of appearance
     */
    static std::vector<std::string> Split( const std::string &s, const std::string& delimiters /*= " "*/ )
    {
        std::vector<std::string> elements;
        return Split(s, elements, delimiters);
    }


    /** Splits a string into tokens and appends them to @p tokens.
     *
     * Runs of delimiters are treated as one separator, so no empty tokens are produced.
     * @p tokens is not cleared first; new tokens are added after its existing content.
     *
     * @param str        text to split
     * @param tokens     vector the tokens are appended to
     * @param delimiters set of delimiter characters
     * @return copy of @p tokens after appending
     */
    static std::vector<std::string> Split( const std::string& str, std::vector<std::string>& tokens, const std::string& delimiters /*= " "*/ )
    {
        // Skip delimiters at beginning.
        auto tokenBegin = str.find_first_not_of(delimiters, 0);

        // Find the delimiter that ends the first token.
        auto tokenEnd = str.find_first_of(delimiters, tokenBegin);

        while (std::string::npos != tokenEnd || std::string::npos != tokenBegin)
        {
            // Found a token, add it to the vector.
            tokens.push_back(str.substr(tokenBegin, tokenEnd - tokenBegin));
            // Skip delimiters.  Note the "not_of"
            tokenBegin = str.find_first_not_of(delimiters, tokenEnd);
            // Find the delimiter that ends the next token
            tokenEnd = str.find_first_of(delimiters, tokenBegin);
        }
        return tokens;
    }


    /** Splits string to lexical values.
     *
     * LexicalSplit treats:
     * 1) "quoted values" as one value,
     * 2) '#' outside of a quoted value as the start of a comment that runs to the end of the input
     * 3) skips all white space characters. All specification is in doc/ccdb_file_format.pdf
     *
     * @param source       text to split
     * @param skipComments if false, a comment (with its leading '#') is returned as the last token;
     *                     if true, it is dropped
     * @return the values found in @p source
     */
    static std::vector<std::string> LexicalSplit( const std::string& source, bool skipComments = false )
    {
        std::vector<std::string> tokens;
        LexicalSplit(tokens, source, skipComments);
        return tokens;
    }


    /** Splits string to lexical values.
     *
     * LexicalSplit treats:
     * 1) "quoted values" as one value,
     * 2) '#' outside of a quoted value as the start of a comment that runs to the end of the input
     * 3) skips all white space characters. All specification is in doc/ccdb_file_format.pdf
     *
     * @param tokens       output; cleared first, then filled with the values found
     * @param source       text to split
     * @param skipComments if false, a comment (with its leading '#') is stored as the last token;
     *                     if true, it is dropped. A '#' that is the very last character is never stored.
     *
     * @remark
     * Handling inconsistencies and errors at parse time:
     *  -  No ending quote. If no ending quote is found, the string value is taken
     *     until the end of the input.
     *  -  Comment inside a string. A comment symbol inside a quoted string is ignored.
     *     So a record "info #4" is read just as the string: info #4
     *  -  Sticking strings. If there are no spaces between symbols and quotes,
     *     everything is merged into one value. I.e.:
     *     John" Smith" will be parsed as one value: "John Smith"
     *     John" "Smith will be parsed as one value: "John Smith"
     *     but be careful(!) not to forget spaces between columns:
     *     5.14"Smith" will be parsed as one value "5.14Smith", which probably
     *     leads to errors if these were two different columns
     *  -  String fields are enclosed in "..." characters. Every " inside a string
     *     must be written as \" . All words and symbols inside "..." are
     *     interpreted as one string entity.
     *  -  A value that ends up empty (for example "") produces no token.
     */
    static void LexicalSplit( std::vector<std::string>& tokens, const std::string& source,  bool skipComments = false )
    {
        //clear output
        tokens.clear();

        const size_t length = source.length();
        bool insideQuotes = false;  //true after an opening '"' until the closing one
        std::string value;          //value collected so far

        for (size_t i = 0; i < length; i++)
        {
            const char ch = source[i];

            if (!insideQuotes && IsBlank(ch))
            {
                //blank outside of quotes ends the current value (if there is one)
                FlushValue(tokens, value);
            }
            else if (ch == '\\' && insideQuotes && i + 1 < length && source[i + 1] == '"')
            {
                //escaped quote inside a string: drop the backslash, keep the quote
                i++;
                value += source[i];
            }
            else if (ch == '#' && !insideQuotes)
            {
                //comment starts here; save what was collected before it
                FlushValue(tokens, value);

                //the rest of the input is the comment; a lone trailing '#' is not stored
                if (!skipComments && i + 1 < length)
                {
                    tokens.push_back(source.substr(i));
                }
                return;
            }
            else if (ch == '"')
            {
                //beginning or ending of a quoted string
                insideQuotes = !insideQuotes;
            }
            else
            {
                //just one more symbol of the current value
                value += ch;
            }
        }

        //end of input ends the last value, even inside an unterminated quote
        FlushValue(tokens, value);
    }

private:

    // True for the characters LexicalSplit treats as blanks: space, \n, \t, \v, \r, \f
    static bool IsBlank(char ch) {
        return ch == ' ' || ch == '\n' || ch == '\t' || ch == '\v' || ch == '\r' || ch == '\f';
    }

    // Appends a non-empty collected value to tokens and resets it
    static void FlushValue(std::vector<std::string>& tokens, std::string& value) {
        if (!value.empty()) {
            tokens.push_back(value);
            value.clear();
        }
    }
};


#endif //RCDB_CPP_STRINGUTILS_H