libvisiontransfer  10.6.0
tokenizer.h
1 /*******************************************************************************
2  * Copyright (c) 2023 Allied Vision Technologies GmbH
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to deal
6  * in the Software without restriction, including without limitation the rights
7  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8  * copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *******************************************************************************/
14 
15 #ifndef VISIONTRANSFER_TOKENIZER_H
16 #define VISIONTRANSFER_TOKENIZER_H
17 
18 #include <vector>
19 #include <string>
20 #include <sstream>
21 
22 namespace visiontransfer {
23 namespace internal {
24 
/**
 * Splits a single line of text into string tokens.
 *
 * Behavior is configured through chainable setters:
 *  - separators():  strings that delimit tokens (default: space and tab).
 *  - strip_chars(): characters trimmed from both ends of every token
 *                   (default: none).
 *  - collapse():    if true (default), empty tokens produced by adjacent
 *                   separators are dropped.
 *  - quoting():     if true (default), single and double quotes protect
 *                   separators and comment initiators inside them.
 *
 * Outside of quotes, a comment initiator ("#" by default) ends processing
 * of the input; everything from there on is discarded.
 */
class Tokenizer {
public:
    Tokenizer(): _separators{" ", "\t"}, _comment_initiators{"#"}, _strip_chars(""), _collapse(true), _quoting(true) {}

    /// Sets the list of separator strings. Empty strings in the list are ignored.
    Tokenizer& separators(const std::vector<std::string>& seps) { _separators = seps; return *this; }
    /// Sets the characters that are trimmed from both ends of each token.
    Tokenizer& strip_chars(const std::string& chars) { _strip_chars = chars; return *this; }
    /// Enables or disables dropping of empty tokens between adjacent separators.
    Tokenizer& collapse(bool coll) { _collapse = coll; return *this; }
    /// Enables or disables interpretation of ' and " as quote characters.
    Tokenizer& quoting(bool quot) { _quoting = quot; return *this; }

    /**
     * Tokenizes the given input.
     *
     * Inside quotes the escapes \\ and an escaped copy of the active quote
     * character are recognized; inside double quotes \n additionally yields
     * a newline. A closing quote always terminates the current token, even
     * if the token is empty, which allows empty tokens to be written as "".
     * An unterminated quote extends to the end of the input.
     *
     * @param inp Text to split.
     * @return The tokens in order of appearance, stripped of the configured
     *         strip characters.
     */
#if VISIONTRANSFER_CPLUSPLUS_VERSION >= 201703L
    std::vector<std::string> tokenize(const std::string_view& inp) {
#else
    std::vector<std::string> tokenize(const std::string& inp) {
#endif
        std::vector<std::string> toks;
        std::string cur;          // token currently being assembled
        char quotemode = '\0';    // active quote character, or '\0' when unquoted

        for (size_t i = 0; i < inp.size(); ++i) {
            if (quotemode != '\0') {
                // Quoted region. compare() clamps the length at the end of
                // the input, so a trailing lone backslash never matches.
                if (inp.compare(i, 2, "\\\\") == 0) {
                    cur += '\\';
                    ++i;
                } else if (inp[i] == '\\' && i + 1 < inp.size() && inp[i + 1] == quotemode) {
                    // Escaped quote character of the active quote type
                    cur += quotemode;
                    ++i;
                } else if (quotemode == '"' && inp.compare(i, 2, "\\n") == 0) {
                    // Newline escape is only honored inside double quotes
                    cur += '\n';
                    ++i;
                } else if (inp[i] == quotemode) {
                    // Closing quote: emit the token unconditionally
                    toks.push_back(cur);
                    cur.clear();
                    quotemode = '\0';
                } else {
                    cur += inp[i];
                }
                continue;
            }

            // Unquoted region: a comment initiator takes precedence over
            // everything else and ends processing of the input.
            bool iscomment = false;
            for (const auto& comm: _comment_initiators) {
                if (!comm.empty() && inp.compare(i, comm.size(), comm) == 0) {
                    iscomment = true;
                    break;
                }
            }
            if (iscomment) {
                break;
            }

            bool issep = false;
            for (const auto& sep: _separators) {
                // Empty separators are skipped; they would match everywhere
                // without consuming input and stall the loop.
                if (!sep.empty() && inp.compare(i, sep.size(), sep) == 0) {
                    issep = true;
                    i += sep.size() - 1; // the loop increment consumes the last char
                    break;
                }
            }

            if (issep) {
                if (!_collapse || !cur.empty()) {
                    toks.push_back(cur);
                    cur.clear();
                }
            } else if (_quoting && (inp[i] == '"' || inp[i] == '\'')) {
                quotemode = inp[i];
            } else {
                cur += inp[i];
            }
        }

        // Emit whatever is left after the last separator
        if (!_collapse || !cur.empty()) {
            toks.push_back(cur);
        }

        if (!_strip_chars.empty()) {
            for (auto& tok: toks) {
                const auto st = tok.find_first_not_of(_strip_chars);
                if (st == std::string::npos) {
                    // Token consists of strip characters only (or is empty)
                    tok.clear();
                } else {
                    const auto en = tok.find_last_not_of(_strip_chars);
                    tok = tok.substr(st, en + 1 - st);
                }
            }
        }
        return toks;
    }

protected:
    std::vector<std::string> _separators;
    std::vector<std::string> _comment_initiators;
    std::string _strip_chars;
    bool _collapse, _quoting;
};
132 
133 } // namespace internal
134 } // namespace visiontransfer
135 
136 #endif
137 
Allied Vision