The Sparta Modeling Framework
Loading...
Searching...
No Matches
SmartLexicalCast.hpp
Go to the documentation of this file.
1// <SmartLexicalCast.hpp> -*- C++ -*-
2
9#pragma once
10
11#include <iostream>
12#include <utility>
13#include <memory>
14
15#include <boost/lexical_cast.hpp>
16
19#include "sparta/utils/Utils.hpp"
21
22namespace sparta {
23 namespace utils {
24
/*!
 * \brief Modifier instance - associates a set of suffix strings (e.g. "k")
 * with the multiplier that is applied to a parsed value when one of those
 * suffixes is found
 */
struct Modifier {

    /*!
     * \brief Suffix strings identifying this modifier (case sensitive)
     */
    std::vector<const char*> options;

    /*!
     * \brief Multiplier applied to the value when this modifier is found
     */
    uint64_t mult;
};
40
/*!
 * \brief Prefix instance - associates a set of prefix strings (e.g. "0x")
 * with a radix and the set of digit characters allowed after that prefix
 *
 * All members are const; instances are meant to be built once with
 * aggregate initialization (see the prefixes table).
 */
struct RadixPrefix {

    /*!
     * \brief Prefix strings identifying this prefix (case sensitive)
     */
    const std::vector<const char*> options;

    /*!
     * \brief Radix associated with this prefix
     */
    const uint32_t radix;

    /*!
     * \brief Valid digits in a number following this prefix
     */
    const char* const digits;
};
63
/*!
 * \brief Radix assumed for a numeric string when no radix prefix is recognized
 */
static constexpr uint32_t DEFAULT_RADIX = 10;
65
73static const std::vector<RadixPrefix> prefixes = {
74 {{"0x", "0X"}, 16, "0123456789abcdefABCDEF"},
75 {{"0b", "0B"}, 2, "01"},
76 {{"0"}, 8, "01234567"} // '0' Must be last to avoid false positives
77};
78
82static const std::vector<Modifier> suffixes = {
83
84 // ISO/IEC 8000 unit prefixes
85 // http://en.wikipedia.org/wiki/Mebi#IEC_standard_prefixes
86
87 {{"ki", "Ki",
88 "kI", "KI"}, 1_u64<<10}, // kibi
89 {{"mi", "Mi",
90 "mI", "MI"}, 1_u64<<20}, // mebi
91 {{"gi", "Gi", "bi", "Bi",
92 "gI", "GI", "bI", "BI"}, 1_u64<<30}, // gibi
93 {{"ti", "Ti",
94 "tI", "TI"}, 1_u64<<40}, // tebi
95 {{"pi", "Pi",
96 "pI", "PI"}, 1_u64<<50}, // pebi
97
98 // SI unit prefixes (e.g. 'K' in 'KB')
99
100 {{"n", "N"}, 1}, // normal
101 {{"k", "K"}, 1000}, // kilo
102 {{"m", "M"}, 1000000}, // mega
103 {{"g", "G", "b", "B"}, 1000000000}, // giga
104 {{"t", "T"}, 1000000000000}, // tera
105 {{"p", "P"}, 1000000000000000} // peta
106};
107
/*!
 * \brief All whitespace characters allowed
 *
 * Besides space, tab and newline this includes '_' and ',' so that they can
 * be used as digit-group separators inside a number. The pointer itself is
 * const so that no translation unit can accidentally re-seat it.
 */
static const char* const WHITESPACE = " _,\t\n";
112
/*!
 * \brief All decimal (base 10) digits allowed and '.'
 *
 * The pointer itself is const so that no translation unit can accidentally
 * re-seat it.
 */
static const char* const DECIMAL_DIGITS = "0123456789.";
117
165template <typename T>
166inline T smartLexicalCast(const std::string& s,
167 size_t& end_pos,
168 bool allow_recursion=true,
169 bool allow_prefix=true) {
170 (void) allow_recursion;
171 (void) allow_prefix;
172 T result = lexicalCast<T>(s, 0); // 0 => use auto-radix
173 end_pos = std::string::npos;
174 return result;
175}
176
185inline std::string parseNumericString(const std::string& s, size_t pos, const char* digits, size_t& after_numeric) {
186 std::string raw_value_chars = digits;
187 raw_value_chars += WHITESPACE;
188 after_numeric = s.find_first_not_of(raw_value_chars, pos);
189
190 if(after_numeric == pos){
191 return "";
192 }
193
194 std::string numeric;
195 if(after_numeric == std::string::npos){
196 numeric = s.substr(pos);
197 }else{
198 numeric = s.substr(pos, after_numeric - pos);
199 }
200
201 // Remove all "whitespace" from within the string
202 for(const char* pc = WHITESPACE; *pc != '\0'; ++pc) {
203 std::string to_replace;
204 to_replace += *pc;
205 replaceSubstring(numeric, to_replace, "");
206 }
207
208 return numeric;
209}
210
211template <>
212inline uint64_t smartLexicalCast(const std::string& s,
213 size_t& end_pos,
214 bool allow_recursion,
215 bool allow_prefix) {
216 size_t pos = 0;
217
218 // Skip leading space. If string is ONLY leading space, return 0
219 pos = s.find_first_not_of(WHITESPACE);
220 if(pos == std::string::npos){
221 end_pos = pos; // Output ending position
222 return 0;
223 }
224
225 std::string numeric; // Numeric (integer) portion of this string
226 std::string fractional; // Decimal portion of the string (if any)
227 size_t suffix_pos = pos;
228
229 // Determine prefix (if allowed)
230
231 uint32_t radix = DEFAULT_RADIX;
232 const char* digits = DECIMAL_DIGITS;
233 if(allow_prefix){
234 // Look for a prefix (if any) at start of string (after whitespace) and extract radix
235 bool found_prefix = false;
236 for(auto & prefix : prefixes) {
237 for(auto & str : prefix.options) {
238 size_t prefix_pos = s.find(str, pos);
239 if(prefix_pos == pos) {
240 //std::cout << "found prefix at " << prefix_pos << std::endl;
241
242 // Attempt to read ahead using this prefix. Parser must be able to get a
243 // non-null string for this prefix to be considered a match.
244 // Note that because the digits string for various prefixes does not contain
245 // '.', this will fail to parse a numeric string if it contains a '.'. This is
246 // the correct behavior
247 size_t spec_suffix_pos;
248 numeric = parseNumericString(s, prefix_pos + strlen(str), prefix.digits, spec_suffix_pos);
249 if(numeric.size() != 0){
250 radix = prefix.radix;
251 digits = prefix.digits;
252 pos = suffix_pos = spec_suffix_pos;
253 found_prefix = true;
254 break;
255 }
256 }
257 }
258 if(found_prefix){
259 break;
260 }
261 }
262 }
263
264 //std::cout << "radix = " << radix << std::endl;
265
266 // Extract the (hopefully) numeric portion of the string
267 if(numeric.size() == 0){
268 sparta_assert(suffix_pos == pos);
269 numeric = parseNumericString(s, pos, digits, suffix_pos);
270 if(numeric.size() == 0){
271 throw SpartaException("Unable to parse a numeric value from substring \"")
272 << s.substr(pos) << "\" within full string \"" << s
273 << "\" for smart lexical casting";
274 }
275 }
276
277 pos = suffix_pos;
278
279 // Convert a decimal number.
280 // note, there are no whitespace or separators in numeric at this time
281 size_t decimal_pos = numeric.find('.');
282 if(decimal_pos != std::string::npos){
283 if(decimal_pos == numeric.size() - 1){
284 throw SpartaException("Encountered \".\" at the end of a numeric portion (\"") << numeric
285 << "\") of a string \"" << s << "\"";
286 }
287 fractional = numeric.substr(decimal_pos + 1);
288 numeric = numeric.substr(0, decimal_pos);
289 }
290
291 uint64_t value = 0;
292 // The C functions of yester-year might or might not error on
293 // empty strings. Different machines do different things. It's
294 // silly to send a null string to be cast either way, so catch it
295 // here and evaluate to 0.
296 if(numeric.find_first_not_of(WHITESPACE) != std::string::npos) {
297 // Determine raw integer value of the numeric portion of the string
298 value = lexicalCast<uint64_t>(numeric, radix);
299 }
300
301 if(pos == std::string::npos){
302 if(fractional.size() != 0){
303 throw SpartaException("Encountered a fractional value: \"") << numeric << "\" . \""
304 << fractional << "\" but no suffix was found, so this cannot possibly represent "
305 " an integer. Found in \"" << s << "\"";
306 }
307
308 end_pos = pos; // Output ending position
309 }else{
310 // This is redundant with the value extraction above
312 //size_t non_numeric_char_pos = numeric.find_first_not_of(digits);
313 //if(non_numeric_char_pos != std::string::npos){
314 // throw SpartaException("Found non-numeric digit '") << numeric[non_numeric_char_pos]
315 // << "' in portion of string (\"" << numeric << "\") expected to be only the digits: \""
316 // << digits << "\". Encountered when parsing full string\"" << s << "\"";
317 //}
318
319 // Find the suffix (if any) and extract a modifier function (e.g. x*1000)
320 size_t after_suffix_pos = pos;
321 uint64_t suffix_multiplier = 0;
322 for(auto & suffix : suffixes) {
323 for(auto & str : suffix.options) {
324 auto tmp = s.find(str, pos);
325 // Check if this suffix comes before another that was found later in the string
326 if(tmp == pos) {
327 //std::cout << "found suffix at " << suffix_pos << std::endl;
328 suffix_multiplier = suffix.mult;
329 after_suffix_pos = tmp + strlen(str);
330 break;
331 }
332 }
333 if(suffix_multiplier != 0){
334 break;
335 }
336 }
337
338 if(suffix_multiplier == 0){
339 suffix_multiplier = 1;
340 }
341
342 // Apply suffix modifier extracted when parsing the suffix (e.g. "k" = N*1000)
343 value *= suffix_multiplier;
344
345 // Apply suffix modifier to the fractional portion
346 if(fractional.size() != 0){
347 // Check each digit to ensure that it is an integer portion of the
348 // This is done manually to guarantee NO floating point rounding errors can occur
349 uint32_t ten_div = 10;
350 for(auto ch : fractional){
351 uint64_t frac = (ch - '0');
352 sparta_assert(frac < 10);
353 frac *= suffix_multiplier;
354 if(frac < ten_div){
355 throw SpartaException("Encountered a fractional value: \"") << numeric << "\" . \""
356 << fractional << "\" but suffix multipler was only " << suffix_multiplier
357 << ", so this fraction does not represent and integer. Fraction should "
358 "not have a 1/" << ten_div << " place. Found in \"" << s << "\"";
359 }
360 frac /= ten_div;
361 value += frac;
362 ten_div *= 10;
363 }
364 }
365
366 // Recursively reparse remainder of string and add result to current value
367 if(allow_recursion){
368 if(after_suffix_pos != std::string::npos){
369 uint64_t addition = smartLexicalCast<uint64_t>(s.substr(after_suffix_pos),
370 after_suffix_pos, // Pos after parsing
371 true, // Allow recursion
372 false); // Disallow prefixes
373
374 value += addition;
375 }
376 }
377
378 end_pos = after_suffix_pos;
379
380 // Reject any garbage remaining
381 if(end_pos != std::string::npos){
382 size_t garbage_pos = s.find_first_not_of(WHITESPACE, end_pos);
383 if(garbage_pos != std::string::npos) {
384 throw SpartaException("Found non-'whitespace' grabage character '") << s[garbage_pos]
385 << "' after suffix (at or after char " << end_pos << ") in string being "\
386 "smart-lexically-cast: \"" << s << "\"";
387 }
388 }
389 }
390
391 return value;
392}
393
394template <>
395inline uint32_t smartLexicalCast(const std::string& s,
396 size_t& end_pos,
397 bool allow_recursion,
398 bool allow_prefix) {
399 uint64_t val = smartLexicalCast<uint64_t>(s, end_pos, allow_recursion, allow_prefix);
400 if(val >= (1_u64 << 32)){
401 throw SpartaException("Could not lexically cast \"") << s << "\" to a uint32_t because it "
402 "contained a value this type could not contain: " << val;
403 }
404 return (uint32_t)val;
405}
406
407template <>
408inline uint16_t smartLexicalCast(const std::string& s,
409 size_t& end_pos,
410 bool allow_recursion,
411 bool allow_prefix) {
412 uint64_t val = smartLexicalCast<uint64_t>(s, end_pos, allow_recursion, allow_prefix);
413 if(val >= (1_u64 << 16)){
414 throw SpartaException("Could not lexically cast \"") << s << "\" to a uint32_t because it "
415 "contained a value this type could not contain: " << val;
416 }
417 return (uint16_t)val;
418}
419
420template <>
421inline uint8_t smartLexicalCast(const std::string& s,
422 size_t& end_pos,
423 bool allow_recursion,
424 bool allow_prefix) {
425 uint64_t val = smartLexicalCast<uint64_t>(s, end_pos, allow_recursion, allow_prefix);
426 if(val >= (1_u64 << 8)){
427 throw SpartaException("Could not lexically cast \"") << s << "\" to a uint32_t because it "
428 "contained a value this type could not contain: " << val;
429 }
430 return (uint8_t)val;
431}
432
433template <>
434inline int64_t smartLexicalCast(const std::string& s,
435 size_t& end_pos,
436 bool allow_recursion,
437 bool allow_prefix) {
438 // Get negative sign from front of string
439 size_t after_neg_pos = 0;
440 size_t neg_pos = s.find_first_not_of(WHITESPACE);
441 bool negate = false;
442 if(neg_pos != std::string::npos){
443 if(s[neg_pos] == '-'){
444 negate = true;
445 after_neg_pos = neg_pos + 1;
446 }
447 // after_neg_pos remains 0
448 }
449
450 uint64_t val = smartLexicalCast<uint64_t>(s.substr(after_neg_pos), end_pos, allow_recursion, allow_prefix);
451 if((negate == false && val >= (1_u64 << 63))
452 || (negate == true && val > (1_u64 << 63))) {
453 throw SpartaException("Could not lexically cast \"") << s << "\" to a int64_t because it "
454 "contained a value this type could not contain: " << val;
455 }
456 int64_t signed_val = val;
457 if(negate){
458 signed_val *= -1;
459 }
460 return signed_val;
461}
462
463template <>
464inline int32_t smartLexicalCast(const std::string& s,
465 size_t& end_pos,
466 bool allow_recursion,
467 bool allow_prefix) {
468 int64_t val = smartLexicalCast<int64_t>(s, end_pos, allow_recursion, allow_prefix);
469 if(val >= (1_64 << 31)
470 || val < -(1_64 << 31)){
471 throw SpartaException("Could not lexically cast \"") << s << "\" to a int32_t because it "
472 "contained a value larger than this type could not contain: " << val;
473 }
474 return (int32_t)val;
475}
476
477template <>
478inline int16_t smartLexicalCast(const std::string& s,
479 size_t& end_pos,
480 bool allow_recursion,
481 bool allow_prefix) {
482 int64_t val = smartLexicalCast<int64_t>(s, end_pos, allow_recursion, allow_prefix);
483 if(val >= (1_64 << 16)
484 || val < -(1_64 << 16)){
485 throw SpartaException("Could not lexically cast \"") << s << "\" to a int16_t because it "
486 "contained a value this type could not contain: " << val;
487 }
488 return (int16_t)val;
489}
490
491template <>
492inline int8_t smartLexicalCast(const std::string& s,
493 size_t& end_pos,
494 bool allow_recursion,
495 bool allow_prefix) {
496 int64_t val = smartLexicalCast<int64_t>(s, end_pos, allow_recursion, allow_prefix);
497 if(val >= (1_64 << 8)
498 || val < -(1_64 << 8)){
499 throw SpartaException("Could not lexically cast \"") << s << "\" to a int8_t because it "
500 "contained a value this type could not contain: " << val;
501 }
502 return (int8_t)val;
503}
504
505template <>
506inline double smartLexicalCast(const std::string& s,
507 size_t& end_pos,
508 bool allow_recursion,
509 bool allow_prefix) {
510 (void) allow_recursion;
511 (void) allow_prefix;
512
513 utils::ValidValue<double> dbl_value;
514 end_pos = 0;
515
516 try {
517 dbl_value = boost::lexical_cast<double>(s);
518 } catch (const boost::bad_lexical_cast &) {
519 throw SpartaException("Could not lexically cast \"") << s << "\" to a double";
520 }
521
522 end_pos = std::string::npos;
523 return dbl_value.getValue();
524}
525
526} // namespace utils
527} // namespace sparta
String-to-value helpers and string formatting helpers.
static const char * DECIMAL_DIGITS
All decimal (base 10) digits allowed and '.'.
static const std::vector< Modifier > suffixes
Suffixes supported.
static const char * WHITESPACE
All whitespace characters allowed.
static const std::vector< RadixPrefix > prefixes
Radixes supported.
std::string parseNumericString(const std::string &s, size_t pos, const char *digits, size_t &after_numeric)
Parse and return a numeric string from after a certain position in an input string.
#define sparta_assert(...)
Simple variadic assertion that will throw a sparta_exception if the condition fails.
Exception class for all of Sparta.
File that defines a ValidValue.
Macros for handling exponential backoff.
T smartLexicalCast(const ParameterBase *p, const std::string &s, size_t &end_pos, bool allow_recursion=true, bool allow_prefix=true)
smartLexicalCast wrapper with parameter information added to exceptions
uint32_t replaceSubstring(std::string &s, const std::string &from, const std::string &to)
Replaces within a string 's' all instances of some string 'from' with 'to'.
Definition Utils.hpp:265
Modifier instance - Associates some suffix strings (e.g. "b") with a semantic (e.g....
std::vector< const char * > options
Suffix strings identifying this modifier (case sensitive)
uint64_t mult
Multiplier applied to the value when this modifier is found.
Prefix instance - Associates some prefix strings (e.g. "0x") with a radix and a set of allowed digit ...
const char *const digits
Valid digits in a number following this prefix.
const uint32_t radix
Radix associated with this prefix.
const std::vector< const char * > options
Prefix strings identifying this prefix (case sensitive)