This module contains helpers for parsing tokens, numbers, integers, floats, identifiers, etc.

To unpack raw bytes look at the streams module.

let logs = @["2019-01-10: OK_", "2019-01-11: FAIL_", "2019-01: aaaa"]
var outp: seq[string]

for log in logs:
  var res: string
  if parseUntil(log, res, ':') == 10: # YYYY-MM-DD == 10
    outp.add(res & " - " & captureBetween(log, ' ', '_'))
doAssert outp == @["2019-01-10 - OK", "2019-01-11 - FAIL"]

from std/strutils import Digits, parseInt

let
  input1 = "2019 school start"
  input2 = "3 years back"
  startYear = input1[0 .. skipWhile(input1, Digits)-1] # 2019
  yearsBack = input2[0 .. skipWhile(input2, Digits)-1] # 3
  examYear = parseInt(startYear) + parseInt(yearsBack)
doAssert "Examination is in " & $examYear == "Examination is in 2022"

See also:

strutils module for combined and identical parsing procs
json module for a JSON parser
parsecfg module for a configuration file parser
parsecsv module for a simple CSV (comma separated value) parser
parseopt module for a command line parser
parsexml module for a XML / HTML parser
other parsers for other parsers

Types

InterpolatedKind = enum
  ikStr,    ## ``str`` part of the interpolated string
  ikDollar, ## escaped ``$`` part of the interpolated string
  ikVar,    ## ``var`` part of the interpolated string
  ikExpr    ## ``expr`` part of the interpolated string

Describes for interpolatedFragments which part of the interpolated string is yielded; for example in "str$$$var${expr}". Source Edit

Procs

proc captureBetween(s: string; first: char; second = '\x00'; start = 0): string {.
    ...raises: [], tags: [].}

Finds the first occurrence of first, then returns everything from there up to second (if second is '\0', then first is used).

Example:

doAssert captureBetween("Hello World", 'e') == "llo World"
doAssert captureBetween("Hello World", 'e', 'r') == "llo Wo"
doAssert captureBetween("Hello World", 'l', start = 6) == "d"

Source Edit

proc parseBiggestFloat(s: string; number: var BiggestFloat; start = 0): int {.
    magic: "ParseBiggestFloat", importc: "nimParseBiggestFloat", noSideEffect,
    ...raises: [], tags: [].}

Parses a float starting at start and stores the value into number. Result is the number of processed chars or 0 if a parsing error occurred. Source Edit

proc parseBiggestInt(s: string; number: var BiggestInt; start = 0): int {.
    ...gcsafe, extern: "npuParseBiggestInt", noSideEffect, ...raises: [ValueError],
    tags: [].}

Parses an integer starting at start and stores the value into number. Result is the number of processed chars or 0 if there is no integer. ValueError is raised if the parsed integer is out of the valid range.

Example:

var res: BiggestInt
doAssert parseBiggestInt("9223372036854775807", res, 0) == 19
doAssert res == 9223372036854775807

Source Edit

proc parseBiggestUInt(s: string; number: var BiggestUInt; start = 0): int {.
    ...gcsafe, extern: "npuParseBiggestUInt", noSideEffect, ...raises: [ValueError],
    tags: [].}

Parses an unsigned integer starting at start and stores the value into number. ValueError is raised if the parsed integer is out of the valid range.

Example:

var res: BiggestUInt
doAssert parseBiggestUInt("12", res, 0) == 2
doAssert res == 12
doAssert parseBiggestUInt("1111111111111111111", res, 0) == 19
doAssert res == 1111111111111111111'u64

Source Edit

proc parseBin[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {.
    noSideEffect.}

Parses a binary number and stores its value in number.

Returns the number of the parsed characters or 0 in case of an error. If error, the value of number is not changed.

If maxLen == 0, the parsing continues until the first non-bin character or to the end of the string. Otherwise, no more than maxLen characters are parsed starting from the start position.

It does not check for overflow. If the value represented by the string is too big to fit into number, only the value of the last fitting characters will be stored in number, without producing an error.

Example:

var num: int
doAssert parseBin("0100_1110_0110_1001_1110_1101", num) == 29
doAssert num == 5138925
doAssert parseBin("3", num) == 0
var num8: int8
doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8) == 32
doAssert num8 == 0b1110_1101'i8
doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8, 3, 9) == 9
doAssert num8 == 0b0100_1110'i8
var num8u: uint8
doAssert parseBin("0b_0100_1110_0110_1001_1110_1101", num8u) == 32
doAssert num8u == 237
var num64: int64
doAssert parseBin("0100111001101001111011010100111001101001", num64) == 40
doAssert num64 == 336784608873

Source Edit

proc parseChar(s: string; c: var char; start = 0): int {....raises: [], tags: [].}

Parses a single character, stores it in c and returns 1. In case of error (if start >= s.len) it returns 0 and the value of c is unchanged.

Example:

var c: char
doAssert "nim".parseChar(c, 3) == 0
doAssert c == '\0'
doAssert "nim".parseChar(c, 0) == 1
doAssert c == 'n'

Source Edit

proc parseFloat(s: string; number: var float; start = 0): int {....gcsafe,
    extern: "npuParseFloat", noSideEffect, ...raises: [], tags: [].}

Parses a float starting at start and stores the value into number. Result is the number of processed chars or 0 if a parsing error occurred.

Example:

var res: float
doAssert parseFloat("32", res, 0) == 2
doAssert res == 32.0
doAssert parseFloat("32.57", res, 0) == 5
doAssert res == 32.57
doAssert parseFloat("32.57", res, 3) == 2
doAssert res == 57.00

Source Edit

proc parseHex[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {.
    noSideEffect.}

Parses a hexadecimal number and stores its value in number.

Returns the number of the parsed characters or 0 in case of an error. If error, the value of number is not changed.

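If maxLen == 0, the parsing continues until the first non-hex character or to the end of the string. Otherwise, no more than maxLen characters are parsed starting from the start position.

It does not check for overflow. If the value represented by the string is too big to fit into number, only the value of the last fitting characters will be stored in number, without producing an error (see the int8 and uint8 examples below).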

Example:

var num: int
doAssert parseHex("4E_69_ED", num) == 8
doAssert num == 5138925
doAssert parseHex("X", num) == 0
doAssert parseHex("#ABC", num) == 4
var num8: int8
doAssert parseHex("0x_4E_69_ED", num8) == 11
doAssert num8 == 0xED'i8
doAssert parseHex("0x_4E_69_ED", num8, 3, 2) == 2
doAssert num8 == 0x4E'i8
var num8u: uint8
doAssert parseHex("0x_4E_69_ED", num8u) == 11
doAssert num8u == 237
var num64: int64
doAssert parseHex("4E69ED4E69ED", num64) == 12
doAssert num64 == 86216859871725

Source Edit

proc parseIdent(s: string; ident: var string; start = 0): int {....raises: [],
    tags: [].}

Parses an identifier and stores it in ident. Returns the number of the parsed characters or 0 in case of an error. If error, the value of ident is not changed.

Example:

var res: string
doAssert parseIdent("Hello World", res, 0) == 5
doAssert res == "Hello"
doAssert parseIdent("Hello World", res, 1) == 4
doAssert res == "ello"
doAssert parseIdent("Hello World", res, 6) == 5
doAssert res == "World"

Source Edit

proc parseIdent(s: string; start = 0): string {....raises: [], tags: [].}

Parses an identifier and returns it or an empty string in case of an error.

Example:

doAssert parseIdent("Hello World", 0) == "Hello"
doAssert parseIdent("Hello World", 1) == "ello"
doAssert parseIdent("Hello World", 5) == ""
doAssert parseIdent("Hello World", 6) == "World"

Source Edit

proc parseInt(s: string; number: var int; start = 0): int {....gcsafe,
    extern: "npuParseInt", noSideEffect, ...raises: [ValueError], tags: [].}

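Parses an integer starting at start and stores the value into number. Result is the number of processed chars or 0 if there is no integer. ValueError is raised if the parsed integer is out of the valid range.

Example: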

var res: int
doAssert parseInt("2019", res, 0) == 4
doAssert res == 2019
doAssert parseInt("2019", res, 2) == 2
doAssert res == 19

Source Edit

proc parseOct[T: SomeInteger](s: string; number: var T; start = 0; maxLen = 0): int {.
    noSideEffect.}

Parses an octal number and stores its value in number.

Returns the number of the parsed characters or 0 in case of an error. If error, the value of number is not changed.

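If maxLen == 0, the parsing continues until the first non-oct character or to the end of the string. Otherwise, no more than maxLen characters are parsed starting from the start position.

It does not check for overflow. If the value represented by the string is too big to fit into number, only the value of the last fitting characters will be stored in number, without producing an error (see the int8 and uint8 examples below).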

Example:

var num: int
doAssert parseOct("0o23464755", num) == 10
doAssert num == 5138925
doAssert parseOct("8", num) == 0
var num8: int8
doAssert parseOct("0o_1464_755", num8) == 11
doAssert num8 == -19
doAssert parseOct("0o_1464_755", num8, 3, 3) == 3
doAssert num8 == 102
var num8u: uint8
doAssert parseOct("1464755", num8u) == 7
doAssert num8u == 237
var num64: int64
doAssert parseOct("2346475523464755", num64) == 16
doAssert num64 == 86216859871725

Source Edit

proc parseSaturatedNatural(s: string; b: var int; start = 0): int {....raises: [],
    tags: [].}

Parses a natural number into b. This cannot raise an overflow error; on overflow, b is set to high(int) instead. The number of processed characters is returned. This is usually what you really want to use instead of parseInt.

Example:

var res = 0
discard parseSaturatedNatural("848", res)
doAssert res == 848

Source Edit

func parseSize(s: string; size: var int64; alwaysBin = false): int {....raises: [],
    tags: [].}

Parse a size qualified by binary or metric units into size. This format is often called "human readable". Result is the number of processed chars or 0 on parse errors and size is rounded to the nearest integer. Trailing garbage like "/s" in "1k/s" is allowed and detected by result < s.len.

To simplify use, following non-rare wild conventions, and since fractional data like milli-bytes is so rare, unit matching is case-insensitive but for the 'i' distinguishing binary-metric from metric (which cannot be 'I').

An optional trailing 'B|b' is ignored but processed. I.e., you must still know if units are bytes | bits or infer this fact via the case of s[^1] (if users can even be relied upon to use 'B' for byte and 'b' for bit or have that be s[^1]).

If alwaysBin==true then scales are always binary-metric, but e.g. "KiB" is still accepted for clarity. If the value would exceed the range of int64, size saturates to int64.high. Supported metric prefix chars include k, m, g, t, p, e, z, y (but z & y saturate unless the number is a small fraction).

See also:

https://en.wikipedia.org/wiki/Binary_prefix
formatSize module for formatting

Example:

var res: int64  # caller must still know if 'b' refers to bytes|bits
doAssert parseSize("10.5 MB", res) == 7
doAssert res == 10_500_000  # decimal metric Mega prefix
doAssert parseSize("64 mib", res) == 6
doAssert res == 67108864    # 64 shl 20
doAssert parseSize("1G/h", res, true) == 2 # '/' stops parse
doAssert res == 1073741824  # 1 shl 30, forced binary metric

Source Edit

proc parseUInt(s: string; number: var uint; start = 0): int {....gcsafe,
    extern: "npuParseUInt", noSideEffect, ...raises: [ValueError], tags: [].}

Parses an unsigned integer starting at start and stores the value into number. ValueError is raised if the parsed integer is out of the valid range.

Example:

var res: uint
doAssert parseUInt("3450", res) == 4
doAssert res == 3450
doAssert parseUInt("3450", res, 2) == 2
doAssert res == 50

Source Edit

proc parseUntil(s: string; token: var string; until: char; start = 0): int {.
    inline, ...raises: [], tags: [].}

Parses a token and stores it in token. Returns the number of the parsed characters or 0 in case of an error. A token consists of any character that is not the until character.

Example:

var myToken: string
doAssert parseUntil("Hello World", myToken, 'W') == 6
doAssert myToken == "Hello "
doAssert parseUntil("Hello World", myToken, 'o') == 4
doAssert myToken == "Hell"
doAssert parseUntil("Hello World", myToken, 'o', 2) == 2
doAssert myToken == "ll"

Source Edit

proc parseUntil(s: string; token: var string; until: set[char]; start = 0): int {.
    inline, ...raises: [], tags: [].}

Parses a token and stores it in token. Returns the number of the parsed characters or 0 in case of an error. A token consists of the characters notin until.

Example:

var myToken: string
doAssert parseUntil("Hello World", myToken, {'W', 'o', 'r'}) == 4
doAssert myToken == "Hell"
doAssert parseUntil("Hello World", myToken, {'W', 'r'}) == 6
doAssert myToken == "Hello "
doAssert parseUntil("Hello World", myToken, {'W', 'r'}, 3) == 3
doAssert myToken == "lo "

Source Edit

proc parseUntil(s: string; token: var string; until: string; start = 0): int {.
    inline, ...raises: [], tags: [].}

Parses a token and stores it in token. Returns the number of the parsed characters or 0 in case of an error. A token consists of any character that comes before the until token.

Example:

var myToken: string
doAssert parseUntil("Hello World", myToken, "Wor") == 6
doAssert myToken == "Hello "
doAssert parseUntil("Hello World", myToken, "Wor", 2) == 4
doAssert myToken == "llo "

Source Edit

proc parseWhile(s: string; token: var string; validChars: set[char]; start = 0): int {.
    inline, ...raises: [], tags: [].}

Parses a token and stores it in token. Returns the number of the parsed characters or 0 in case of an error. A token consists of the characters in validChars.

Example:

var myToken: string
doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 0) == 0
doAssert myToken.len() == 0
doAssert parseWhile("Hello World", myToken, {'W', 'o', 'r'}, 6) == 3
doAssert myToken == "Wor"

Source Edit

proc skip(s, token: string; start = 0): int {.inline, ...raises: [], tags: [].}

Skips the token starting at s[start]. Returns the length of token or 0 if there was no token at s[start].

Example:

doAssert skip("2019-01-22", "2019", 0) == 4
doAssert skip("2019-01-22", "19", 0) == 0
doAssert skip("2019-01-22", "19", 2) == 2
doAssert skip("CAPlow", "CAP", 0) == 3
doAssert skip("CAPlow", "cap", 0) == 0

Source Edit

proc skipIgnoreCase(s, token: string; start = 0): int {....raises: [], tags: [].}

Same as skip but case is ignored for token matching.

Example:

doAssert skipIgnoreCase("CAPlow", "CAP", 0) == 3
doAssert skipIgnoreCase("CAPlow", "cap", 0) == 3

Source Edit

proc skipUntil(s: string; until: char; start = 0): int {.inline, ...raises: [],
    tags: [].}

Skips all characters until the char until is found or the end is reached. Returns number of characters skipped.

Example:

doAssert skipUntil("Hello World", 'o', 0) == 4
doAssert skipUntil("Hello World", 'o', 4) == 0
doAssert skipUntil("Hello World", 'W', 0) == 6
doAssert skipUntil("Hello World", 'w', 0) == 11

Source Edit

proc skipUntil(s: string; until: set[char]; start = 0): int {.inline,
    ...raises: [], tags: [].}

Skips all characters until one char from the set until is found or the end is reached. Returns number of characters skipped.

Example:

doAssert skipUntil("Hello World", {'W', 'e'}, 0) == 1
doAssert skipUntil("Hello World", {'W'}, 0) == 6
doAssert skipUntil("Hello World", {'W', 'd'}, 0) == 6

Source Edit

proc skipWhile(s: string; toSkip: set[char]; start = 0): int {.inline,
    ...raises: [], tags: [].}

Skips all characters while one char from the set toSkip is found. Returns number of characters skipped.

Example:

doAssert skipWhile("Hello World", {'H', 'e'}) == 2
doAssert skipWhile("Hello World", {'e'}) == 0
doAssert skipWhile("Hello World", {'W', 'o', 'r'}, 6) == 3

Source Edit

proc skipWhitespace(s: string; start = 0): int {.inline, ...raises: [], tags: [].}

Skips the whitespace starting at s[start]. Returns the number of skipped characters.

Example:

doAssert skipWhitespace("Hello World", 0) == 0
doAssert skipWhitespace(" Hello World", 0) == 1
doAssert skipWhitespace("Hello World", 5) == 1
doAssert skipWhitespace("Hello  World", 5) == 2

Source Edit

Iterators

iterator interpolatedFragments(s: string): tuple[kind: InterpolatedKind,
    value: string] {....raises: [ValueError], tags: [].}

Tokenizes the string s into substrings for interpolation purposes.

Example:

var outp: seq[tuple[kind: InterpolatedKind, value: string]]
for k, v in interpolatedFragments(" $this is ${an example} $$"):
  outp.add (k, v)
doAssert outp == @[(ikStr, " "),
                   (ikVar, "this"),
                   (ikStr, " is "),
                   (ikExpr, "an example"),
                   (ikStr, " "),
                   (ikDollar, "$")]
Source Edit

std/parseutils

Types

Procs

Iterators