Routines for converting between different character encodings. On UNIX, this uses the iconv library, on Windows the Windows API.

The following example shows how to change character encodings.


import std/encodings
when defined(windows):
  let
    orig = "öäüß"
    # convert `orig` from "UTF-8" to "CP1252"
    cp1252 = convert(orig, "CP1252", "UTF-8")
    # convert `cp1252` from "CP1252" to "ibm850"
    ibm850 = convert(cp1252, "ibm850", "CP1252")
    current = getCurrentEncoding()
  assert orig == "\195\182\195\164\195\188\195\159"
  assert ibm850 == "\148\132\129\225"
  assert convert(ibm850, current, "ibm850") == orig
The example below uses a reusable EncodingConverter object, which is created by open with destEncoding and srcEncoding specified. You can call convert on this object multiple times.


import std/encodings
when defined(windows):
  var fromGB2312 = open("utf-8", "gb2312")
  let first = "\203\173\197\194\163\191\210\187" &
      "\203\242\209\204\211\234\200\206\198\189\201\250"
  assert fromGB2312.convert(first) == "谁怕？一蓑烟雨任平生"

  let second = "\211\208\176\215\205\183\200\231" &
      "\208\194\163\172\199\227\184\199\200\231\185\202"
  assert fromGB2312.convert(second) == "有白头如新，倾盖如故"


EncodingConverter = object
  dest, src: CodePage
EncodingError = object of ValueError
Exception that is raised for encoding errors.


proc close(c: EncodingConverter) {....raises: [], tags: [].}
Frees the resources the converter c holds.
proc codePageToName(c: CodePage): string {....raises: [], tags: [].}
proc convert(c: EncodingConverter; s: string): string {.
    ...raises: [EncodingError, OSError], tags: [].}
proc convert(s: string; destEncoding = "UTF-8"; srcEncoding = "CP1252"): string {.
    ...raises: [ValueError, EncodingError, EncodingError, OSError], tags: [].}
Converts s to destEncoding. It assumes that s is in srcEncoding. This opens a converter, uses it and closes it again, and is thus more convenient but also likely less efficient than re-using a converter.
Warning: UTF-16BE and UTF-32 conversions are not supported on Windows.
proc getCurrentEncoding(uiApp = false): string {....raises: [], tags: [].}
Retrieves the current encoding. On Unix, "UTF-8" is always returned. The uiApp parameter is Windows-specific: if true, the UI's code page is returned; if false, the console's code page is returned.
proc nameToCodePage(name: string): CodePage {....raises: [ValueError], tags: [].}
proc open(destEncoding = "UTF-8"; srcEncoding = "CP1252"): EncodingConverter {.
    ...raises: [ValueError, EncodingError], tags: [].}
Opens a converter that can convert from srcEncoding to destEncoding. Raises EncodingError if it cannot fulfill the request.