Return output length even if output array is NULL. Ref #708

2025-04-15 15:06:44 +00:00 · 2018-11-27 14:24:48 -06:00 · 2018-11-27 14:24:48 -06:00 · 5333773fbd
commit 5333773fbd
parent ca99386026
4 changed files with 433 additions and 261 deletions
--- a/include/trick/unicode_utils.h
+++ b/include/trick/unicode_utils.h
@ -2,14 +2,15 @@
 #define UNITCODE_UTILS_H
 #include <stddef.h>

-/* Maintainer: John M. Penn */
+/* Author: John M. Penn */

 #ifdef __cplusplus
 extern "C" {
 #endif

 /* Convert Unicode codepoint to UTF-32. Validates that it's a legal unicode value.
-   Returns 1, if successful, 0 otherwise. */
+   Returns 1, if successful, 0 otherwise.
+ */
 size_t ucodepoint_to_utf32(unsigned int codePoint, int32_t *out);

 /* Convert Unicode codepoint to UTF-16.
@ -24,27 +25,46 @@ size_t ucodepoint_to_utf16(unsigned int codePoint, int16_t (*out)[2]);
 */ 
 size_t ucodepoint_to_utf8(unsigned int codePoint, char (*out)[4]);

-
-
-/* Un-escape C escape sequences, including \u and \U Unicode escape sequences,
-   in an ASCII character array, producing a UTF-8 character array. Return the
-   number of elements in the character string.
-*/
-size_t ascii_to_utf8(const char *in, char *out, size_t outSize); 
-
 /* Escape ('\' escape codes) all unicode and non-printable ASCII characters
-   in a UTF-8 character string. Return the number of elements in the character string.
+   in a UTF-8 character string to an all-ASCII representation.
+   Returns the number of elements in the character string, or 0 on failure.
+ */
+size_t escape_to_ascii(const char *in, char *out, size_t outSize); 
+
+/* Return the number of elements (not counting the terminating NUL) that
+   escape_to_ascii() would produce for (in), or 0 on failure.
+ */
+size_t escape_to_ascii_len(const char *in);
+
+/* Un-escape C-language escape sequences, including \u and \U Unicode escape sequences,
+   in an ASCII character array, producing a UTF-8 character array.
+   Returns the number of elements in the character string, or 0 on failure.
 */
-size_t utf8_to_printable_ascii(const char *in, char *out, size_t outSize); 
+size_t unescape_to_utf8(const char *in, char *out, size_t outSize); 
+
+/* Return the number of elements (not counting the terminating NUL) that
+   unescape_to_utf8() would produce for (in), or 0 on failure.
+ */
+size_t unescape_to_utf8_len(const char *in);

 /* Convert a UTF-8 character array to a wchar_t array. Supports 16, and 32 bit wchar_t.
-   Return the number of elements in the wchar_t string. */
+   Returns the number of elements in the wchar_t string, or 0 on failure. */
 size_t utf8_to_wchar(const char *in, wchar_t *out, size_t outSize);

-/* Convert wchar_t character array to UTF-8. Return the number of elements in
-   the character (utf-8) string.*/
+/* Return the number of wchar_t elements (not counting the terminating NUL) that
+   utf8_to_wchar() would produce for (in), or 0 on failure.
+ */
+size_t utf8_to_wchar_len(const char *in);
+
+/* Convert wchar_t character array to UTF-8.
+   Returns the number of elements in the character (utf-8) string,
+   or 0 on failure.
+*/
 size_t wchar_to_utf8(const wchar_t *in, char *out, size_t outSize);

+/* Return the number of elements (not counting the terminating NUL) that wchar_to_utf8() would produce for (in), or 0 on failure. */
+size_t wchar_to_utf8_len(const wchar_t *in);
+
 #ifdef __cplusplus
 }
 #endif
--- a/trick_source/trick_utils/unicode/src/unicode_utils.c
+++ b/trick_source/trick_utils/unicode/src/unicode_utils.c
@ -6,7 +6,9 @@
 #include <stdint.h>
 #include "trick/unicode_utils.h"

-/* Maintainer: John M. Penn */
+/* Author: John M. Penn */
+
+#define ERROR_STATE 99

 size_t ucodepoint_to_utf32(unsigned int codePoint, int32_t *out) {

@ -69,24 +71,20 @@ size_t ucodepoint_to_utf8(unsigned int codePoint, char (*out)[4]) {
    return 0;
 }

-size_t utf8_to_printable_ascii(const char *in, char *out, size_t outSize) {
-
-    int state = 0;
+size_t escape_to_ascii(const char *in, char *out, size_t outSize) {
    unsigned int codePoint;
-    char wks[11];
-
-    if (out == NULL) {
-        fprintf(stderr,"%s:ERROR: ASCII char pointer (out) is NULL. No conversion performed.\n", __FUNCTION__);
-        return 0;
-    }
-    out[0] = 0;
+    size_t out_len = 0;
+    int state = 0;
+    char ascii_elements[11];

    if (in == NULL) {
        fprintf(stderr,"%s:ERROR: UTF8 char-pointer (in) is NULL. No conversion performed.\n", __FUNCTION__);
        return 0;
    }

-    while (*in != 0) {
+    if (out != NULL) out[out_len] = 0;
+
+    while ((*in != 0) && (state != ERROR_STATE)) {
        unsigned char ch = *in;
        switch (state) {
            case 0: {
@ -101,55 +99,61 @@ size_t utf8_to_printable_ascii(const char *in, char *out, size_t outSize) {
                    state = 1;
                } else if (ch >= 0x80) {   // We should never find a continuation byte in isolation.
                    fprintf(stderr,"%s:ERROR: UTF8 string (in) appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                } else {                   // ASCII        
                    if (ch == '\a') {
-                        sprintf(wks,"\\a");
+                        sprintf(ascii_elements, "\\a");
                    } else if (ch == '\b') {
-                        sprintf(wks,"\\b");
+                        sprintf(ascii_elements, "\\b");
                    } else if (ch == '\f') {
-                        sprintf(wks,"\\f");
+                        sprintf(ascii_elements, "\\f");
                    } else if (ch == '\n') {
-                        sprintf(wks,"\\n");
+                        sprintf(ascii_elements, "\\n");
                    } else if (ch == '\r') {
-                        sprintf(wks,"\\r");
+                        sprintf(ascii_elements, "\\r");
                    } else if (ch == '\t') {
-                        sprintf(wks,"\\t");
+                        sprintf(ascii_elements, "\\t");
                    } else if (ch == '\v') {
-                        sprintf(wks,"\\v");
+                        sprintf(ascii_elements, "\\v");
                    } else if (isprint(ch)) {
-                        sprintf(wks,"%c",ch);
+                        sprintf(ascii_elements, "%c",ch);
                    } else {
-                        sprintf(wks,"\\x%02x",ch);
+                        sprintf(ascii_elements, "\\x%02x",ch);
                    }
-                    if ((strlen(out)+strlen(wks)) < outSize-1) {
-                        strcat(out, wks);
-                    } else {
-                        fprintf(stderr,"%s:ERROR: Insufficient room in (out) array.\n", __FUNCTION__);
-                        state = 99;
+                    size_t n_elements = strlen(ascii_elements);
+                    if (out != NULL) {
+                        if ((out_len + n_elements) < outSize) {
+                            strcat(out, ascii_elements);
+                        } else {
+                            fprintf(stderr,"%s:ERROR: Insufficient room in (out) array.\n", __FUNCTION__);
+                            state = ERROR_STATE;
+                        }
                    }
+                    out_len += n_elements;
                }
            } break;
            case 1: { // Expecting one continuation byte.
                if ((ch & 0xc0) == 0x80) { // If the next char is a continuation byte ..
                    codePoint = (codePoint << 6) | (ch & 0x3f); // Extract low 6 bits
                    state = 0;
-
                    if (codePoint <= 0xffff) {
-                        sprintf(wks,"\\u%04x", codePoint); 
+                        sprintf(ascii_elements, "\\u%04x", codePoint); 
                    } else {
-                        sprintf(wks,"\\U%08x", codePoint); 
+                        sprintf(ascii_elements, "\\U%08x", codePoint); 
                    }
-                    if ((strlen(out)+strlen(wks)) < outSize-1) {
-                        strcat(out, wks);
-                    } else {
-                        fprintf(stderr,"%s:ERROR: Insufficient room in (out) array.\n", __FUNCTION__);
-                        state = 99;
+                    size_t n_elements = strlen(ascii_elements);
+                    if (out != NULL) {
+                        if ((out_len + n_elements) < outSize) {
+                            strcat(out, ascii_elements);
+                        } else {
+                            fprintf(stderr,"%s:ERROR: Insufficient room in (out) array.\n", __FUNCTION__);
+                            state = ERROR_STATE;
+                        }
                    }
-
+                    out_len += n_elements;
                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
            case 2: { /* Expecting two continuation bytes. */
@ -158,7 +162,7 @@ size_t utf8_to_printable_ascii(const char *in, char *out, size_t outSize) {
                    state = 1;
                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
            case 3: { /* Expecting three continuation bytes. */
@ -167,60 +171,60 @@ size_t utf8_to_printable_ascii(const char *in, char *out, size_t outSize) {
                    state = 2;
                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
            default: { 
-                out[0] = 0;
-                return 0;
+                state = ERROR_STATE;
            } break;
        }
        in ++;
    }
+    /* If we didn't finish in state 0, then we had an error. */
+    if (state != 0) {
+        out_len = 0;
+    } 
+    if (out != NULL) out[out_len] = 0; /* NULL termination of string. */
+    return out_len;
+}

-    /* If we finished in state 0, then we're good. Just
-       terminate the string, otherwise we had an error. */
-    if (state == 0) {
-        return strlen(out);
-    } else {
-        out[0] = 0;
-        return 0;
-    }
+size_t escape_to_ascii_len(const char *in) {
+    return escape_to_ascii( in, NULL, (size_t)0);
 }

 /* Un-escapes ASCII and Unicode escape sequences, and encodes them into UTF-8. */
-size_t ascii_to_utf8(const char *in, char *out, size_t outSize) {
+size_t unescape_to_utf8(const char *in, char *out, size_t outSize) {

    unsigned int codePoint = 0;
-    size_t len = 0;
+    size_t out_len = 0;
    int state = 0;
    int digitsExpected = 0;

-    if (out == NULL) {
-        fprintf(stderr,"%s:ERROR: ASCII char pointer (out) is NULL. No conversion performed.\n", __FUNCTION__);
-        return 0;
-    }
-    out[0] = 0;
-
    if (in == NULL) {
-        fprintf(stderr,"%s:ERROR: UTF8 char-pointer (in) is NULL. No conversion performed.\n", __FUNCTION__);
+        fprintf(stderr,"%s:ERROR: char-pointer (in) is NULL. No conversion performed.\n", __FUNCTION__);
        return 0;
    }

-    while (*in != 0) {
+    if (out != NULL) out[out_len] = 0;
+
+    while ((*in != 0) && (state != ERROR_STATE )) {
        unsigned char ch = *in;
-        if (ch > 0x7f) { /* All input characters must be ASCII. */
-            fprintf(stderr,"%s:ERROR: ASCII string (in) contains non-ASCII values.\n", __FUNCTION__);
-            out[0] = 0; 
-            return 0;
-        }
-        /* All escaped characters will be un-escaped. */
        switch(state) {
            case 0: { // Normal State
-                if (ch =='\\') {
+                if (ch >= 0xf0) {        // Start of a 4-byte UTF-8 sequence.
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 3;
+                } else if (ch >= 0xe0) { // Start of a 3-byte UTF-8 sequence.
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 4;
+                } else if (ch >= 0xc0) { // Start of a 2-byte UTF-8 sequence.
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 5;
+                } else if (ch >= 0x80) { // We should never find a UTF-8 continuation byte in isolation.
+                    fprintf(stderr,"%s:ERROR: Input string (in) appears to be corrupted.\n", __FUNCTION__);
+                    state = ERROR_STATE;
+                } else if (ch =='\\') {
                    state = 1;
                } else {
-                    out[len++] = ch;
+                    if (out != NULL) out[out_len] = ch;
+                    out_len++;
                }
            } break;
            case 1: { // Escaped State ( that is: we've found a '\' character.)
@ -228,26 +232,24 @@ size_t ascii_to_utf8(const char *in, char *out, size_t outSize) {
                    case '\'':
                    case '\"':
                    case '\?':
-                    case '\\': {
-                        out[len++] = ch; state = 0;
-                    } break;
-
-                    case 'a': { out[len++] = '\a';  state = 0; } break;
-                    case 'b': { out[len++] = '\b';  state = 0; } break;
-                    case 'f': { out[len++] = '\f';  state = 0; } break;
-                    case 'n': { out[len++] = '\n';  state = 0; } break;
-                    case 'r': { out[len++] = '\r';  state = 0; } break;
-                    case 't': { out[len++] = '\t';  state = 0; } break;
-                    case 'v': { out[len++] = '\b';  state = 0; } break;
+                    case '\\': { if (out != NULL) out[out_len] = ch; out_len++; state = 0; } break;
+                    case 'a': { if (out != NULL) out[out_len] = '\a'; out_len++; state = 0; } break;
+                    case 'b': { if (out != NULL) out[out_len] = '\b'; out_len++; state = 0; } break;
+                    case 'f': { if (out != NULL) out[out_len] = '\f'; out_len++; state = 0; } break;
+                    case 'n': { if (out != NULL) out[out_len] = '\n'; out_len++; state = 0; } break;
+                    case 'r': { if (out != NULL) out[out_len] = '\r'; out_len++; state = 0; } break;
+                    case 't': { if (out != NULL) out[out_len] = '\t'; out_len++; state = 0; } break;
+                    case 'v': { if (out != NULL) out[out_len] = '\b'; out_len++; state = 0; } break;
                    case 'x': { digitsExpected = 2; state = 2; } break;
                    case 'u': { digitsExpected = 4; state = 2; } break;
                    case 'U': { digitsExpected = 8; state = 2; } break;
                    default : {
+                        state = ERROR_STATE;
                    }
                } // switch ch
            } break;
            case 2: { // Escaped Unicode ( that is: we've found '\x', '\u' or '\U'.)
-                 int digit = 0;
+                 int digit = -1;
                 if (ch >= '0' && ch <= '9') {
                     digit = ch - (int)'0';
                 } else if (ch >= 'A' && ch <= 'F') {
@ -257,108 +259,161 @@ size_t ascii_to_utf8(const char *in, char *out, size_t outSize) {
                 } else {
                     fprintf(stderr,"%s:ERROR: Insufficient hexidecimal digits following"
                                    " \\x, \\u, or \\U escape code in char string (in).\n", __FUNCTION__);
-                     out[0] = 0;
-                     return 0;
+                     state = ERROR_STATE;
                 }
-                 codePoint = codePoint * 16 + digit;
-                 digitsExpected -- ;
-                 if ( digitsExpected == 0 ) {
-                    char temp[4];
-                    size_t count = ucodepoint_to_utf8(codePoint, &temp);
-                    if (count < (outSize-len)) {
-                        memcpy( &out[len], temp, sizeof(char) * count );
-                        len += count;
+                 if (digit >= 0) { 
+                     codePoint = codePoint * 16 + digit;
+                     digitsExpected -- ;
+                     if ( digitsExpected == 0 ) {
+                        char utf8_bytes[4];
+                        size_t n_elements = ucodepoint_to_utf8(codePoint, &utf8_bytes);
                        state = 0;
-                    } else {
-                        fprintf(stderr,"%s:ERROR: Insufficient room in char array (out).\n", __FUNCTION__);
-                        out[0] = 0;
-                        return 0;
-                    }
-                    codePoint = 0;
+                        if (out != NULL) { 
+                            if (out_len + n_elements < outSize) {
+                                memcpy( &out[out_len], utf8_bytes, sizeof(char) * n_elements );
+                            } else {
+                                fprintf(stderr,"%s:ERROR: Insufficient room in char array (out).\n", __FUNCTION__);
+                                state = ERROR_STATE;
+                            }
+                        }
+                        out_len += n_elements;
+                        codePoint = 0;
+                     }
                 }
            } break;
+
+            case 3: { /* Expecting 3 UTF-8 continuation bytes. */
+                if ((ch & 0xc0) == 0x80) {
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 4;
+                } else {
+                    fprintf(stderr,"%s:ERROR: Input (in) appears to be corrupted.\n", __FUNCTION__);
+                    state = ERROR_STATE;
+                }
+            } break;
+
+            case 4: { /* Expecting 2 UTF-8 continuation bytes. */
+                if ((ch & 0xc0) == 0x80) {
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 5;
+                } else {
+                    fprintf(stderr,"%s:ERROR: Input (in) appears to be corrupted.\n", __FUNCTION__);
+                    state = ERROR_STATE;
+                }
+            } break;
+
+            case 5: { /* Expecting 1 UTF-8 continuation byte. */
+                if ((ch & 0xc0) == 0x80) {
+                    if (out != NULL) out[out_len] = ch; out_len++; state = 0;
+                } else {
+                    fprintf(stderr,"%s:ERROR: Input (in) appears to be corrupted.\n", __FUNCTION__);
+                    state = ERROR_STATE;
+                }
+            } break;
+
            default: { 
-                out[0] = 0;
-                return 0;
+                state = ERROR_STATE;
            } break;
        } 
        in ++;
    }
-    out[len] = 0; /* NULL termination of string. */
-    return len;
+    if (state != 0) { /* If we didn't finish in state 0, then we had an error. */
+        out_len = 0;
+    } 
+    if (out != NULL) out[out_len] = 0; /* NULL termination of string. */
+    return out_len;
+}
+
+size_t unescape_to_utf8_len(const char *in) {
+    return unescape_to_utf8( in, NULL, (size_t)0); 
 }

 size_t utf8_to_wchar(const char *in, wchar_t *out, size_t outSize) {

    unsigned int codePoint = 0;
-    size_t len = 0;
+    size_t out_len = 0;
    int state = 0;

-    while (*in != 0) {
+    if (in == NULL) {
+        fprintf(stderr,"%s:ERROR: UTF8 char-pointer (in) is NULL. No conversion performed.\n", __FUNCTION__);
+        return 0;
+    }
+
+    if (out != NULL) out[out_len] = 0;
+
+    while ((*in != 0) && (state != ERROR_STATE)) {
        unsigned char ch = *in;
        switch (state) {
            case 0: {
-                if (ch >= 0xf0) {          // Start of a 4-byte sequence.
+                if (ch >= 0xf0) {          // Start of a 4-byte UTF-8 sequence.
                    codePoint = ch & 0x07; // Extract low 3 bits
                    state = 3;
-                } else if (ch >= 0xe0) {   // Start of a 3-byte sequence.
+                } else if (ch >= 0xe0) {   // Start of a 3-byte UTF-8 sequence.
                    codePoint = ch & 0x0f; // Extract low 4 bits
                    state = 2;
-                } else if (ch >= 0xc0) {   // Start of a 2-byte sequence.
+                } else if (ch >= 0xc0) {   // Start of a 2-byte UTF-8 sequence.
                    codePoint = ch & 0x1f; // Extract low 5 bits
                    state = 1;
-                } else if (ch >= 0x80) {   // We should never find a continuation byte in isolation.
+                } else if (ch >= 0x80) {   // We should never find a UTF-8 continuation byte in isolation.
                    fprintf(stderr,"%s:ERROR: UTF8 string (in) appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                } else {
                    codePoint = ch;        // ASCII        
-                    if ((outSize-len) > 1) {
-                        out[len++] = (wchar_t)codePoint;
-                    } else {
-                        fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
-                        state = 99;
+                    if (out != NULL) { 
+                        if ((out_len + 1) < outSize) {
+                            out[out_len] = (wchar_t)codePoint;
+                        } else {
+                            fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
+                            state = ERROR_STATE;
+                        }
                    }
+                    out_len++;
                }
            } break;
-            case 1: { // Expecting one continuation byte.
-                if ((ch & 0xc0) == 0x80) { // If the next char is a continuation byte ..
+            case 1: { /* Expecting one continuation byte. */
+                if ((ch & 0xc0) == 0x80) {
                    codePoint = (codePoint << 6) | (ch & 0x3f); // Extract lower 6 bits 
                    state = 0;

                    if (sizeof(wchar_t) == 4) { // wchar_t is UTF-32
-                        int32_t temp;
-                        if ( ucodepoint_to_utf32(codePoint, &temp) > 0) {
-                            if ((outSize-len) > 1) {
-                                out[len++] = (wchar_t)temp;
-                            } else {
-                                fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
-                                state = 99;
+                        int32_t utf32_element;
+                        if ( ucodepoint_to_utf32(codePoint, &utf32_element) > 0) {
+                            if (out != NULL) {
+                                if ((out_len + 1) < outSize) {
+                                    out[out_len] = (wchar_t)utf32_element;
+                                } else {
+                                    fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
+                                    state = ERROR_STATE;
+                                }
                            }
+                            out_len++;
                        } else {
-                            state = 99;
+                            /* ucodepoint_to_utf32() will, in this case, have produced an error message. */
+                            state = ERROR_STATE;
                        }
-
                    } else if (sizeof(wchar_t) == 2) { // wchar_t is UTF-16
-                        int16_t temp[2];
-                        size_t count;
-                        if (( count = ucodepoint_to_utf16(codePoint, &temp)) > 0) {
-                            if (count < (outSize-len)) {
-                                memcpy( &out[len], temp, sizeof(int16_t) * count );
-                                len += count;
-                            } else {
-                                fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
-                                state = 99;
+                        int16_t utf16_elements[2];
+                        size_t n_elements;
+                        if (( n_elements = ucodepoint_to_utf16(codePoint, &utf16_elements)) > 0) {
+                            if (out != NULL) {
+                                if ((out_len + n_elements) < outSize) {
+                                    memcpy( &out[out_len], utf16_elements, sizeof(int16_t) * n_elements);
+                                } else {
+                                    fprintf(stderr,"%s:ERROR: Insufficient room in wchar_t array (out).\n", __FUNCTION__);
+                                    state = ERROR_STATE;
+                                }
                            }
+                            out_len += n_elements;
+                        } else {
+                            /* ucodepoint_to_utf16() will, in this case, have produced an error message. */
+                            state = ERROR_STATE;
                        }
-
                    } else {
                        fprintf(stderr,"%s:ERROR: Unsupported wchar_t size.\n", __FUNCTION__);
-                        state = 99;
+                        state = ERROR_STATE;
                    }

                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
            case 2: { /* Expecting two continuation bytes. */
@ -367,7 +422,7 @@ size_t utf8_to_wchar(const char *in, wchar_t *out, size_t outSize) {
                    state = 1;
                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
            case 3: { /* Expecting three continuation bytes. */
@ -376,35 +431,38 @@ size_t utf8_to_wchar(const char *in, wchar_t *out, size_t outSize) {
                    state = 2;
                } else {
                    fprintf(stderr,"%s:ERROR: UTF8 string appears to be corrupted.\n", __FUNCTION__);
-                    state = 99;
+                    state = ERROR_STATE;
                }
            } break;
-            default: { /* Error State. */
-                out[0] = 0;
-                return 0;
+            default: {
+                state = ERROR_STATE;
            } break;
        }
        in ++;
    }
+    if (state != 0) { /* If we didn't finish in state 0, it's an error. */
+        out_len = 0;
+    } 
+    if (out != NULL) out[out_len] = 0; /* NULL termination of string. */
+    return out_len;
+}

-    /* If we finished in state 0, then we're good. Just
-       terminate the string, otherwise we had an error. */
-    if (state == 0) {
-        out[len] = 0;
-        return len;
-    } else {
-        out[0] = 0;
-        return 0;
-    }
-    return len;
+size_t utf8_to_wchar_len(const char *in) {
+    return utf8_to_wchar( in, NULL, (size_t)0);
 }

 size_t wchar_to_utf8(const wchar_t *in, char *out, size_t outSize ) {

    unsigned int codePoint = 0;
-    size_t len = 0;
+    size_t out_len = 0;
+    int state = 0;

-    while ( *in != 0 ) {
+    if (in == NULL) {
+        fprintf(stderr,"%s:ERROR: wchar_t-pointer (in) is NULL. No conversion performed.\n", __FUNCTION__);
+        return 0;
+    }
+
+    while ((*in != 0) && (state != ERROR_STATE)) {
        if (*in >= 0xd800 && *in <= 0xdbff)         /* If High-surrogate. */
            codePoint = ((*in - 0xd800) << 10) + 0x10000;
        else {
@ -414,24 +472,33 @@ size_t wchar_to_utf8(const wchar_t *in, char *out, size_t outSize ) {
                codePoint = *in;
            } else {
                fprintf(stderr,"%s:ERROR: Invalid Unicode value.\n", __FUNCTION__);
-                out[0] = 0;
-                return 0;
+                state = ERROR_STATE;
            }

-            char temp[4];
-            size_t count = ucodepoint_to_utf8(codePoint, &temp);
-            if (count < (outSize-len)) {
-                memcpy( &out[len], temp, sizeof(char) * count );
-                len += count;
-            } else {
-                fprintf(stderr,"%s:ERROR: Insufficient room in char array (out).\n", __FUNCTION__);
-                out[0] = 0;
-                return 0;
+            if (state != ERROR_STATE) {
+                char utf8_elements[4];
+                size_t n_elements = ucodepoint_to_utf8(codePoint, &utf8_elements);
+                if (out != NULL) {
+                    if ((out_len + n_elements) < outSize) {
+                        memcpy( &out[out_len], utf8_elements, sizeof(char) * n_elements );
+                    } else {
+                        fprintf(stderr,"%s:ERROR: Insufficient room in char array (out).\n", __FUNCTION__);
+                        state = ERROR_STATE;
+                    }
+                }
+                out_len += n_elements;
+                codePoint = 0;
            }
-            codePoint = 0;
        }
        in++;
    }
-    out[len] = L'\0'; /* NULL termination of string. */
-    return len;
+    if (state != 0) { /* If we didn't finish in state 0, it's an error. */
+        out_len = 0;
+    } 
+    if (out != NULL) out[out_len] = 0; /* NULL termination of string. */
+    return out_len;
+}
+
+size_t wchar_to_utf8_len(const wchar_t *in) {
+    return wchar_to_utf8( in, NULL, (size_t)0);
 }
--- a/trick_source/trick_utils/unicode/test/Makefile
+++ b/trick_source/trick_utils/unicode/test/Makefile
@ -11,7 +11,7 @@ include ${TRICK_HOME}/share/trick/makefiles/Makefile.common
 TRICK_CPPFLAGS += -I$(GTEST_HOME)/include -I$(TRICK_HOME)/include -g -Wall -Wextra -DGTEST_HAS_TR1_TUPLE=0

 TRICK_LIBS = ${TRICK_LIB_DIR}/libtrick.a
-TRICK_EXEC_LINK_LIBS += -L${GTEST_HOME}/lib64 -L${GTEST_HOME}/lib -lgtest -lgtest_main -lpthread
+TRICK_EXEC_LINK_LIBS += -L${GTEST_HOME}/lib64 -L${GTEST_HOME}/lib -lgtest -lpthread

 # Added for Ubuntu... not required for other systems.
 TRICK_EXEC_LINK_LIBS += -lpthread
--- a/trick_source/trick_utils/unicode/test/unicode_utils_test.cpp
+++ b/trick_source/trick_utils/unicode/test/unicode_utils_test.cpp
@ -5,6 +5,11 @@
 #include <gtest/gtest.h>
 #include "trick/unicode_utils.h"

+int main(int argc, char **argv) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
+
 const char* ISO_6429_Restore_Default   = "\x1b[00m";
 const char* ISO_6429_Bold              = "\x1b[01m";
 const char* ISO_6429_Underline         = "\x1b[04m";
@ -31,6 +36,12 @@ void Error_Message_Expected() {
    printf("%s\n", ISO_6429_Restore_Default );
 }

+/* The following are the utf-8 encodings of four unicode characters used in the following tests. */
+// Greek Phi Symbol => U+03d5 => 0xcf 0x95                      // see: https://www.compart.com/en/unicode/U+03D5
+// Superscript Latin Small Letter I => U+2071 => 0xe2 0x81 0xb1 // see: https://www.compart.com/en/unicode/U+2071
+// Modifier Letter Small Greek Phi  => U+1D60 => 0xe1 0xb5 0xa0 // see: https://www.compart.com/en/unicode/U+1D60
+// Aegean Number Ten => U+10110 => 0xf0 0x90 0x84 0x90          // see: https://www.compart.com/en/unicode/U+10110
+
 // -------------------------------------------------------
 // Test suite for ucodepoint_to_utf32()
 // -------------------------------------------------------
@ -134,38 +145,39 @@ TEST(ucodepoint_to_utf8, ascii ) {
 }

 // -------------------------------------------------------
-// Test suite for utf8_to_printable_ascii()
+// Test suite for escape_to_ascii()
 // -------------------------------------------------------
-TEST(utf8_to_printable_ascii, null_input ) {
+TEST(escape_to_ascii, null_input ) {
    /* Should generate error message if input character pointer is NULL. */
-    char resultant_ascii_s[128];
+    char output[128];
    char* null_ptr = (char*)0;
    Error_Message_Expected();
-    size_t size = utf8_to_printable_ascii( null_ptr, resultant_ascii_s, sizeof(resultant_ascii_s));
+    size_t size = escape_to_ascii( null_ptr, output, sizeof(output));
    EXPECT_EQ(0, size);
 }

-TEST(utf8_to_printable_ascii, null_output ) {
-    /* Should generate error message if output character pointer is NULL. */
+TEST(escape_to_ascii, null_output ) {
+    /* If output character pointer is NULL, still determine the length. */
    char* null_ptr = (char*)0;
-    const char* input = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)";
-    Error_Message_Expected();
-    size_t size = utf8_to_printable_ascii( input, null_ptr, size_t(5));
-    EXPECT_EQ(0, size);
+    const char* input = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)\n";
+    size_t expected_size = strlen ("e\\u2071\\u1d60 = cos(\\u03d5) + i*sin(\\u03d5)\\n");
+    size_t size = escape_to_ascii( input, null_ptr, size_t(5));
+    EXPECT_EQ(expected_size, size);
 }

-TEST(utf8_to_printable_ascii, normal_1  ) {
-    char resultant_ascii_s[128];
-    /* utf8_to_printable_ascii() should escape all Unicode and non-printable ASCII characters. */
+TEST(escape_to_ascii, normal_1  ) {
+    char output[128];
+    /* escape_to_ascii() should escape all Unicode and non-printable ASCII characters. */
    const char* utf8_s = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)\n";
    const char* expected_ascii_s = "e\\u2071\\u1d60 = cos(\\u03d5) + i*sin(\\u03d5)\\n";
-    (void) utf8_to_printable_ascii( utf8_s, resultant_ascii_s, sizeof(resultant_ascii_s));
-    EXPECT_STREQ(expected_ascii_s, resultant_ascii_s);
+    size_t size = escape_to_ascii( utf8_s, output, sizeof(output));
+    EXPECT_EQ( strlen(expected_ascii_s), size);
+    EXPECT_STREQ(expected_ascii_s, output);
 }

-TEST(utf8_to_printable_ascii, normal_2  ) {
-    char resultant_ascii_s[256];
-    /* utf8_to_printable_ascii() should escape all Unicode and non-printable ASCII characters. */
+TEST(escape_to_ascii, normal_2  ) {
+    char output[256];
+    /* escape_to_ascii() should escape all Unicode and non-printable ASCII characters. */
    const char ascii[128] = {       '\x01','\x02','\x03','\x04','\x05','\x06','\x07','\x08','\x09','\x0a','\x0b','\x0c','\x0d','\x0e','\x0f',
                             '\x10','\x11','\x12','\x13','\x14','\x15','\x16','\x17','\x18','\x19','\x1a','\x1b','\x1c','\x1d','\x1e','\x1f',
                             '\x20','\x21','\x22','\x23','\x24','\x25','\x26','\x27','\x28','\x29','\x2a','\x2b','\x2c','\x2d','\x2e','\x2f',
@ -181,133 +193,147 @@ TEST(utf8_to_printable_ascii, normal_2  ) {
                                   "\\r\\x0e\\x0f\\x10\\x11\\x12\\x13\\x14\\x15\\x16\\x17\\x18\\x19\\x1a\\x1b\\x1c\\x1d\\x1e\\x1f"
                                   " !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~\\x7f";

-    size_t size = utf8_to_printable_ascii( ascii, resultant_ascii_s, sizeof(resultant_ascii_s));
-    EXPECT_EQ(209, size);
-    EXPECT_STREQ(expected_ascii_s, resultant_ascii_s);
+    size_t size = escape_to_ascii( ascii, output, sizeof(output));
+    EXPECT_EQ(strlen(expected_ascii_s), size);
+    EXPECT_STREQ(expected_ascii_s, output);
 }

-/* The following are the utf-8 encodings of four unicode characters used in the following tests. */
-// Greek Phi Symbol => U+03d5 => 0xcf 0x95                      // see: https://www.compart.com/en/unicode/U+03D5
-// Superscript Latin Small Letter I => U+2071 => 0xe2 0x81 0xb1 // see: https://www.compart.com/en/unicode/U+2071
-// Modifier Letter Small Greek Phi  => U+1D60 => 0xe1 0xb5 0xa0 // see: https://www.compart.com/en/unicode/U+1D60
-// Aegean Number Ten => U+10110 => 0xf0 0x90 0x84 0x90          // see: https://www.compart.com/en/unicode/U+10110
+TEST(escape_to_ascii, demotest ) {
+    char output[128];

-TEST(utf8_to_printable_ascii, demotest ) {
-    char resultant_ascii_s[128];
+    /* This test simply demonstrates that the following UTF-8 string (utf8_s),
+       used in subsequent tests, is a well formed UTF-8 string. */

    const char utf8_s[11] = {'P','h','i',' ','=',' ','\xcf','\x95','\0'};
    const char* expected_ascii_s = "Phi = \\u03d5";
-    (void) utf8_to_printable_ascii( utf8_s, resultant_ascii_s, sizeof(resultant_ascii_s));
-    EXPECT_STREQ(expected_ascii_s, resultant_ascii_s);
+
+    size_t size = escape_to_ascii( utf8_s, output, sizeof(output));
+
+    EXPECT_STREQ(expected_ascii_s, output);
+    EXPECT_EQ(strlen(expected_ascii_s), size);
 }

-TEST(utf8_to_printable_ascii, detect_corruption_1 ) {
-    char resultant_ascii_s[128];
-    /* The following string is deliberately corrupted with a spurious
-       continuation character (in corrupted_utf8_s[6]).*/
-    const char corrupted_utf8_s[11] = {'P','h','i',' ','=',' ','\x80','\x95','\0'};
+TEST(escape_to_ascii, detect_corruption_1 ) {
+    char output[128];
+
+    /* The input string is deliberately corrupted with a spurious
+       continuation character.*/
+
+    char utf8_s[11] = {'P','h','i',' ','=',' ','\xcf','\x95','\0'};
+    utf8_s[6] = '\x80'; /* Deliberately corrupt the UTF-8 string. */
+
    Error_Message_Expected();
-    size_t size = utf8_to_printable_ascii( corrupted_utf8_s, resultant_ascii_s, sizeof(resultant_ascii_s));
+    size_t size = escape_to_ascii( utf8_s, output, sizeof(output));
+
    EXPECT_EQ(0, size);
 }

-TEST(utf8_to_printable_ascii, detect_corruption_2 ) {
-    char resultant_ascii_s[128];
+TEST(escape_to_ascii, detect_corruption_2 ) {
+    char output[128];
+
    /* The following string is deliberately corrupted: 0xcf is a header
       for a two-byte sequence, it should be followed by a continuation
       byte (most significant 2 bits are 10). 0x75 starts with 01 */
-    const char corrupted_utf8_s[11] = {'P','h','i',' ','=',' ','\xcf','\x75','\0'};
+
+    char utf8_s[11] = {'P','h','i',' ','=',' ','\xcf','\x95','\0'};
+    utf8_s[7] = '\x75'; /* Deliberately corrupt the UTF-8 string. */
+
    Error_Message_Expected();
-    size_t size = utf8_to_printable_ascii( corrupted_utf8_s, resultant_ascii_s, sizeof(resultant_ascii_s));
+    size_t size = escape_to_ascii( utf8_s, output, sizeof(output));
+
    EXPECT_EQ(0, size);
 }

-TEST(utf8_to_printable_ascii, insufficient_result_array_size ) {
-    /* The result array must be of sufficient size. Here it is not. */
-    char resultant_ascii_s[16];
+TEST(escape_to_ascii, insufficient_result_array_size ) {
+    char output[16];
+
+    /* If the output array pointer is not NULL, it must be of sufficient size. Here it is not. */
    const char* utf8_s = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)\n";
    Error_Message_Expected();
-    size_t size = utf8_to_printable_ascii( utf8_s, resultant_ascii_s, sizeof(resultant_ascii_s));
+    size_t size = escape_to_ascii( utf8_s, output, sizeof(output));
    EXPECT_EQ(0, size);
 }

 // -------------------------------------------------------
-// Test suite for ascii_to_utf8()
+// Test suite for unescape_to_utf8()
 // -------------------------------------------------------

-TEST(ascii_to_utf8, null_input ) {
+TEST(unescape_to_utf8, null_input ) {
    /* Should generate error message if input character pointer is NULL. */
-    char resultant_ascii_s[128];
+    char output[128];
    char* null_ptr = (char*)0;
    Error_Message_Expected();
-    size_t size = ascii_to_utf8( null_ptr, resultant_ascii_s, sizeof(resultant_ascii_s));
+    size_t size = unescape_to_utf8( null_ptr, output, sizeof(output));
    EXPECT_EQ(0, size);
 }

-TEST(ascii_to_utf8, null_output ) {
-    /* Should generate error message if output character pointer is NULL. */
+TEST(unescape_to_utf8, null_output ) {
+    /* Should return the length of the string that would have been produced. */
    char* null_ptr = (char*)0;
-    const char* input = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)";
-
-    Error_Message_Expected();
-    size_t size = ascii_to_utf8( input, null_ptr, size_t(5));
-    EXPECT_EQ(0, size);
+    const char* input = "e\\u2071\\u1d60 = cos(\\u03d5) + i*sin(\\u03d5)\\n";
+    size_t expected_size = strlen("e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)\n");
+    size_t size = unescape_to_utf8( input, null_ptr, size_t(5));
+    EXPECT_EQ(expected_size, size);
 }

-TEST(ascii_to_utf8, normal_1) {
-    /* ascii_to_utf8() should un-escape all escaped ASCII and escaped unicode.
-     */
-    char actual_output[256];
+TEST(unescape_to_utf8, normal_1) {
+    /* unescape_to_utf8() should un-escape all escaped ASCII and escaped unicode,
+       producing a utf8 character string. It should also return the length of
+       that string. */
+    char actual_output[128];
    const char* input = "e\\u2071\\u1d60 = cos(\\u03d5) + i*sin(\\u03d5)\\n";
    const char* expected_output = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)\n";

-    size_t size = ascii_to_utf8(input, actual_output, sizeof(actual_output));
-    EXPECT_EQ(30, size);
+    size_t size = unescape_to_utf8(input, actual_output, sizeof(actual_output));
+
+    EXPECT_EQ( strlen(expected_output), size);
    EXPECT_STREQ(expected_output, actual_output);
 }

-TEST(ascii_to_utf8, non_ascii_chars) {
-    char actual_output[256];
-    /* The input string should only contain ASCII characters, that is,
-       each element should have a value < 128. That isn't the case in the 
-       following string. Therefore, an error message should be emitted.
+TEST(unescape_to_utf8, non_ascii_chars) {
+    char actual_output[128];
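+    /* Input mixes raw non-ASCII UTF-8 bytes with backslash escapes; the expected length (30)
+       is that of the fully un-escaped UTF-8 string. NOTE(review): confirm an error message is still expected.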
     */
-    const char* input = "eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
+    const char* input = "eⁱᵠ = cos(ϕ) + i*sin(\\u03d5)\\n";

    Error_Message_Expected();
-    size_t size = ascii_to_utf8(input, actual_output, sizeof(actual_output));
-    EXPECT_EQ(0, size);
+    size_t size = unescape_to_utf8(input, actual_output, sizeof(actual_output));
+
+    EXPECT_EQ(30, size);
 }

-TEST(ascii_to_utf8, insufficient_hex_digits_1) {
+TEST(unescape_to_utf8, insufficient_hex_digits_1) {
    /* The \U escape code expects exactly 8 hexidecimal digits to follow.
       If fewer than 8 are present, then an error message should result.
       Note: "\U10110" will fail in a C/C++ literal at compile time too,
       because it is incomplete. It should be "\U00010110".
     */
-    char actual_output[256];
+    char actual_output[128];
    const char* input = "Aegean Number Ten = \\U10110\n";

    Error_Message_Expected();
-    size_t size = ascii_to_utf8(input, actual_output, sizeof(actual_output));
+    size_t size = unescape_to_utf8(input, actual_output, sizeof(actual_output));
+
    EXPECT_EQ(0, size);
 }

-TEST(ascii_to_utf8, insufficient_hex_digits_2) {
+TEST(unescape_to_utf8, insufficient_hex_digits_2) {
    /* The \u escape code expects exactly 4 hexidecimal digits to follow.
       If fewer than 4 are present, then an error message should result.
       Note: "\u3d5" will fail in a C/C++ literal at compile time too,
       because it is incomplete. It should be "\u03d5".
     */
-    char actual_output[256];
+    char actual_output[128];
    const char* input = "Phi = \\u3d5\n";

    Error_Message_Expected();
-    size_t size = ascii_to_utf8(input, actual_output, sizeof(actual_output));
+    size_t size = unescape_to_utf8(input, actual_output, sizeof(actual_output));
+
    EXPECT_EQ(0, size);
 }

-TEST(ascii_to_utf8, insufficient_result_array_size) {
+TEST(unescape_to_utf8, insufficient_result_array_size) {
    /* The result array must be of sufficient size. If it isn't, then an error
       message should be emitted.
     */
@ -315,13 +341,31 @@ TEST(ascii_to_utf8, insufficient_result_array_size) {
    const char* input = "e\\u2071\\u1d60 = cos(\\u03d5) + i*sin(\\u03d5)\\n";

    Error_Message_Expected();
-    size_t size = ascii_to_utf8(input, actual_output, sizeof(actual_output));
+    size_t size = unescape_to_utf8(input, actual_output, sizeof(actual_output));
+
    EXPECT_EQ(0, size);
 }

 // -------------------------------------------------------
 // Test suite for utf8_to_wchar()
 // -------------------------------------------------------
+TEST(utf8_to_wchar, null_input ) {
+    /* A NULL input pointer is an error: an error message is expected and 0 must be returned. */
+    const char* const no_input = (const char*)0;
+    wchar_t wide_out[128];
+    Error_Message_Expected();
+    const size_t reported_len = utf8_to_wchar( no_input, wide_out, sizeof(wide_out)/sizeof(wide_out[0]));
+    EXPECT_EQ(0, reported_len);
+}
+
+TEST(utf8_to_wchar, null_output ) {
+    /* With no output array, the length of the wide string that would have been produced is still expected. */
+    const char* utf8_in = "e\u2071\u1d60 = cos(\u03d5) + i*sin(\u03d5)";
+    const size_t wanted_len = wcslen(L"eⁱᵠ = cos(ϕ) + i*sin(ϕ)");
+    wchar_t* const no_output = (wchar_t*)0;
+    const size_t reported_len = utf8_to_wchar( utf8_in, no_output, size_t(0));
+    EXPECT_EQ(wanted_len, reported_len);
+}

 /* The following three tests demonstrate three different ways to
   create the same input string. */
@ -348,7 +392,7 @@ TEST(utf8_to_wchar, test2) {

 TEST(utf8_to_wchar, test3) {
    wchar_t resultant_wchar_s[128];
-    const char input[30] = {'e','\xe2','\x81','\xb1','\xe1', '\xb5','\xa0',' ','=',' ',
+    const char input[30] = {'e','\xe2','\x81','\xb1','\xe1','\xb5','\xa0',' ','=',' ',
                            'c','o','s','(','\xcf','\x95',')',' ','+',' ','i','*','s',
                            'i','n','(','\xcf','\x95',')','\0'};
    const wchar_t* expected_wide_s = L"eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
@ -369,7 +413,7 @@ TEST(utf8_to_wchar, insufficient_result_array_size) {

 TEST(utf8_to_wchar, corrupted_input) {
    wchar_t resultant_wchar_s[128];
-    char input[30] = {'e','\xe2','\x81','\xb1','\xe1', '\xb5','\xa0',' ','=',' ',
+    char input[30] = {'e','\xe2','\x81','\xb1','\xe1','\xb5','\xa0',' ','=',' ',
                      'c','o','s','(','\xcf','\x95',')',' ','+',' ','i','*','s',
                      'i','n','(','\xcf','\x95',')','\0'};

@ -384,8 +428,26 @@ TEST(utf8_to_wchar, corrupted_input) {
 // -------------------------------------------------------
 // Test suite for wchar_to_utf8()
 // -------------------------------------------------------
-TEST(wchar_to_utf8, test1) {
+TEST(wchar_to_utf8, null_input ) {
+    /* A NULL input pointer is an error: an error message is expected and 0 must be returned. */
+    wchar_t* null_ptr = (wchar_t*)0;
+    char output[128];   /* UTF-8 output is a char array, so its capacity is counted in chars, not wchar_t. */
+    Error_Message_Expected();
+    size_t size = wchar_to_utf8( null_ptr, output, sizeof(output)/sizeof(char));
+    EXPECT_EQ(0, size);
+}

+TEST(wchar_to_utf8, null_output ) {
+    /* A NULL output array is allowed: the length of the would-be UTF-8 string is still expected back. */
+    const size_t wanted_len = strlen("eⁱᵠ = cos(ϕ) + i*sin(ϕ)");
+    const wchar_t* wide_in = L"eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
+    char* const no_output = (char*)0;
+    const size_t reported_len = wchar_to_utf8( wide_in, no_output, (size_t)0);
+    EXPECT_EQ(wanted_len, reported_len);
+}
+
+TEST(wchar_to_utf8, test1) {
+    /* Should convert wchar_t array to a UTF-8 array. */
    char resultant_utf8_s[128];
    const wchar_t* wide_s = L"eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
    const char* expected_utf8_s = "eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
@ -395,6 +457,29 @@ TEST(wchar_to_utf8, test1) {
    EXPECT_EQ(true, test_result);
 }

+TEST(wchar_to_utf8, test2) {
+    /* Same conversion as test1, but the input is a fixed-size array. A variant of this is used below. */
+    char resultant_utf8_s[128] = "";   /* Start empty so a failed conversion is compared, not read uninitialised. */
+    wchar_t wide_s[32] = { L'e', L'ⁱ', L'ᵠ', L' ', L'=', L' ', L'c', L'o', L's', L'(', L'ϕ', L')',
+                           L' ', L'+', L' ', L'i', L'*', L's', L'i', L'n', L'(', L'ϕ', L')' };
+    const char* expected_utf8_s = "eⁱᵠ = cos(ϕ) + i*sin(ϕ)";
+
+    size_t size = wchar_to_utf8(wide_s, resultant_utf8_s, sizeof(resultant_utf8_s)/sizeof(char));
+    EXPECT_EQ(strlen(expected_utf8_s), size);   /* Returned length should match the produced string. */
+    EXPECT_STREQ(expected_utf8_s, resultant_utf8_s);
+}
+
+TEST(wchar_to_utf8, invalid_unicode) {
+    /* The value 0x110000 is planted in otherwise valid input; an error message and a 0 return are expected. */
+    wchar_t wide_in[32] = { L'e', L'ⁱ', L'ᵠ', L' ', L'=', L' ', L'c', L'o', L's', L'(', L'ϕ', L')',
+                           L' ', L'+', L' ', L'i', L'*', L's', L'i', L'n', L'(', L'ϕ', L')' };
+    wide_in[14] = (wchar_t)0x110000;
+    char utf8_out[128];
+    Error_Message_Expected();
+    const size_t reported_len = wchar_to_utf8(wide_in, utf8_out, sizeof(utf8_out));
+    EXPECT_EQ(0, reported_len);
+}
+
 TEST(wchar_to_utf8, insufficient_result_array_size) {

    char resultant_utf8_s[16];