[odb-users] nvarchar to std::string mapping

Sat May 2 18:32:25 EDT 2020

Hello

If anyone else wants nvarchar / UTF-8 std::string mapping, here's my
implementation. Feel free to use it in your projects.

#ifndef CORE_DB_TYPES_H_
#define CORE_DB_TYPES_H_

#include <odb/mssql/traits.hxx>
#include <string>

namespace odb
{
    namespace mssql
    {
        // Value traits that let a std::string member be stored in a column
        // handled through the id_nstring image (a ucs2_char buffer). The
        // accompanying implementation converts between the UTF-8 text held
        // in the std::string and the UTF-16 data held in the image.
        //
        // NOTE(review): ODB's built-in std::wstring traits appear to treat
        // the size/capacity arguments below as byte counts rather than
        // character counts -- confirm the implementation agrees.
        template <>
        struct value_traits<std::string, id_nstring>
        {
            using value_type = std::string;
            using query_type = std::string;
            using image_type = details::buffer;

            // Database -> object. Fills 'value' from the 'buffer_size' worth
            // of data found in 'buffer', or empties it when 'is_null' is
            // true.
            static void set_value(
                std::string &value,
                const ucs2_char *buffer,
                std::size_t buffer_size,
                bool is_null);

            // Object -> database. Stores 'value' into 'buffer' (which can
            // hold 'buffer_size'), reports the amount stored through
            // 'actual_size' and sets 'is_null'.
            static void set_image(
                ucs2_char *buffer,
                std::size_t buffer_size,
                std::size_t &actual_size,
                bool &is_null,
                const std::string &value);
        };

        // Value traits that let a std::string member be stored in a column
        // handled through the id_long_nstring image. Long data is not
        // copied in one piece; instead the traits hand ODB a callback plus
        // a context pointer and the data is streamed chunk by chunk.
        template <>
        struct value_traits<std::string, id_long_nstring>
        {
            using value_type = std::string;
            using query_type = std::string;
            using image_type = long_callback;

            // Database -> object. Registers the callback ('cb') and context
            // used to stream the column data into 'v'.
            static void set_value(
                std::string &v,
                result_callback_type &cb,
                void *&context);

            // Object -> database. Registers the callback ('cb') and context
            // used to stream 'v' to the database and sets 'is_null'.
            static void set_image(
                param_callback_type &cb,
                const void *&context,
                bool &is_null,
                const std::string &v);

            // Chunk producer used when sending a value; 'context' is the
            // pointer registered by set_image().
            static void write_callback(
                const void *context,
                std::size_t *position,
                const void **buffer,
                std::size_t *size,
                chunk_type *chunk,
                void *tmp_buffer,
                std::size_t tmp_capacity);

            // Chunk consumer used when receiving a value; 'context' is the
            // pointer registered by set_value().
            static void read_callback(
                void *context,
                std::size_t *position,
                void **buffer,
                std::size_t *size,
                chunk_type chunk,
                std::size_t size_left,
                void *tmp_buffer,
                std::size_t tmp_capacity);
        };
    }
}

#endif /* CORE_DB_TYPES_H_ */

#include <cstring>
#include <Poco/UTF8Encoding.h>
#include <Poco/UTF16Encoding.h>
#include <Poco/UnicodeConverter.h>
#include "core/db_types.h"
#include "core/log.h"

using Poco::UTF16String;
using Poco::UTF8Encoding;
using Poco::UTF16Encoding;
using Poco::UnicodeConverter;
using namespace std;

namespace odb {
namespace mssql {

/*
 * Converts the UTF-16 column data received from the database into a UTF-8
 * std::string.
 *
 * value       - [out] Receives the converted text; emptied for SQL NULL.
 * buffer      - UTF-16 code units fetched for the column.
 * buffer_size - Size of the fetched data in BYTES. ODB forwards the ODBC
 *               length indicator, which counts bytes, not characters (its
 *               own std::wstring traits divide this value by two).
 * is_null     - True if the column value is SQL NULL.
 */
void value_traits<string, id_nstring>::set_value(string &value,
    const ucs2_char *buffer, size_t buffer_size, bool is_null)
{
    if(is_null) {
        value.clear();
        return;
    }

    // UnicodeConverter expects the length in UTF-16 code units. Passing the
    // byte count unchanged would make it read twice as much data as was
    // actually fetched.
    UnicodeConverter::convert(buffer, buffer_size / sizeof(ucs2_char), value);
}

/*
 * Converts a UTF-8 std::string into the UTF-16 column image sent to the
 * database.
 *
 * buffer      - [out] Image buffer that receives the UTF-16 code units.
 * buffer_size - Capacity of 'buffer' in BYTES.
 * actual_size - [out] Number of BYTES stored in 'buffer'.
 * is_null     - [out] Always set to false; an empty string is stored as an
 *               empty value, not as SQL NULL.
 * value       - UTF-8 text to store. Text that does not fit is truncated.
 */
void value_traits<string, id_nstring>::set_image(ucs2_char *buffer,
    size_t buffer_size, size_t &actual_size, bool &is_null,
    const string &value)
{
    UTF16String utf16_string;
    UnicodeConverter::convert(value, utf16_string);

    is_null = false;

    // The capacity is given in bytes; do the clamping in whole code units so
    // that neither the comparison nor the copy mixes the two units (mixing
    // them could overrun the image buffer).
    const size_t capacity_units = buffer_size / sizeof(ucs2_char);
    size_t units = utf16_string.size();
    if(units > capacity_units) {
        units = capacity_units;

        // Do not cut a surrogate pair in half: if the last unit kept is a
        // high surrogate, its low surrogate was dropped, so drop it as well.
        if(units != 0) {
            const unsigned long last =
                static_cast<unsigned long>(utf16_string[units - 1]);
            if(last >= 0xD800UL && last <= 0xDBFFUL) --units;
        }
    }

    if(units != 0)
        memcpy(buffer, utf16_string.data(), units * sizeof(ucs2_char));

    // ODB expects the stored size in bytes.
    actual_size = units * sizeof(ucs2_char);
}

/*
 * Prepares streaming of a long value from the database into 'v': the
 * destination string becomes the callback context and read_callback is
 * registered as the function that consumes the chunks.
 */
void value_traits<string, id_long_nstring>::set_value(
    string &v,
    result_callback_type &cb,
    void *&context)
{
    context = &v;
    cb = &read_callback;
}

/*
 * Prepares streaming of 'v' to the database: the source string becomes the
 * callback context and write_callback is registered as the function that
 * produces the chunks. The value is never reported as NULL.
 */
void value_traits<string, id_long_nstring>::set_image(
    param_callback_type &cb,
    const void *&context,
    bool &is_null,
    const string &v)
{
    context = &v;
    cb = &write_callback;
    is_null = false;
}

/*
 * The callback function is called before calling SQLPutData(). The variables
 * pointed to by the 'buffer' and 'size' parameters are passed to
 * SQLPutData(). If the callback function sets the variable pointed to by
 * 'chunk' to the 'chunk_next' value, the operation is repeated.
 */

/*
 * Produces the next chunk of UTF-16LE data for a UTF-8 std::string that is
 * being sent to the database. Each call converts as many characters as fit
 * into the temporary buffer, starting at byte offset '*position' of the
 * source string, and advances '*position' past the bytes it consumed.
 *
 * Malformed UTF-8 sequences are replaced with U+FFFD (one replacement per
 * offending byte). A multi-byte sequence that is cut off at the end of the
 * string is reported through the log and ends the value.
 */
void value_traits<string, id_long_nstring>::write_callback(
    const void *context,    // User context: the std::string registered by
                            // set_image().
    size_t *position,       // Position context. An implementation is free
                            // to use this to track position information. It
                            // is initialized to zero before the first call.
                            // Used here as the byte offset into the string.
    const void **buffer,    // [in/out] Buffer containing the data. On the
                            // first call it contains a pointer to the
                            // long_callback struct (used for redirections).
    size_t *size,           // [out] Data size.
    chunk_type *chunk,      // [out] The position of this chunk of data.
    void *tmp_buffer,       // A temporary buffer that may be used by the
                            // implementation.
    size_t tmp_capacity     // Capacity of the temporary buffer.
)
{
    const string &value(*static_cast<const string *>(context));
    UTF8Encoding utf8;
    UTF16Encoding utf16(UTF16Encoding::LITTLE_ENDIAN_BYTE_ORDER);

    *buffer = tmp_buffer;
    *size = 0;

    unsigned char *out = static_cast<unsigned char *>(tmp_buffer);
    const unsigned char *in =
        reinterpret_cast<const unsigned char *>(value.data()) + *position;
    size_t in_left = value.size() - *position;

    // Set when the loop stops because the next character does not fit into
    // what is left of the temporary buffer (as opposed to bad input).
    bool buffer_full = false;

    while(in_left != 0) {
        int in_len = utf8.sequenceLength(in, static_cast<int>(in_left));
        if(in_len <= 0) break;
        if(static_cast<size_t>(in_len) > in_left) break;    // cut-off tail

        int ch = utf8.queryConvert(in, static_cast<int>(in_left));
        if(ch < 0) {
            // Malformed sequence. Feeding the negative result to the UTF-16
            // encoder would emit a garbage code unit, so substitute the
            // replacement character and resynchronize on the next byte.
            ch = 0xFFFD;
            in_len = 1;
        }

        // convert() returns the number of bytes the character needs and
        // writes nothing if that exceeds the capacity it is given.
        const int out_len =
            utf16.convert(ch, out, static_cast<int>(tmp_capacity));
        if(static_cast<size_t>(out_len) > tmp_capacity) {
            buffer_full = true;
            break;
        }

        *position += in_len;
        in += in_len;
        in_left -= in_len;

        *size += out_len;
        out += out_len;
        tmp_capacity -= out_len;
    }

    if(in_left != 0) {
        if(buffer_full) {
            // More input remains; ask to be called again for the rest.
            *chunk = chunk_next;
            return;
        }

        Scope;
        Error << "Truncated UTF-8 data, bytes left " << in_left;
    }
    *chunk = chunk_last;
}

/*
SQLRETURN SQLGetData(
      SQLHSTMT       StatementHandle,
      SQLUSMALLINT   Col_or_Param_Num,
      SQLSMALLINT    TargetType,
      SQLPOINTER     TargetValuePtr,
      SQLLEN         BufferLength,
      SQLLEN *       StrLen_or_IndPtr);

After SQLGetData() is called with BufferLength=0, the callback function is
called for the first time with the 'chunk' parameter set to one of the
following values:
- chunk_null, if 'StrLen_or_IndPtr' == SQL_NULL_DATA;
- chunk_one, if 'StrLen_or_IndPtr' == 0;
- chunk_first, otherwise.
Since BufferLength=0, the buffer does not contain any data yet.

Unless 'chunk' was chunk_null, chunk_one or chunk_last, the callback function
must set the variables pointed to by the 'buffer' and 'size' parameters.
SQLGetData() will store at most 'size' bytes in the buffer.
*/

/*
 * Consumes chunks of UTF-16LE data fetched from the database and appends
 * their UTF-8 conversion to the std::string registered by set_value().
 *
 * The temporary buffer is used as the fetch buffer. A character that is
 * split across two chunks is kept at the start of the temporary buffer and
 * completed by the next chunk. A high surrogate that is not followed by a
 * low surrogate is replaced with U+FFFD.
 */
void value_traits<string, id_long_nstring>::read_callback(
    void *context,          // User context: the std::string registered by
                            // set_value().
    size_t *position,       // Position context. An implementation is free
                            // to use this to track position information. It
                            // is initialized to zero before the first call.
                            // Not used by this implementation.
    void **buffer,          // [in/out] Buffer to copy the data to. On the
                            // first call it contains a pointer to the
                            // long_callback struct (used for redirections).
    size_t *size,           // [in/out] In: amount of data copied into the
                            // buffer after the previous call. Out: capacity
                            // of the buffer.
    chunk_type chunk,       // The position of this chunk; chunk_first means
                            // this is the first call, chunk_last means
                            // there is no more data, chunk_null means this
                            // value is NULL, and chunk_one means the value
                            // is empty.
    size_t size_left,       // Contains the amount of data left or 0 if this
                            // information is not available. Not used by
                            // this implementation.
    void *tmp_buffer,       // A temporary buffer that may be used by the
                            // implementation.
    size_t tmp_capacity     // Capacity of the temporary buffer.
)
{
    string &result(*static_cast<string *>(context));

    if(chunk == chunk_null || chunk == chunk_one) {
        result.clear();
        return;
    }

    if(chunk == chunk_first) {
        // Chunks are appended below, so start from an empty string;
        // otherwise loading into a string that already holds text would
        // append the column value to the old content.
        result.clear();
        *buffer = tmp_buffer;
        *size = tmp_capacity;
        return;
    }

    /* Convert at most
     * (char *)(*buffer) + *size - (char *)tmp_buffer
     * bytes containing the UTF16 character sequence from 'tmp_buffer' and
     * append the resulting UTF8 characters to the 'result'. If the sequence
     * is truncated, move the unconverted bytes to the beginning of the
     * 'tmp_buffer', increase the pointer stored in the variable pointed to
     * by 'buffer' by the number of bytes containing the unconverted UTF16
     * characters and decrease the variable pointed to by 'size' by the said
     * number.
     */

    UTF8Encoding utf8;
    UTF16Encoding utf16(UTF16Encoding::LITTLE_ENDIAN_BYTE_ORDER);

    // Bytes available: what was carried over from the previous chunk plus
    // what has just been fetched.
    size_t chunk_left = static_cast<char *>(*buffer) + *size
        - static_cast<char *>(tmp_buffer);
    unsigned char *chunk_p = static_cast<unsigned char *>(tmp_buffer);

    while(chunk_left != 0) {
        int char_size =
            utf16.sequenceLength(chunk_p, static_cast<int>(chunk_left));
        if(char_size <= 0) break;
        if(static_cast<size_t>(char_size) > chunk_left) break;  // split char

        int ch = utf16.queryConvert(chunk_p, static_cast<int>(chunk_left));
        if(ch < 0) {
            // Malformed pair. Encoding the negative result would append a
            // raw 0xFF byte, i.e. invalid UTF-8. Emit the replacement
            // character for the offending code unit and re-examine the
            // unit that follows it.
            ch = 0xFFFD;
            char_size = 2;
        }

        unsigned char utf8_char[6];
        const int utf8_char_size =
            utf8.convert(ch, utf8_char, sizeof(utf8_char));
        result.append(reinterpret_cast<const char *>(utf8_char),
            utf8_char_size);

        chunk_p += char_size;
        chunk_left -= char_size;
    }

    if(chunk == chunk_last) {
        if(chunk_left == 0) return;

        Scope;
        Error << "nvarchar contains truncated data, bytes left " << chunk_left;
        return;
    }

    // Keep the incomplete tail at the front of the temporary buffer and let
    // the next fetch continue right after it.
    if(chunk_left != 0) memmove(tmp_buffer, chunk_p, chunk_left);
    *buffer = static_cast<char *>(tmp_buffer) + chunk_left;
    *size = tmp_capacity - chunk_left;
}

}
}