[odb-users] nvarchar to std::string mapping
Sten Kultakangas
ratkaisut at gmail.com
Sat May 2 18:32:25 EDT 2020
Hello
If anyone else wants nvarchar / UTF-8 std::string mapping, here's my
implementation. Feel free to use it in your projects.
#ifndef CORE_DB_TYPES_H_
#define CORE_DB_TYPES_H_
#include <odb/mssql/traits.hxx>
#include <string>
namespace odb
{
namespace mssql
{
// Value traits that let a std::string member be stored in a column of the
// id_nstring database type. The image is a buffer of ucs2_char code units;
// both conversion functions are defined out of line.
template <>
struct value_traits<std::string, id_nstring>
{
  using value_type = std::string;
  using query_type = std::string;
  using image_type = details::buffer;

  // Fills 'value' from the 'buffer_size' code units found at 'buffer';
  // 'is_null' tells whether the column held NULL.
  static void set_value(std::string &value,
                        const ucs2_char *buffer,
                        std::size_t buffer_size,
                        bool is_null);

  // Writes the image of 'value' into 'buffer' (capacity 'buffer_size' code
  // units) and reports the number of code units stored in 'actual_size'.
  static void set_image(ucs2_char *buffer,
                        std::size_t buffer_size,
                        std::size_t &actual_size,
                        bool &is_null,
                        const std::string &value);
};
// Value traits that let a std::string member be stored in a column of the
// id_long_nstring database type. Data is exchanged chunk by chunk through the
// two callbacks declared below rather than through a fixed-size buffer.
template <>
struct value_traits<std::string, id_long_nstring>
{
  using value_type = std::string;
  using query_type = std::string;
  using image_type = long_callback;

  // Registers read_callback as the result callback, with 'v' as its context.
  static void set_value(std::string &v,
                        result_callback_type &cb,
                        void *&context);

  // Registers write_callback as the parameter callback, with 'v' as its
  // context, and marks the value as non-NULL.
  static void set_image(param_callback_type &cb,
                        const void *&context,
                        bool &is_null,
                        const std::string &v);

  // Produces the next chunk of data to be sent to the database.
  static void write_callback(const void *context,
                             std::size_t *position,
                             const void **buffer,
                             std::size_t *size,
                             chunk_type *chunk,
                             void *tmp_buffer,
                             std::size_t tmp_capacity);

  // Consumes the chunk of data most recently received from the database.
  static void read_callback(void *context,
                            std::size_t *position,
                            void **buffer,
                            std::size_t *size,
                            chunk_type chunk,
                            std::size_t size_left,
                            void *tmp_buffer,
                            std::size_t tmp_capacity);
};
}
}
#endif /* CORE_DB_TYPES_H_ */
#include <cstring>
#include <Poco/UTF8Encoding.h>
#include <Poco/UTF16Encoding.h>
#include <Poco/UnicodeConverter.h>
#include "core/db_types.h"
#include "core/log.h"
using Poco::UTF16String;
using Poco::UTF8Encoding;
using Poco::UTF16Encoding;
using Poco::UnicodeConverter;
using namespace std;
namespace odb {
namespace mssql {
// Loads 'value' from a column image. A NULL column yields an empty string;
// otherwise the 'buffer_size' code units at 'buffer' are converted into
// 'value' by Poco's UnicodeConverter.
void value_traits<string, id_nstring>::set_value(
    string &value,
    const ucs2_char *buffer,
    size_t buffer_size,
    bool is_null)
{
  if (!is_null) {
    UnicodeConverter::convert(buffer, buffer_size, value);
    return;
  }

  value.clear();
}
// Stores 'value' into the column image.
//
//   buffer       destination for the converted code units
//   buffer_size  capacity of 'buffer', in code units
//   actual_size  [out] number of code units written to 'buffer'
//   is_null      [out] always set to false; a std::string is never NULL
//   value        string to convert with Poco's UnicodeConverter
//
// If the converted text does not fit, it is truncated to the buffer capacity.
// The truncation never leaves the first half of a surrogate pair at the end
// of the image, because a lone high surrogate is ill-formed UTF-16.
void value_traits<string, id_nstring>::set_image(
    ucs2_char *buffer,
    size_t buffer_size,
    size_t &actual_size,
    bool &is_null,
    const string &value)
{
  UTF16String utf16_string;
  UnicodeConverter::convert(value, utf16_string);

  is_null = false;
  actual_size = utf16_string.size();

  if (actual_size > buffer_size) {
    actual_size = buffer_size;

    // The cut may fall between the two halves of a surrogate pair; drop the
    // dangling high surrogate (0xD800..0xDBFF) in that case.
    if (actual_size != 0) {
      const unsigned last_unit =
          static_cast<unsigned>(utf16_string[actual_size - 1]) & 0xFFFFu;
      if (last_unit >= 0xD800u && last_unit <= 0xDBFFu) --actual_size;
    }
  }

  memcpy(buffer, utf16_string.data(), actual_size * sizeof(ucs2_char));
}
// Prepares a long column for reading: hands the destination string to the
// caller as the callback context and selects read_callback as the function
// that will receive the data.
void value_traits<string, id_long_nstring>::set_value(
    string &v,
    result_callback_type &cb,
    void *&context)
{
  context = &v;
  cb = &read_callback;
}
// Prepares a long column for writing: hands the source string to the caller
// as the callback context, selects write_callback as the function that will
// supply the data, and reports the value as non-NULL.
void value_traits<string, id_long_nstring>::set_image(
    param_callback_type &cb,
    const void *&context,
    bool &is_null,
    const string &v)
{
  context = &v;
  cb = &write_callback;
  is_null = false;
}
/*
 * The callback function is called before calling SQLPutData(). The variables
 * pointed to by the 'buffer' and 'size' parameters are passed to SQLPutData().
 * If the callback function sets the variable pointed to by 'chunk' to the
 * 'chunk_next' value, the operation is repeated.
 */
// Converts the next portion of the UTF-8 source string to little-endian
// UTF-16 in 'tmp_buffer' and hands that buffer back to the caller.
//
// Conversion resumes at byte offset '*position' of the source string and
// stops when the string is exhausted, when the next character no longer fits
// into the temporary buffer, or when the string ends in the middle of a
// multi-byte sequence. '*chunk' is set to chunk_next when more data remains
// and the buffer is (nearly) full, and to chunk_last otherwise. A malformed
// UTF-8 sequence is replaced by U+FFFD instead of being encoded from the
// negative value reported by the decoder.
void value_traits<string, id_long_nstring>::write_callback(
    const void *context,  // User context: the std::string being written.
    size_t *position,     // Position context: byte offset into the source
                          // string of the first character not yet converted.
                          // It is initialized to zero before the first call.
    const void **buffer,  // [in/out] Buffer containing the data. On the
                          // first call it contains a pointer to the
                          // long_callback struct (used for redirections).
    size_t *size,         // [out] Data size in bytes.
    chunk_type *chunk,    // [out] The position of this chunk of data.
    void *tmp_buffer,     // A temporary buffer that receives the UTF-16 data.
    size_t tmp_capacity   // Capacity of the temporary buffer in bytes.
)
{
  UTF8Encoding utf8;
  UTF16Encoding utf16(UTF16Encoding::LITTLE_ENDIAN_BYTE_ORDER);
  const string &value(*static_cast<const string *>(context));

  *buffer = tmp_buffer;
  *size = 0;

  auto tmp_p = static_cast<unsigned char *>(tmp_buffer);
  auto value_p =
      reinterpret_cast<const unsigned char *>(value.data()) + *position;
  auto value_left = value.size() - *position;

  while (value_left != 0) {
    auto utf8_char_size = utf8.sequenceLength(value_p, value_left);
    if (utf8_char_size <= 0) break;
    // The string ends in the middle of a multi-byte sequence.
    if (static_cast<size_t>(utf8_char_size) > value_left) break;

    auto ch = utf8.queryConvert(value_p, value_left);
    // A negative result signals a malformed sequence; emit the Unicode
    // replacement character rather than encoding the error code.
    if (ch < 0) ch = 0xFFFD;

    // convert() reports the number of bytes required; the character has only
    // been written if that many bytes were available.
    auto utf16_char_size = utf16.convert(ch, tmp_p, tmp_capacity);
    if (static_cast<size_t>(utf16_char_size) > tmp_capacity) break;

    *position += utf8_char_size;
    *size += utf16_char_size;
    tmp_p += utf16_char_size;
    tmp_capacity -= utf16_char_size;
    value_p += utf8_char_size;
    value_left -= utf8_char_size;
  }

  if (value_left != 0) {
    // Fewer than 4 bytes free means the loop may have stopped for lack of
    // room (a character needs at most 4 bytes in UTF-16): ask to be called
    // again with an empty buffer.
    if (tmp_capacity < 4) {
      *chunk = chunk_next;
      return;
    }
    // There was room, so the remaining input is an incomplete sequence.
    Scope;
    Error << "Truncated UTF-8 data, bytes left " << value_left;
  }
  *chunk = chunk_last;
}
/*
SQLRETURN SQLGetData(
SQLHSTMT StatementHandle,
SQLUSMALLINT Col_or_Param_Num,
SQLSMALLINT TargetType,
SQLPOINTER TargetValuePtr,
SQLLEN BufferLength,
SQLLEN * StrLen_or_IndPtr);
After SQLGetData() is called with BufferLength=0, the callback function is
called for the first time with the 'chunk' parameter set to one of the
following values:
- chunk_null, if 'StrLen_or_IndPtr' == SQL_NULL_DATA;
- chunk_one, if 'StrLen_or_IndPtr' == 0;
- chunk_first, otherwise.
Since BufferLength=0, the buffer does not contain any data yet.
Unless 'chunk' is chunk_null, chunk_one or chunk_last, the callback
function must set the variables pointed to by the 'buffer' and 'size'
parameters.
SQLGetData() will store at most 'size' bytes to the buffer.
*/
// Receives little-endian UTF-16 data chunk by chunk in 'tmp_buffer', converts
// it to UTF-8 and appends it to the destination string.
//
// On chunk_null and chunk_one the destination is emptied and nothing else
// happens. On chunk_first the destination is emptied (it may still hold a
// previous value) and the temporary buffer is offered for the first read. On
// every later call the bytes accumulated in the temporary buffer are
// converted; bytes that form an incomplete character are moved to the start
// of the buffer so that the next read completes them. A malformed UTF-16
// sequence is replaced by U+FFFD instead of being encoded from the negative
// value reported by the decoder.
void value_traits<string, id_long_nstring>::read_callback(
    void *context,       // User context: the std::string being read into.
    size_t *position,    // Position context. An implementation is free
                         // to use this to track position information. It
                         // is initialized to zero before the first call.
                         // Not used by this implementation.
    void **buffer,       // [in/out] Buffer to copy the data to. On the
                         // first call it contains a pointer to the
                         // long_callback struct (used for redirections).
    size_t *size,        // [in/out] In: amount of data copied into the
                         // buffer after the previous call. Out: capacity
                         // of the buffer.
    chunk_type chunk,    // The position of this chunk; chunk_first means
                         // this is the first call, chunk_last means there
                         // is no more data, chunk_null means this value is
                         // NULL, and chunk_one means the value is empty.
    size_t size_left,    // Contains the amount of data left or 0 if this
                         // information is not available. Not used by this
                         // implementation.
    void *tmp_buffer,    // A temporary buffer that receives the UTF-16 data.
    size_t tmp_capacity  // Capacity of the temporary buffer in bytes.
)
{
  string &result(*static_cast<string *>(context));

  if (chunk == chunk_null || chunk == chunk_one) {
    result.clear();
    return;
  }

  if (chunk == chunk_first) {
    // Later chunks are appended, so start from an empty string even when the
    // destination already holds a value.
    result.clear();
    *buffer = tmp_buffer;
    *size = tmp_capacity;
    return;
  }

  /* Convert at most
   * (char *)(*buffer) + *size - (char *)tmp_buffer
   * bytes containing the UTF-16 character sequence from 'tmp_buffer' and
   * append the resulting UTF-8 characters to 'result'. If the sequence
   * is truncated, move the unconverted bytes to the beginning of
   * 'tmp_buffer', advance the pointer stored in the variable pointed to
   * by 'buffer' past those bytes and reduce the variable pointed to by
   * 'size' by the same number.
   */
  UTF8Encoding utf8;
  UTF16Encoding utf16(UTF16Encoding::LITTLE_ENDIAN_BYTE_ORDER);

  size_t chunk_left =
      static_cast<char *>(*buffer) + *size - static_cast<char *>(tmp_buffer);
  auto chunk_p = static_cast<unsigned char *>(tmp_buffer);

  while (chunk_left != 0) {
    auto char_size = utf16.sequenceLength(chunk_p, chunk_left);
    if (char_size <= 0) break;
    // The chunk ends in the middle of a character.
    if (static_cast<size_t>(char_size) > chunk_left) break;

    auto ch = utf16.queryConvert(chunk_p, chunk_left);
    // A negative result signals a malformed sequence; emit the Unicode
    // replacement character rather than encoding the error code.
    if (ch < 0) ch = 0xFFFD;

    unsigned char utf8_char[6];
    auto utf8_char_size = utf8.convert(ch, utf8_char, sizeof(utf8_char));
    result.append(reinterpret_cast<char *>(utf8_char), utf8_char_size);

    chunk_p += char_size;
    chunk_left -= char_size;
  }

  if (chunk == chunk_last) {
    if (chunk_left == 0) return;
    // No further data will arrive, so the leftover bytes cannot be completed.
    Scope;
    Error << "nvarchar contains truncated data, bytes left " <<
    chunk_left;
    return;
  }

  // Keep the incomplete tail at the front of the buffer and let the next read
  // continue right after it.
  if (chunk_left != 0) memmove(tmp_buffer, chunk_p, chunk_left);
  *buffer = static_cast<char *>(tmp_buffer) + chunk_left;
  *size = tmp_capacity - chunk_left;
}
}
}
More information about the odb-users
mailing list