Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

{171428666}: column character encoding #4990

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 19 additions & 13 deletions db/types.c
Original file line number Diff line number Diff line change
Expand Up @@ -3695,6 +3695,21 @@ TYPES_INLINE int CLIENT_BLOB_to_CLIENT_PSTR2(
return -1;
}

/*
 * Run utf8_validate() over the first `max` bytes of `u`, then accept NUL
 * padding after the offset it reports.
 *
 * Returns 0 if utf8_validate() succeeds and every byte from the reported
 * offset up to, but not including, index max - 1 is '\0'. Returns -1 if
 * utf8_validate() fails or a non-NUL byte is found in that range.
 */
static int utf8_validate_permitting_trailing_zeros(const char *u, int max)
{
    int pos;

    if (utf8_validate(u, max, &pos) != 0)
        return -1;

    /* utf8_validate() stops at the 1st NUL character, so whatever follows
     * that point is only acceptable if it is zero padding. The byte at
     * index max - 1 is not inspected here. */
    while (pos < max - 1) {
        if (u[pos] != '\0')
            return -1;
        ++pos;
    }
    return 0;
}

/**
* Finds out where the input vutf8 string is stored and then determines where it
* should be copied and copies it. Doesn't deal with NULLs.
Expand All @@ -3717,7 +3732,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,
blob_buffer_t *inblob,
blob_buffer_t *outblob, int *outdtsz)
{
int valid_len;
if (out_len > 0)
memset(out, 0, out_len);

Expand All @@ -3742,10 +3756,8 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,
/* validate input blob */
assert(inblob->length == len);

if (utf8_validate(inblob->data, inblob->length, &valid_len) ||
valid_len != len - 1) {
if (utf8_validate_permitting_trailing_zeros(inblob->data, inblob->length))
return -1;
}

memcpy(outblob, inblob, sizeof(blob_buffer_t));
bzero(inblob, sizeof(blob_buffer_t));
Expand All @@ -3767,8 +3779,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,

/* if the string isn't empty, validate the string and make sure its
* length matches len (minus 1 for the NUL byte) */
if (len > 0 &&
(utf8_validate(in, len, &valid_len) || valid_len != len - 1))
if (len > 0 && utf8_validate_permitting_trailing_zeros(in, len))
return -1;

memcpy(out, in, len);
Expand All @@ -3785,7 +3796,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,
* fit in the out buffer, then the string needs to be copied from the in
* buffer to a new out blob */
else if (len <= in_len) {
int valid_len;

if (outblob) {
if (len > gbl_blob_sz_thresh_bytes)
Expand All @@ -3800,8 +3810,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,

/* if the string isn't empty, validate the string and make sure its
* length matches len (minus 1 for the NUL byte) */
if (len > 0 &&
(utf8_validate(in, len, &valid_len) || valid_len != len - 1))
if (len > 0 && utf8_validate_permitting_trailing_zeros(in, len))
return -1;

memcpy(outblob->data, in, len);
Expand All @@ -3821,8 +3830,6 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,
* blob to the out buffer */
else /* len <= out_len */
{
int valid_len;

/* Do not attempt to convert a blob placeholder (i.e., length == -2) */
if (inblob && inblob->length != OSQL_BLOB_FILLER_LENGTH) {
if (!inblob->exists || !inblob->data) {
Expand All @@ -3832,8 +3839,7 @@ static TYPES_INLINE int vutf8_convert(int len, const void *in, int in_len,

/* if the string isn't empty, validate the string and make sure its
* length matches len (minus 1 for the NUL byte) */
if (len > 0 && (utf8_validate(inblob->data, len, &valid_len) ||
valid_len != len - 1))
if (len > 0 && utf8_validate_permitting_trailing_zeros(inblob->data, len))
return -1;

memcpy(out, inblob->data, len);
Expand Down
Binary file modified docs/images/alter-table-ddl.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Binary file modified docs/images/column-constraint.gif
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
12 changes: 12 additions & 0 deletions docs/src/sqlitegen/bubble-generator-data.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,12 @@ stack
}
}
{line OPTION DBPAD = signed-number }
{line ENCODING
{or
{line /string-literal}
{line NONE}
}
}
}

table-constraint {
Expand Down Expand Up @@ -828,6 +834,12 @@ stack
}
NOT NULL
}
{line ENCODING
{or
{line /string-literal}
{line NONE}
}
}
}
}
{line OPTIONS ( table-options ) }
Expand Down
7 changes: 5 additions & 2 deletions schemachange/sc_records.c
Original file line number Diff line number Diff line change
Expand Up @@ -545,12 +545,12 @@ static int prepare_and_verify_newdb_record(struct convert_record_data *data,
if (rc < 0) {
logmsg(LOGMSG_DEBUG, "%s:%d internal error during CHECK constraint\n",
__func__, __LINE__);
return ERR_CONSTR;
return ERR_CHECK_CONSTRAINT;
} else if (rc > 0) {
logmsg(LOGMSG_DEBUG, "%s:%d CHECK constraint failed for '%s'\n",
__func__, __LINE__,
data->iq.usedb->check_constraints[rc - 1].consname);
return ERR_CONSTR;
return ERR_CHECK_CONSTRAINT;
}

rc = verify_record_constraint(&data->iq, data->to, data->trans, p_buf_data,
Expand Down Expand Up @@ -1123,6 +1123,9 @@ static int convert_record(struct convert_record_data *data)
} else if (rc == ERR_VERIFY_PI) {
sc_client_error(data->s, "Error verifying partial indexes! rrn %d genid 0x%llx", rrn, genid);
return -2;
} else if (rc == ERR_CHECK_CONSTRAINT) {
sc_client_error(data->s, "Record violates check constraints rrn %d genid 0x%llx", rrn, genid);
return -2;
} else if (rc != 0) {
sc_client_error(data->s,
"Error adding record rcode %d opfailcode %d ixfailnum %d rrn %d genid 0x%llx, stripe %d", rc,
Expand Down
70 changes: 66 additions & 4 deletions sqlite/src/comdb2build.c
Original file line number Diff line number Diff line change
Expand Up @@ -6406,7 +6406,7 @@ void comdb2DeferForeignKey(Parse *pParse, int isDeferred)
return;
}

static void drop_constraint(Parse *pParse, Token *pName, int type)
static void drop_constraint(Parse *pParse, Token *pName, int type, int hush)
{
if (comdb2IsPrepareOnly(pParse))
return;
Expand All @@ -6433,7 +6433,7 @@ static void drop_constraint(Parse *pParse, Token *pName, int type)
if (cons) {
/* Mark it as dropped. */
cons->flags |= CONS_DELETED;
} else {
} else if (!hush) {
pParse->rc = SQLITE_ERROR;
sqlite3ErrorMsg(pParse, "Constraint '%s' not found.", name);
goto cleanup;
Expand All @@ -6454,15 +6454,15 @@ void comdb2DropForeignKey(Parse *pParse, /* Parser context */
Token *pName /* Foreign key name */
)
{
drop_constraint(pParse, pName, CONS_FKEY);
drop_constraint(pParse, pName, CONS_FKEY, 0);
return;
}

void comdb2DropConstraint(Parse *pParse, /* Parser context */
Token *pName /* Foreign key name */
)
{
drop_constraint(pParse, pName, CONS_ALL);
drop_constraint(pParse, pName, CONS_ALL, 0);
return;
}

Expand Down Expand Up @@ -7690,3 +7690,65 @@ void create_default_consumer_sp(Parse *p, char *spname)
comdb2prepareNoRows(v, p, 0, sc, &comdb2SqlSchemaChange, (vdbeFuncArgFree)&free_schema_change_type);

}

/*
 * Handle the `ENCODING <string>` / `ENCODING NONE` column clause.
 *
 * pParse  Parser context; the target column is taken from
 *         pParse->comdb2_ddl_ctx.
 * t       Token holding the requested character set, or NULL for
 *         `ENCODING NONE`. Only "utf8" and "utf-8" (case-insensitive) are
 *         accepted.
 * alter   Non-zero to operate on ctx->alter_column; zero to operate on the
 *         column returned by LISTC_BOT() of the schema's column list.
 *
 * The encoding is enforced through a generated CHECK constraint named
 * "$" GEN_CONS_PREFIX "_CHAR_ENC_<column>" whose expression is
 * `utf8_validate(<column>)=0`. `ENCODING NONE` marks that constraint as
 * dropped and stays silent if it does not exist. Errors (unknown charset,
 * non-string column type) are reported through setError() with
 * SQLITE_MISUSE.
 */
void comdb2ChangeCharacterSet(Parse *pParse, Token *t, int alter)
{
    struct comdb2_ddl_context *ctx;
    struct comdb2_column *column;
    sqlite3 *db = pParse->db;

    char *charset = NULL;
    char expr[MAXCOLNAME + sizeof("utf8_validate()=0")];
    char constraint_name[MAXCOLNAME + sizeof("$" GEN_CONS_PREFIX "_CHAR_ENC_")];
    int nw;

    Token colToken;
    Token funcToken;
    ExprList *arg;
    Expr *func;
    Expr *zero;
    Expr *equality;

    if (t != NULL) {
        charset = sqlite3NameFromToken(db, t);
        if (charset == NULL)
            return;

        /* so far only utf8 is supported */
        if (strcasecmp(charset, "utf8") != 0 && strcasecmp(charset, "utf-8") != 0) {
            setError(pParse, SQLITE_MISUSE, "unknown charset");
            goto out;
        }
    }

    ctx = pParse->comdb2_ddl_ctx;
    if (alter)
        column = ctx->alter_column;
    else
        column = (struct comdb2_column *)LISTC_BOT(&ctx->schema->column_list);

    /* Character encodings only make sense for string-typed columns. */
    if (column->type != SQL_TYPE_CSTRING && column->type != SQL_TYPE_VARCHAR && column->type != SQL_TYPE_CHAR) {
        setError(pParse, SQLITE_MISUSE, "invalid column type to use character encoding");
        goto out;
    }

    /* The generated name is handed over through pParse->constraintName. */
    snprintf(constraint_name, sizeof(constraint_name), "$" GEN_CONS_PREFIX "_CHAR_ENC_%s", column->name);
    sqlite3TokenInit(&pParse->constraintName, constraint_name);

    if (t == NULL) {
        /* hush=1: a column without an encoding constraint is not an error. */
        drop_constraint(pParse, &pParse->constraintName, CONS_CHECK, 1);
    } else {
        sqlite3TokenInit(&colToken, column->name);
        sqlite3TokenInit(&funcToken, "utf8_validate");

        /* Build the expression tree for: utf8_validate(<column>) = 0 */
        arg = sqlite3ExprListAppend(pParse, NULL, sqlite3ExprAlloc(db, TK_ID, &colToken, 0));
        func = sqlite3ExprFunction(pParse, arg, &funcToken, 0);
        zero = sqlite3ExprAlloc(db, TK_INTEGER, &sqlite3IntTokens[0], 0);

        equality = sqlite3PExpr(pParse, TK_EQ, func, zero);
        /* Textual form of the same expression; the end pointer passed below
         * is one past the terminating NUL written by snprintf(). */
        nw = snprintf(expr, sizeof(expr), "utf8_validate(%s)=0", column->name);
        comdb2AddCheckConstraint(pParse, equality, expr, expr + nw + 1);
    }

    /* constraint_name is a stack buffer: do not leave the long-lived Parse
     * object pointing at it, or a later constraint clause would read dead
     * stack memory as its name.
     * NOTE(review): assumes drop_constraint() and comdb2AddCheckConstraint()
     * consume the name before returning -- confirm. */
    pParse->constraintName.z = NULL;
    pParse->constraintName.n = 0;
out:
    sqlite3DbFree(db, charset);
}
1 change: 1 addition & 0 deletions sqlite/src/comdb2build.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ void comdb2AddIndex(Parse *, Token *, ExprList *, int, Expr *, const char *,
const char *, int, u8, int, ExprList *);
void comdb2AddDbpad(Parse *, int);
void comdb2AddCheckConstraint(Parse *, Expr *, const char *, const char *);
void comdb2ChangeCharacterSet(Parse *pParse, Token *, int);
void comdb2CreateIndex(Parse *, Token *, Token *, SrcList *, ExprList *, int,
Token *, Expr *, const char *, const char *, int, int,
u8, int, ExprList *, int);
Expand Down
31 changes: 31 additions & 0 deletions sqlite/src/func.c
Original file line number Diff line number Diff line change
Expand Up @@ -1418,6 +1418,36 @@ static void uncompressGzipFunc(
return;
}

/*
** SQL function utf8_validate(X).
**
** Return 0 if payload is utf8. Return (-N - 1), where N is the index
** of the first malformed character.
**
** BLOB arguments are validated over their byte length. TEXT arguments are
** validated over their byte length plus one, to cover the terminating \0.
** Any other argument type is rejected with N = 0, i.e. the result is -1.
*/
int utf8_validate(const char *str, int len, int *valid_len);
static void comdb2Utf8ValidateFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  /* valid_len must start defined: the default branch below never calls
  ** utf8_validate(), yet the result expression still reads valid_len. */
  int valid_len = 0;
  int rc, len;
  const char *z;
  assert(argc == 1);
  UNUSED_PARAMETER(argc);

  switch( sqlite3_value_type(argv[0]) ){
    case SQLITE_BLOB:
      len = sqlite3_value_bytes(argv[0]);
      z = sqlite3_value_blob(argv[0]);
      rc = utf8_validate(z, len, &valid_len);
      break;
    case SQLITE_TEXT:
      len = sqlite3_value_bytes(argv[0]) + 1; /* +1 for \0 */
      z = (const char *)sqlite3_value_text(argv[0]);
      rc = utf8_validate(z, len, &valid_len);
      break;
    default:
      /* Not a string-like value: report failure at index 0. */
      rc = -1;
      break;
  }
  sqlite3_result_int(context, rc == 0 ? rc : (-valid_len - 1));
}
#endif /* defined(SQLITE_BUILDING_FOR_COMDB2) */

/*
Expand Down Expand Up @@ -3093,6 +3123,7 @@ void sqlite3RegisterBuiltinFunctions(void){
FUNCTION(comdb2_starttime, 0, 0, 0, comdb2StartTimeFunc),
FUNCTION(comdb2_user, 0, 0, 0, comdb2UserFunc),
FUNCTION(comdb2_last_cost, 0, 0, 0, comdb2LastCostFunc),
FUNCTION(utf8_validate, 1, 0, 0, comdb2Utf8ValidateFunc),
FUNCTION(checksum_md5, 1, 0, 0, md5Func),
FUNCTION(compress, 1, 0, 0, compressFunc),
FUNCTION(uncompress, 1, 0, 0, uncompressFunc),
Expand Down
8 changes: 8 additions & 0 deletions sqlite/src/parse.y
Original file line number Diff line number Diff line change
Expand Up @@ -542,6 +542,8 @@ ccons ::= PRIMARY KEY sortorder(Z) onconf(R) autoinc(I).
{sqlite3AddPrimaryKey(pParse,0,R,I,Z);}
%endif !SQLITE_BUILDING_FOR_COMDB2
%ifdef SQLITE_BUILDING_FOR_COMDB2
ccons ::= ENCODING STRING(H). {comdb2ChangeCharacterSet(pParse,&H,0);}
ccons ::= ENCODING NONE. {comdb2ChangeCharacterSet(pParse,NULL,0);}
ccons ::= UNIQUE onconf(R). {
comdb2AddIndex(pParse, 0, 0, R, 0, 0, 0, SQLITE_SO_ASC,
SQLITE_IDXTYPE_UNIQUE, 0, 0);
Expand Down Expand Up @@ -2037,6 +2039,12 @@ alter_table_alter_column_cmd ::= SET NOT NULL. {
alter_table_alter_column_cmd ::= DROP NOT NULL. {
comdb2AlterColumnDropNotNull(pParse);
}
alter_table_alter_column_cmd ::= ENCODING STRING(H). {
comdb2ChangeCharacterSet(pParse,&H,1);
}
alter_table_alter_column_cmd ::= ENCODING NONE. {
comdb2ChangeCharacterSet(pParse,NULL,1);
}
alter_table_alter_column ::= alter_table_alter_column_start
alter_table_alter_column_cmd. {
comdb2AlterColumnEnd(pParse);
Expand Down
1 change: 1 addition & 0 deletions sqlite/tool/mkkeywordhash.c
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ static Keyword aKeywordTable[] = {
{ "DISTINCT", "TK_DISTINCT", ALWAYS },
{ "DO", "TK_DO", UPSERT },
{ "DROP", "TK_DROP", ALWAYS },
{ "ENCODING", "TK_ENCODING", ALWAYS },
{ "END", "TK_END", ALWAYS },
{ "EACH", "TK_EACH", TRIGGER },
{ "ELSE", "TK_ELSE", ALWAYS },
Expand Down
2 changes: 2 additions & 0 deletions tests/auth.test/t09.expected
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@
(candidate='EACH')
(candidate='ELSE')
(candidate='ENABLE')
(candidate='ENCODING')
(candidate='END')
(candidate='ESCAPE')
(candidate='EXCEPT')
Expand Down Expand Up @@ -377,6 +378,7 @@
(candidate='unlikely()')
(candidate='upper()')
(candidate='usleep()')
(candidate='utf8_validate()')
(candidate='zeroblob()')
(username='user1')
(username='user2')
Expand Down
5 changes: 3 additions & 2 deletions tests/comdb2sys.test/comdb2sys.expected
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,9 @@
(tablename='t3', bytes=73728)
(tablename='t4', bytes=73728)
[select * from comdb2_tablesizes order by tablename] rc 0
(KEYWORDS_COUNT=223)
(KEYWORDS_COUNT=224)
[SELECT COUNT(*) AS KEYWORDS_COUNT FROM comdb2_keywords] rc 0
(RESERVED_KW=66)
(RESERVED_KW=67)
[SELECT COUNT(*) AS RESERVED_KW FROM comdb2_keywords WHERE reserved = 'Y'] rc 0
(NONRESERVED_KW=157)
[SELECT COUNT(*) AS NONRESERVED_KW FROM comdb2_keywords WHERE reserved = 'N'] rc 0
Expand All @@ -104,6 +104,7 @@
(name='DISTINCT', reserved='Y')
(name='DROP', reserved='Y')
(name='ELSE', reserved='Y')
(name='ENCODING', reserved='Y')
(name='ESCAPE', reserved='Y')
(name='EXCEPT', reserved='Y')
(name='EXISTS', reserved='Y')
Expand Down
2 changes: 1 addition & 1 deletion tests/ddl_no_csc2.test/t09_check.expected
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
')
(rows inserted=1)
(rows inserted=1)
[ALTER TABLE t1 ADD CONSTRAINT valid_colors CHECK (color IN ('red', 'green', 'blue'))] failed with rc 240 Record violates foreign constraints rrn xx genid xx
[ALTER TABLE t1 ADD CONSTRAINT valid_colors CHECK (color IN ('red', 'green', 'blue'))] failed with rc 240 Record violates check constraints rrn xx genid xx
(csc2='schema
{
cstring color[11] null = yes
Expand Down
17 changes: 17 additions & 0 deletions tests/ddl_no_csc2.test/t15_encoding.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[CREATE TABLE t15(a INTEGER ENCODING 'ascii')] failed with rc -3 unknown charset
[CREATE TABLE t15(a INTEGER ENCODING 'utf8')] failed with rc -3 invalid column type to use character encoding
[CREATE TABLE t15(a TEXT ENCODING 'utf8')] failed with rc -3 invalid column type to use character encoding
(csc2='schema
{
cstring a[11] null = yes
}
constraints
{
check "$CONSTRAINT_CHAR_ENC_a" = {where utf8_validate(a)=0}
}
')
[INSERT INTO t15 VALUES (CAST(x'616263FF616263' AS TEXT))] failed with rc 403 CHECK constraint violation CHECK constraint failed for '$CONSTRAINT_CHAR_ENC_a' unable to add record rc = 320
(COUNT(*)=0)
(rows inserted=1)
(COUNT(*)=1)
[ALTER TABLE t15 ALTER COLUMN a ENCODING 'utf8'] failed with rc 240 Record violates check constraints rrn xx genid xx
Loading