
Commit

flush_tok and numbers validation fixes
ony committed Mar 7, 2014
1 parent ce7fc64 commit 6d253a2
Showing 7 changed files with 246 additions and 42 deletions.
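
For orientation, here is a minimal caller-side sketch (not part of this commit) of the end-of-input flush behaviour that the new test/number.cpp cases below exercise. The header name pjson.h is an assumption; everything else uses only the pj_* calls and token fields that appear in the diff.

    #include <assert.h>
    #include <stdio.h>
    #include "pjson.h"   /* assumed public header for the pj_* API used below */

    int main(void)
    {
        pj_parser parser;
        char buf[256];
        pj_token tokens[3];

        pj_init(&parser, buf, sizeof(buf));
        pj_feed(&parser, "42", 2);                       /* number may continue in a later chunk */

        pj_poll(&parser, tokens, 3);
        assert( tokens[0].token_type == PJ_STARVING );   /* parser wants more input */

        pj_feed_end(&parser);                            /* declare end of input */
        pj_poll(&parser, tokens, 3);
        assert( tokens[0].token_type == PJ_TOK_NUM );    /* pj_flush_tok completed the number */
        printf("%.*s\n", (int)tokens[0].len, tokens[0].str);   /* prints: 42 */
        assert( tokens[1].token_type == PJ_END );
        return 0;
    }
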
4 changes: 0 additions & 4 deletions TODO.md
@@ -1,5 +1 @@
- string should remember slice and move it to buf on PJ_STARVING
- drop useless S_SPACE and use S_INIT/S_VALUE/S_STR_VALUE instead
- add tests for chunked numbers
- re-factor F_BUF flag (looks like there is no need in it)
- finish pj_feed_end()
57 changes: 31 additions & 26 deletions src/pjson.c
@@ -41,15 +41,6 @@ void pj_feed(pj_parser_ref parser, const char *chunk, size_t len)
return;
}

if (pj_use_buf(parser))
{
/* relocate last partial token to buffer start */
size_t prev_chunk_len = parser->buf_ptr - parser->buf_last;
(void) memmove(parser->buf, parser->buf_last, prev_chunk_len);
parser->buf_ptr = parser->buf + prev_chunk_len;
parser->buf_last = parser->buf;
}

parser->chunk = chunk;
parser->ptr = chunk;
parser->chunk_end = chunk + len;
@@ -92,37 +83,51 @@ void pj_poll(pj_parser_ref parser, pj_token *tokens, size_t len)
assert( parser != NULL );
assert( len > 0 );
assert( tokens != NULL );
assert( parser->state != S_ERR ); /* already reported an error */

if (len == 0) return; /* nothing to fill */

pj_token *tokens_end = tokens + len;

if (pj_is_end(parser))
{
if (parser->state != S_END && parser->state != S_ERR)
if (pj_state(parser) != S_END)
{
pj_flush_tok(parser, tokens);

if (parser->state == S_END && parser->state == S_ERR)
TRACE_TOKEN(tokens);
TRACE_PARSER(parser, parser->ptr);

if (parser->state == S_END || parser->state == S_ERR)
return;

parser->state = S_END;
/* next token to fill if possible */
if (++tokens == tokens_end)
{
/* we should re-enter this code to give back PJ_END */
parser->state = pj_new_state(parser, S_END);
return;
}
}
tokens->token_type = PJ_END;
parser->state = S_END; /* this is final PJ_END */
return;
}

/* next token */
if (++tokens == tokens_end) return;

/* in one of the terminal states? */
if (parser->state == S_END)
{
tokens->token_type = PJ_END;
return;
}
else if (parser->state == S_ERR)
{
tokens->token_type = PJ_ERR;
return;
}
/* prepare suppl. buffer */
if (pj_use_buf(parser)) /* have incomplete token? */
{
/* relocate last partial token to buffer start */
size_t prev_chunk_len = parser->buf_ptr - parser->buf_last;
(void) memmove(parser->buf, parser->buf_last, prev_chunk_len);
parser->buf_ptr = parser->buf + prev_chunk_len;
parser->buf_last = parser->buf;
}
else
{
/* free prev tokens from suppl. buffer */
parser->buf_ptr = parser->buf;
parser->buf_last = parser->buf;
}

for (; tokens != tokens_end && pj_poll_tok(parser, tokens); ++tokens)
28 changes: 24 additions & 4 deletions src/pjson_general.h
@@ -35,15 +35,32 @@ static bool pj_poll_tok(pj_parser_ref parser, pj_token *token);
/* parsing internals */
static void pj_flush_tok(pj_parser_ref parser, pj_token *token)
{
TRACE_FUNC();
assert( pj_state(parser) != S_ERR );
assert( pj_state(parser) != S_END );
if (pj_is_end(parser) && !pj_use_buf(parser))

if (pj_is_end(parser))
{
parser->state = S_END;
token->token_type = PJ_END;
if (pj_use_buf(parser))
{
switch (pj_state(parser))
{
case S_NUM ... S_NUM_END:
pj_number_flush(parser, token);
break;
default:
pj_err_tok(parser, token);
}
}
else
{
/* nothing to flush */
parser->state = S_END;
token->token_type = PJ_END;
}
return;
}
/* TODO: finish token */
/* all other tokens are simply incomplete */
pj_err_tok(parser, token);
}

@@ -55,6 +72,9 @@ static const char
static bool pj_poll_tok(pj_parser_ref parser, pj_token *token)
{
TRACE_FUNC();
assert( pj_state(parser) != S_ERR );
assert( pj_state(parser) != S_END );

const char *p = parser->ptr;
const char * const p_end = parser->chunk_end;
state s = pj_state(parser);
28 changes: 22 additions & 6 deletions src/pjson_number.h
@@ -31,7 +31,7 @@ static bool pj_number_end(pj_parser_ref parser, pj_token *token, state s, const
{
if (!pj_buf_tok(parser, token, p, S_VALUE, PJ_TOK_NUM))
{
parser->state = s | F_BUF;
parser->state = pj_new_state(parser, s) | F_BUF;
return false;
}
}
@@ -44,6 +44,25 @@ static bool pj_number_end(pj_parser_ref parser, pj_token *token, state s, const
return true;
}

static void pj_number_flush(pj_parser_ref parser, pj_token *token)
{
TRACE_FUNC();
assert( parser->ptr == parser->chunk_end );

const state s = pj_state(parser);
switch (s)
{
case S_MAGN_Z:
case S_MAGN_G:
case S_FRAC_NUM:
case S_EXP_NUM:
(void) pj_number_end(parser, token, s, parser->ptr);
break;
default:
pj_err_tok(parser, token);
}
}

static bool pj_exponent_number(pj_parser_ref parser, pj_token *token, const char *p)
{
TRACE_FUNC();
@@ -172,13 +191,10 @@ static bool pj_fraction_start(pj_parser_ref parser, pj_token *token, const char
switch (*p)
{
case '0' ... '9': return pj_fraction_number(parser, token, ++p);
case 'e': case 'E':
case '-': case '+': case '.':
pj_err_tok(parser, token);
return false;

default:
return pj_number_end(parser, token, S_FRAC, p);
pj_err_tok(parser, token);
return false;
}
}

7 changes: 5 additions & 2 deletions src/pjson_state.h
@@ -52,6 +52,9 @@ typedef enum {
static state pj_state(pj_parser_ref parser)
{ return (parser->state & 0xff ); }

static int pj_new_state(pj_parser_ref parser, state s)
{ return (parser->state & ~0xff) | s; }

static bool pj_is_end(pj_parser_ref parser)
{ return (parser->state & F_END ); }

@@ -108,7 +111,7 @@ static void pj_part_tok(pj_parser_ref parser, pj_token *token, state s, const ch
{
TRACE_FUNC();
assert( p == parser->chunk_end );
assert( parser->buf <= parser->buf_last && parser->buf_last <= parser->buf_ptr );
assert( !pj_use_buf(parser) || (parser->buf <= parser->buf_last && parser->buf_last <= parser->buf_ptr) );

parser->state = s;
if (p > parser->chunk)
@@ -132,7 +135,7 @@ static void pj_tok(pj_parser_ref parser, pj_token *token,
{
parser->ptr = p;
parser->chunk = p;
parser->state = s;
parser->state = pj_new_state(parser, s);
token->token_type = tok;
}
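
A side note on the pj_new_state helper added above: assuming the F_* flags occupy bits above the low state byte, as the 0xff masks imply, it swaps only the state byte and leaves flags such as F_END untouched. That is why pj_tok and the S_END transition in pj_poll now go through it rather than assigning the state directly. A small hypothetical fragment (pj_new_state_example is not real code from the repository):

    static void pj_new_state_example(pj_parser_ref parser)
    {
        parser->state |= F_END;                       /* as pj_feed_end() presumably does */
        parser->state = pj_new_state(parser, S_END);  /* replace state byte, keep flag bits */
        assert( pj_state(parser) == S_END );
        assert( pj_is_end(parser) );                  /* F_END survived the transition */
    }
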

145 changes: 145 additions & 0 deletions test/number.cpp
@@ -68,6 +68,13 @@ TEST(number, reals)
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "-0.5", string(tokens[0].str, tokens[0].len) );

pj_init(&parser, 0, 0);
sample = "32. ";
pj_feed(&parser, sample);

pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_ERR, tokens[0].token_type );
}

TEST(number, eng_reals)
@@ -199,3 +206,141 @@ TEST(number, chunked)
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "3.141592", string(tokens[0].str, tokens[0].len) );
}

TEST(number, integer_final)
{
pj_parser parser;
char buf[256];
pj_init(&parser, buf, sizeof(buf));

pj_feed(&parser, "42");

array<pj_token, 3> tokens;

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "42", string(tokens[0].str, tokens[0].len) );
EXPECT_EQ( PJ_END, tokens[1].token_type );

pj_init(&parser, buf, sizeof(buf));
pj_feed(&parser, "-");

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_ERR, tokens[0].token_type );
}

TEST(number, reals_final)
{
pj_parser parser;
char buf[256];
pj_init(&parser, buf, sizeof(buf));

pj_feed(&parser, "3.14");

array<pj_token, 3> tokens;

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "3.14", string(tokens[0].str, tokens[0].len) );
EXPECT_EQ( PJ_END, tokens[1].token_type );

pj_init(&parser, buf, sizeof(buf));
pj_feed(&parser, "3.");

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_ERR, tokens[0].token_type );

pj_init(&parser, buf, sizeof(buf));
pj_feed(&parser, "314e");

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_ERR, tokens[0].token_type );

pj_init(&parser, buf, sizeof(buf));
pj_feed(&parser, "314e-");

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_ERR, tokens[0].token_type );

pj_init(&parser, buf, sizeof(buf));
pj_feed(&parser, "314e-2");

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "314e-2", string(tokens[0].str, tokens[0].len) );
EXPECT_EQ( PJ_END, tokens[1].token_type );
}

TEST(number, chunked_integer_final)
{
pj_parser parser;
char buf[256];
pj_init(&parser, buf, sizeof(buf));

pj_feed(&parser, "4");

array<pj_token, 3> tokens;

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed(&parser, "2");
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "42", string(tokens[0].str, tokens[0].len) );
EXPECT_EQ( PJ_END, tokens[1].token_type );
}

TEST(number, two_final_toks)
{
pj_parser parser;
char buf[256];
pj_init(&parser, buf, sizeof(buf));

pj_feed(&parser, "42");

array<pj_token, 1> tokens;

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);
pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_TOK_NUM, tokens[0].token_type );
EXPECT_EQ( "42", string(tokens[0].str, tokens[0].len) );

pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_END, tokens[0].token_type );
}
19 changes: 19 additions & 0 deletions test/str.cpp
@@ -424,3 +424,22 @@ TEST(str, utf8_surrogate_pair)
EXPECT_EQ( u8"𝄞", string(tokens[0].str, tokens[0].len) );
EXPECT_EQ( PJ_STARVING, tokens[1].token_type );
}

TEST(str, incomplete_final) /* require proper pj_flush_tok */
{
pj_parser parser;
char buf[256];
pj_init(&parser, buf, sizeof(buf));

pj_feed(&parser, "\"abcd");

array<pj_token, 3> tokens;

pj_poll(&parser, tokens.data(), tokens.size());
EXPECT_EQ( PJ_STARVING, tokens[0].token_type );

pj_feed_end(&parser);

pj_poll(&parser, tokens.data(), tokens.size());
ASSERT_EQ( PJ_ERR, tokens[0].token_type );
}
