Performance of bound vs unbound retrieval #309

mloskot · 2022-04-15T19:05:55Z

mloskot
Apr 15, 2022
Maintainer

The nanodbc 2.14 introduced optional binding to support out-of-order retrieval of unbound columns with SQLGetData (see #236). Comments to related issue #228 discussing use of SQLGetData for all columns, without binding buffers, pointed out potential of performance hit.

I've run a very basic benchmark based on 10K, 100K and 1000K rows with numeric as well as large binary and text values fetching then using both strategies: bound columns (followed by unbound in SELECT) and unbound columns.
I've used local instance of SQL Server 2019 and Visual Studio 2022 to build the benchmark program (attached below).

TL;TR: Unbinding all or offending only columns of a statement result does not impose a clear performance hit.

Results

The table shows times from three benchmark runs against the three sizes of tables for each binding/unbinding strategy:

nanodbc_fetch_bound_and_unbound.csv

Benchmark Program

#include <nanodbc/nanodbc.h>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

auto connection_string = NANODBC_TEXT("Driver={ODBC Driver 17 for SQL Server};Server=mloskot;Database=nanodbc;UID=sa;PWD=Password!123;");

enum class binding_strategy { bound_then_unbound, unbound_offending, unbound_all };

auto benchmark_bound_followed_by_unbound(nanodbc::string const& connection_strinb, nanodbc::string const& table, binding_strategy binding)
-> std::tuple<double, std::size_t, std::size_t, std::size_t>
{
    // i, f are bound
    // s, b, t are unbound
    nanodbc::string select;
    if (binding == binding_strategy::bound_then_unbound)
        select = NANODBC_TEXT("SELECT i, f, s, b, t FROM ") + table;
    else
        select = NANODBC_TEXT("SELECT f, s, b, i, t FROM ") + table;

    nanodbc::connection c(connection_string);
    nanodbc::statement s(c, select);
    nanodbc::result r = s.execute();

    std::vector<unsigned char> vb;
    nanodbc::string vs, vt;
    double vf;
    int vi;
    std::size_t max_vb = 0, max_vt = 0;
    std::size_t row_count = 0;

    double duration = 0;
    if (binding == binding_strategy::bound_then_unbound)
    {
        auto t0 = std::chrono::high_resolution_clock::now();
        while (r.next())
        {
            // bound
            vi = r.get<int>(0);
            vf = r.get<double>(1);
            // unbound
            vs = r.get<nanodbc::string>(2);
            vb = r.get<std::vector<unsigned char>>(3);
            vt = r.get<nanodbc::string>(4);
            max_vb = std::max(max_vb, vb.size());
            max_vt = std::max(max_vt, vt.size());
            row_count++;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        duration = std::chrono::duration<double, std::milli>(t1 - t0).count();
    }
    else // unbind offending or all
    {
        // Two ways to unbind column `i` to avoid "Invalid Descriptor Index" error
        if (binding == binding_strategy::unbound_offending)
            r.unbind(3);
        else if (binding == binding_strategy::unbound_all)
            r.unbind();
        else assert(0);

        auto t0 = std::chrono::high_resolution_clock::now();
        while (r.next())
        {
            vf = r.get<double>(0);
            vs = r.get<nanodbc::string>(1);
            vb = r.get<std::vector<unsigned char>>(2);
            vi = r.get<int>(3);
            vt = r.get<nanodbc::string>(4);
            max_vb = std::max(max_vb, vb.size());
            max_vt = std::max(max_vt, vt.size());
            row_count++;
        }
        auto t1 = std::chrono::high_resolution_clock::now();
        duration = std::chrono::duration<double, std::milli>(t1 - t0).count();
    }

    assert(row_count >= 10000);
    assert(max_vb == 8000);
    assert(max_vt >= 1024);
    return { duration, row_count, max_vb, max_vt };
}

void print_header(std::wostream& os)
{
    os << std::setw(21) << std::left << L"Binding,";
    os << std::setw(16) << std::left << L"Fetched Rows,";
    os << std::setw(11) << std::left << L"Time (ms)";
    os << std::endl;
}

void print_result(std::wostream& os, std::wstring tag, double duration, std::size_t row_count, std::size_t max_vb, std::size_t max_vt)
{
    os << std::setw(20) << std::left << tag << L',';
    os << std::setw(15) << std::left << row_count << L',';
    os.precision(2);
    os << std::setw(10) << std::left << std::fixed << duration << L',';
#ifdef _DEBUG
    os << "\tmax_vb = " << max_vb << L", max_vt = " << max_vt;
#endif
    os << std::endl;
}

int main()
{
    try
    {
        print_header(std::wcout);

#ifdef _DEBUG
        for (auto table : { L"t10000" })
#else
        for (auto table : { L"t10000", L"t100000", L"t1000000" })
#endif
        {
            auto [et1, rc1, sb1, st1] = benchmark_bound_followed_by_unbound(connection_string, table, binding_strategy::bound_then_unbound);
            print_result(std::wcout, L"bound then unbound", et1, rc1, sb1, st1);
            auto [et2, rc2, sb2, st2] = benchmark_bound_followed_by_unbound(connection_string, table, binding_strategy::unbound_offending);
            print_result(std::wcout, L"unbind offending", et2, rc2, sb2, st2);
            auto [et3, rc3, sb3, st3] = benchmark_bound_followed_by_unbound(connection_string, table, binding_strategy::unbound_all);
            print_result(std::wcout, L"unbind all", et3, rc3, sb3, st3);
        }
    }
    catch (std::exception const& e)
    {
        std::cerr << e.what() << std::endl;
    }
}

Test Data

DROP TABLE IF EXISTS [t10000];
DROP TABLE IF EXISTS [t100000];
DROP TABLE IF EXISTS [t1000000];
GO
CREATE TABLE [t10000] ([fid] INT IDENTITY(1, 1) PRIMARY KEY, [s] VARCHAR(60) NULL, [i] [INT] NULL, [b] [VARBINARY](MAX) NULL, [f] [float] NULL, [t] TEXT NULL);
CREATE TABLE [t100000] ([fid] INT IDENTITY(1, 1) PRIMARY KEY, [s] VARCHAR(60) NULL, [i] [INT] NULL, [b] [VARBINARY](MAX) NULL, [f] [float] NULL, [t] TEXT NULL);
CREATE TABLE [t1000000] ([fid] INT IDENTITY(1, 1) PRIMARY KEY, [s] VARCHAR(60) NULL, [i] [INT] NULL, [b] [VARBINARY](MAX) NULL, [f] [float] NULL, [t] TEXT NULL);
GO

DECLARE @count INT = 1000000;
DECLARE @i INT = 0;
DECLARE @f FLOAT = 1.0;
DECLARE @si VARCHAR(60);
DECLARE @sf VARCHAR(60);
DECLARE @s VARCHAR(60);
DECLARE @t VARCHAR(MAX);
DECLARE @tr VARCHAR(MAX)
WHILE @i < @count
BEGIN
  SET @i = @i + 1;
  SET @f = @f + (RAND() * 100);
  SET @si = CAST(@i AS VARCHAR(60));
  SET @sf = CAST(@f AS VARCHAR(60));
  SET @tr = (SELECT r FROM OpenJson((SELECT Crypt_Gen_Random(1024) r FOR json path)) WITH (r VARCHAR(max)))

  SET @s = 'Item number ' + RIGHT('000000' + CAST(@i AS VARCHAR(6)), 6);
  SET @t = @s + ' Random: ' + @tr;

  IF @i < 10001
    BEGIN
    INSERT INTO [t10000]    ([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
    INSERT INTO [t100000]   ([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
    INSERT INTO [t1000000]  ([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
    END
  ELSE
    IF @i < 100001
      BEGIN
      INSERT INTO [t100000] ([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
      INSERT INTO [t1000000]([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
      END
    ELSE
      BEGIN
      INSERT INTO [t1000000]([i],[f],[s],[t],[b]) VALUES (@si,@sf,@s,@t,(SELECT Crypt_Gen_Random(8000)));
      END
END;
GO

SELECT '[t10000]' AS [table], (SELECT count(*) FROM [t10000]) AS [rows], (SELECT TOP(1) [fid] FROM [t10000] ORDER BY [fid] ASC) AS [first fid], (SELECT TOP(1) [fid] FROM [t10000] ORDER BY [fid] DESC) AS [last fid];
SELECT '[t100000]' AS [table], (SELECT count(*) FROM [t100000]) AS [rows], (SELECT TOP(1) [fid] FROM [t100000] ORDER BY [fid] ASC) AS [first fid], (SELECT TOP(1) [fid] FROM [t100000] ORDER BY [fid] DESC) AS [last fid];
SELECT '[t1000000]' AS [table], (SELECT count(*) FROM [t1000000]) AS [rows], (SELECT TOP(1) [fid] FROM [t1000000] ORDER BY [fid] ASC) AS [first fid], (SELECT TOP(1) [fid] FROM [t1000000] ORDER BY [fid] DESC) AS [last fid];
GO

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Performance of bound vs unbound retrieval #309

{{title}}

{{editor}}'s edit

{{editor}}'s edit

Replies: 0 comments

Select a reply

Performance of bound vs unbound retrieval #309

mloskot Apr 15, 2022 Maintainer

Results

Benchmark Program

Test Data

Replies: 0 comments

mloskot
Apr 15, 2022
Maintainer