From ba288306dedcd2767f3a265d456c52da8cdeee71 Mon Sep 17 00:00:00 2001 From: Jeremy Evans Date: Tue, 8 Aug 2023 15:44:07 -0700 Subject: [PATCH] Add pg_auto_parameterize_in_array extension, for converting IN/NOT IN to = ANY or != ALL for more types When I originally developed the pg_auto_parameterize, I only handled integer arrays in order to avoid having that extension depend on the pg_array extension. This extension depends on both and adds support for the additional types. --- CHANGELOG | 2 + doc/testing.rdoc | 2 +- .../pg_auto_parameterize_in_array.rb | 110 +++++++++++++ spec/adapters/postgres_spec.rb | 80 ++++++++- .../pg_auto_parameterize_in_array_spec.rb | 155 ++++++++++++++++++ www/pages/plugins.html.erb | 4 + 6 files changed, 351 insertions(+), 2 deletions(-) create mode 100644 lib/sequel/extensions/pg_auto_parameterize_in_array.rb create mode 100644 spec/extensions/pg_auto_parameterize_in_array_spec.rb diff --git a/CHANGELOG b/CHANGELOG index 1864b0ffac..68a6e912fa 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ === master +* Add pg_auto_parameterize_in_array extension, for converting IN/NOT IN to = ANY or != ALL for more types (jeremyevans) + * Fix literalization of infinite and NaN float values in PostgreSQL array bound variables (jeremyevans) === 5.71.0 (2023-08-01) diff --git a/doc/testing.rdoc b/doc/testing.rdoc index 8d80a0e966..e2c4a87591 100644 --- a/doc/testing.rdoc +++ b/doc/testing.rdoc @@ -176,7 +176,7 @@ SEQUEL_MODEL_PREPARED_STATEMENTS :: Use the prepared_statements plugin when runn SEQUEL_MODEL_THROW_FAILURES :: Use the throw_failures plugin when running the specs SEQUEL_NO_CACHE_ASSOCIATIONS :: Don't cache association metadata when running the specs SEQUEL_NO_PENDING :: Don't skip any specs, try running all specs (note, can cause lockups for some adapters) -SEQUEL_PG_AUTO_PARAMETERIZE :: Use the pg_auto_parameterize extension when running the postgres specs +SEQUEL_PG_AUTO_PARAMETERIZE :: Use the pg_auto_parameterize extension when 
running the postgres specs. Value can be +in_array+ to test the pg_auto_parameterize_in_array extension, and +in_array_string+ to test the pg_auto_parameterize_in_array extension with the +:treat_string_list_as_text_array+ Database option set. SEQUEL_PG_TIMESTAMPTZ :: Use the pg_timestamptz extension when running the postgres specs SEQUEL_PRIMARY_KEY_LOOKUP_CHECK_VALUES :: Use the primary_key_lookup_check_values extension when running the adapter or integration specs SEQUEL_QUERY_PER_ASSOCIATION_DB_0_URL :: Run query-per-association integration tests with multiple databases (all 4 must be set to run) diff --git a/lib/sequel/extensions/pg_auto_parameterize_in_array.rb b/lib/sequel/extensions/pg_auto_parameterize_in_array.rb new file mode 100644 index 0000000000..d7b72fd86b --- /dev/null +++ b/lib/sequel/extensions/pg_auto_parameterize_in_array.rb @@ -0,0 +1,110 @@ +# frozen-string-literal: true +# +# The pg_auto_parameterize_in_array extension builds on the pg_auto_parameterize +# extension, adding support for handling additional types when converting from +# IN to = ANY and NOT IN to != ALL: +# +# DB[:table].where(column: [1.0, 2.0, ...]) +# # Without extension: column IN ($1::numeric, $2::numeric, ...) # bound variables: 1.0, 2.0, ... +# # With extension: column = ANY($1::numeric[]) # bound variables: [1.0, 2.0, ...] +# +# This prevents the use of an unbounded number of bound variables based on the +# size of the array, as well as using different SQL for different array sizes. 
+# +# The following types are supported when doing the conversions, with the database +# type used: +# +# Float :: if any are infinite or NaN, double precision, otherwise numeric +# BigDecimal :: numeric +# Date :: date +# Time :: timestamp (or timestamptz if pg_timestamptz extension is used) +# DateTime :: timestamp (or timestamptz if pg_timestamptz extension is used) +# Sequel::SQLTime :: time +# Sequel::SQL::Blob :: bytea +# +# String values are also supported using the +text+ type, but only if the +# +:treat_string_list_as_text_array+ Database option is used. This is because +# treating strings as text can break programs, since the type for +# literal strings in PostgreSQL is +unknown+, not +text+. +# +# The conversion is only done for single dimensional arrays that have more +# than one element, where all elements are of the same class (other than +# nil values). +# +# Related module: Sequel::Postgres::AutoParameterizeInArray + +module Sequel + module Postgres + # Convert IN/NOT IN array predicates to = ANY/!= ALL for additional types. + module AutoParameterizeInArray + # Transform column IN (...) expressions into column = ANY($) + # and column NOT IN (...) expressions into column != ALL($) + # using an array bound variable for the ANY/ALL argument, + # if all values inside the predicate are of the same type and + # the type is handled by the extension. + # This is the same optimization PostgreSQL performs internally, + # but this reduces the number of bound variables. + def complex_expression_sql_append(sql, op, args) + case op + when :IN, :"NOT IN" + l, r = args + if auto_param?(sql) && (type = _bound_variable_type_for_array(r)) + if op == :IN + op = :"=" + func = :ANY + else + op = :!= + func = :ALL + end + args = [l, Sequel.function(func, Sequel.pg_array(r, type))] + end + end + + super + end + + private + + # The bound variable type string to use for the bound variable array. + # Returns nil if a bound variable should not be used for the array. 
+ def _bound_variable_type_for_array(r) + return unless Array === r && r.size > 1 + classes = r.map(&:class) + classes.uniq! + classes.delete(NilClass) + return unless classes.size == 1 + + klass = classes[0] + if klass == Integer + # This branch is not taken on Ruby <2.4, because of the Fixnum/Bignum split. + # However, that causes no problems as pg_auto_parameterize handles integer + # arrays natively (though the SQL used is different) + "int8" + elsif klass == String + "text" if db.typecast_value(:boolean, db.opts[:treat_string_list_as_text_array]) + elsif klass == BigDecimal + "numeric" + elsif klass == Date + "date" + elsif klass == Time + @db.cast_type_literal(Time) + elsif klass == Float + # PostgreSQL treats literal floats as numeric, not double precision + # But older versions of PostgreSQL don't handle Infinity/NaN in numeric + r.all?{|v| v.nil? || v.finite?} ? "numeric" : "double precision" + elsif klass == Sequel::SQLTime + "time" + elsif klass == DateTime + @db.cast_type_literal(DateTime) + elsif klass == Sequel::SQL::Blob + "bytea" + end + end + end + end + + Database.register_extension(:pg_auto_parameterize_in_array) do |db| + db.extension(:pg_array, :pg_auto_parameterize) + db.extend_datasets(Postgres::AutoParameterizeInArray) + end +end diff --git a/spec/adapters/postgres_spec.rb b/spec/adapters/postgres_spec.rb index 36fbc299ff..3fb3272395 100644 --- a/spec/adapters/postgres_spec.rb +++ b/spec/adapters/postgres_spec.rb @@ -13,7 +13,17 @@ end DB.extension :pg_hstore if DB.type_supported?('hstore') DB.extension :pg_multirange if DB.server_version >= 140000 -DB.extension :pg_auto_parameterize if uses_pg && ENV['SEQUEL_PG_AUTO_PARAMETERIZE'] + +if uses_pg && ENV['SEQUEL_PG_AUTO_PARAMETERIZE'] + if ENV['SEQUEL_PG_AUTO_PARAMETERIZE'] == 'in_array_string' + DB.extension :pg_auto_parameterize_in_array + DB.opts[:treat_string_list_as_text_array] = 't' + elsif ENV['SEQUEL_PG_AUTO_PARAMETERIZE'] == 'in_array' + DB.extension :pg_auto_parameterize_in_array + 
else + DB.extension :pg_auto_parameterize + end +end describe 'PostgreSQL adapter' do before do @@ -64,6 +74,74 @@ def c.exec_prepared(*); super; nil end Sequel.datetime_class = Time end + it "should be able to handle various types of IN/NOT IN queries" do + ds = @db.select(1) + ds.where(2=>[2, 3]).wont_be_empty + ds.where(4=>[2, 3]).must_be_empty + ds.exclude(4=>[2, 3]).wont_be_empty + ds.exclude(2=>[2, 4]).must_be_empty + + ds.where('2'=>%w[2 3]).wont_be_empty + ds.where('4'=>%w[2 3]).must_be_empty + ds.exclude('4'=>%w[2 3]).wont_be_empty + ds.exclude('2'=>%w[2 3]).must_be_empty + + ds.where(2=>[2, 3].map{|i| BigDecimal(i)}).wont_be_empty + ds.where(4=>[2, 3].map{|i| BigDecimal(i)}).must_be_empty + ds.exclude(4=>[2, 3].map{|i| BigDecimal(i)}).wont_be_empty + ds.exclude(2=>[2, 3].map{|i| BigDecimal(i)}).must_be_empty + + ds.where(Date.new(2021, 2)=>[2, 3].map{|i| Date.new(2021, i)}).wont_be_empty + ds.where(Date.new(2021, 4)=>[2, 3].map{|i| Date.new(2021, i)}).must_be_empty + ds.exclude(Date.new(2021, 4)=>[2, 3].map{|i| Date.new(2021, i)}).wont_be_empty + ds.exclude(Date.new(2021, 2)=>[2, 3].map{|i| Date.new(2021, i)}).must_be_empty + + ds.where(DateTime.new(2021, 2)=>[2, 3].map{|i| DateTime.new(2021, i)}).wont_be_empty + ds.where(DateTime.new(2021, 4)=>[2, 3].map{|i| DateTime.new(2021, i)}).must_be_empty + ds.exclude(DateTime.new(2021, 4)=>[2, 3].map{|i| DateTime.new(2021, i)}).wont_be_empty + ds.exclude(DateTime.new(2021, 2)=>[2, 3].map{|i| DateTime.new(2021, i)}).must_be_empty + + ds.where(Time.local(2021, 2)=>[2, 3].map{|i| Time.local(2021, i)}).wont_be_empty + ds.where(Time.local(2021, 4)=>[2, 3].map{|i| Time.local(2021, i)}).must_be_empty + ds.exclude(Time.local(2021, 4)=>[2, 3].map{|i| Time.local(2021, i)}).wont_be_empty + ds.exclude(Time.local(2021, 2)=>[2, 3].map{|i| Time.local(2021, i)}).must_be_empty + + ds.where(Sequel::SQLTime.create(2, 0, 0)=>[2, 3].map{|i| Sequel::SQLTime.create(i, 0, 0)}).wont_be_empty + ds.where(Sequel::SQLTime.create(4, 0, 
0)=>[2, 3].map{|i| Sequel::SQLTime.create(i, 0, 0)}).must_be_empty + ds.exclude(Sequel::SQLTime.create(4, 0, 0)=>[2, 3].map{|i| Sequel::SQLTime.create(i, 0, 0)}).wont_be_empty + ds.exclude(Sequel::SQLTime.create(2, 0, 0)=>[2, 3].map{|i| Sequel::SQLTime.create(i, 0, 0)}).must_be_empty + + ds.where(2=>[2, 3].map{|i| Float(i)}).wont_be_empty + ds.where(4=>[2, 3].map{|i| Float(i)}).must_be_empty + ds.exclude(4=>[2, 3].map{|i| Float(i)}).wont_be_empty + ds.exclude(2=>[2, 3].map{|i| Float(i)}).must_be_empty + + ds.where(2=>[2.0, 3.0, 1.0/0.0, -1.0/0.0, 0.0/0.0]).wont_be_empty + ds.where(4=>[2.0, 3.0, 1.0/0.0, -1.0/0.0, 0.0/0.0]).must_be_empty + ds.exclude(4=>[2.0, 3.0, 1.0/0.0, -1.0/0.0, 0.0/0.0]).wont_be_empty + ds.exclude(2=>[2.0, 3.0, 1.0/0.0, -1.0/0.0, 0.0/0.0]).must_be_empty + + ds.where(Sequel.blob('2')=>%w[2 3].map{|i| Sequel.blob(i)}).wont_be_empty + ds.where(Sequel.blob('4')=>%w[2 3].map{|i| Sequel.blob(i)}).must_be_empty + ds.exclude(Sequel.blob('4')=>%w[2 3].map{|i| Sequel.blob(i)}).wont_be_empty + ds.exclude(Sequel.blob('2')=>%w[2 3].map{|i| Sequel.blob(i)}).must_be_empty + + ds.where(2=>[2, 3.0]).wont_be_empty + ds.where(4=>[2, 3.0]).must_be_empty + ds.exclude(4=>[2, 3.0]).wont_be_empty + ds.exclude(2=>[2, 4.0]).must_be_empty + + ds.where(2=>[2]).wont_be_empty + ds.where(4=>[2]).must_be_empty + ds.exclude(4=>[2]).wont_be_empty + ds.exclude(2=>[2]).must_be_empty + + ds.where(2=>[2, 3, nil]).wont_be_empty + ds.where(4=>[2, 3, nil]).must_be_empty + ds.exclude(4=>[2, 3, nil]).must_be_empty # NOT IN (..., NULL) predicate always false + ds.exclude(2=>[2, 4, nil]).must_be_empty + end + it "should provide a list of existing ordinary tables" do @db.create_table(:test){Integer :id} @db.tables.must_include :test diff --git a/spec/extensions/pg_auto_parameterize_in_array_spec.rb b/spec/extensions/pg_auto_parameterize_in_array_spec.rb new file mode 100644 index 0000000000..039728b924 --- /dev/null +++ b/spec/extensions/pg_auto_parameterize_in_array_spec.rb @@ -0,0 +1,155 
@@ +require File.join(File.dirname(File.expand_path(__FILE__)), "spec_helper") + +describe "pg_auto_parameterize_in_array extension" do + before do + @db = Sequel.connect('mock://postgres') + @db.synchronize{|c| def c.escape_bytea(v) v*2 end} + @db.opts[:treat_string_list_as_text_array] = 't' + @db.extension :pg_auto_parameterize_in_array + end + + types = [ + ["strings if treat_string_list_as_text_array Database option is true", proc{|x| x.to_s}, "text"], + ["BigDecimals", proc{|x| BigDecimal(x)}, "numeric"], + ["dates", proc{|x| Date.new(2021, x)}, "date"], + ["times", proc{|x| Time.local(2021, x)}, "timestamp"], + ["SQLTimes", proc{|x| Sequel::SQLTime.create(x, 0, 0)}, "time"], + ["datetimes", proc{|x| DateTime.new(2021, x)}, "timestamp"], + ["floats", proc{|x| Float(x)}, "numeric"], + ["blobs", proc{|x| Sequel.blob(x.to_s)}, "bytea"], + ] + + if RUBY_VERSION >= '2.4' + types << ["integers", proc{|x| x}, "int8"] + else + it "should fallback to pg_auto_parameterize extension behavior when switching column IN/NOT IN to = ANY/!= ALL for integers" do + v = [1, 2, 3] + nv = [1, nil, 3] + type = "int8" + + sql = @db[:table].where(:a=>v).sql + sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" = ANY(CAST($1 AS #{type}[])))' + sql.args.must_equal ['{1,2,3}'] + + sql = @db[:table].where(:a=>nv).sql + sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" = ANY(CAST($1 AS #{type}[])))' + sql.args.must_equal ['{1,NULL,3}'] + + sql = @db[:table].exclude(:a=>v).sql + sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" != ALL(CAST($1 AS #{type}[])))' + sql.args.must_equal ['{1,2,3}'] + + sql = @db[:table].exclude(:a=>nv).sql + sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" != ALL(CAST($1 AS #{type}[])))' + sql.args.must_equal ['{1,NULL,3}'] + end + end + + types.each do |desc, conv, type| + it "should automatically switch column IN/NOT IN to = ANY/!= ALL for #{desc}" do + v = [1,2,3].map(&conv) + nv = (v + [nil]).freeze + + sql = @db[:table].where(:a=>v).sql + 
sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" = ANY($1::#{type}[]))' + sql.args.must_equal [v] + + sql = @db[:table].where(:a=>nv).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" = ANY($1::#{type}[]))' + sql.args.must_equal [nv] + + sql = @db[:table].exclude(:a=>v).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" != ALL($1::#{type}[]))' + sql.args.must_equal [v] + + sql = @db[:table].exclude(:a=>nv).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" != ALL($1::#{type}[]))' + sql.args.must_equal [nv] + end + end + + it "should automatically switch column IN/NOT IN to = ANY/!= ALL for infinite/NaN floats" do + v = [1.0, 1.0/0.0, -1.0/0.0, 0.0/0.0] + nv = (v + [nil]).freeze + type = "double precision" + + sql = @db[:table].where(:a=>v).sql + sql.must_equal %'SELECT * FROM \"table\" WHERE ("a" = ANY($1::#{type}[]))' + sql.args.must_equal [v] + + sql = @db[:table].where(:a=>nv).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" = ANY($1::#{type}[]))' + sql.args.must_equal [nv] + + sql = @db[:table].exclude(:a=>v).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" != ALL($1::#{type}[]))' + sql.args.must_equal [v] + + sql = @db[:table].exclude(:a=>nv).sql + sql.must_equal %'SELECT * FROM "table" WHERE ("a" != ALL($1::#{type}[]))' + sql.args.must_equal [nv] + end + + it "should not automatically switch column IN/NOT IN to = ANY/!= ALL for strings by default" do + @db.opts.delete(:treat_string_list_as_text_array) + v = %w'1 2' + sql = @db[:table].where([:a, :b]=>v).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") IN ($1, $2))' + sql.args.must_equal v + + sql = @db[:table].exclude([:a, :b]=>v).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") NOT IN ($1, $2))' + sql.args.must_equal v + end + + it "should not convert IN/NOT IN expressions that use unsupported types" do + v = [Sequel.lit('1'), Sequel.lit('2')].freeze + sql = @db[:table].where([:a, :b]=>v).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") 
IN (1, 2))' + sql.args.must_be_nil + + sql = @db[:table].exclude([:a, :b]=>v).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") NOT IN (1, 2))' + sql.args.must_be_nil + end + + it "should not convert multiple column IN expressions" do + sql = @db[:table].where([:a, :b]=>[[1.0, 2.0]]).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") IN (($1::numeric, $2::numeric)))' + sql.args.must_equal [1, 2] + + sql = @db[:table].exclude([:a, :b]=>[[1.0, 2.0]]).sql + sql.must_equal 'SELECT * FROM "table" WHERE (("a", "b") NOT IN (($1::numeric, $2::numeric)))' + sql.args.must_equal [1, 2] + end + + it "should not convert single value expressions" do + sql = @db[:table].where(:a=>[1.0]).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" IN ($1::numeric))' + sql.args.must_equal [1] + + sql = @db[:table].exclude(:a=>[1.0]).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" NOT IN ($1::numeric))' + sql.args.must_equal [1] + end + + it "should not convert expressions with mixed types" do + sql = @db[:table].where(:a=>[1, 2.0]).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" IN ($1::int4, $2::numeric))' + sql.args.must_equal [1, 2.0] + + sql = @db[:table].exclude(:a=>[1, 2.0]).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" NOT IN ($1::int4, $2::numeric))' + sql.args.must_equal [1, 2.0] + end + + it "should not convert other expressions" do + sql = @db[:table].where(:a=>1).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" = $1::int4)' + sql.args.must_equal [1] + + sql = @db[:table].where(:a=>@db[:table]).sql + sql.must_equal 'SELECT * FROM "table" WHERE ("a" IN (SELECT * FROM "table"))' + sql.args.must_be_nil + end +end diff --git a/www/pages/plugins.html.erb b/www/pages/plugins.html.erb index 26ee849e92..49bab740ca 100644 --- a/www/pages/plugins.html.erb +++ b/www/pages/plugins.html.erb @@ -683,6 +683,10 @@ Automatically parameterizes queries when using the postgres adapter with the pg driver.
  • +pg_auto_parameterize_in_array +Builds on pg_auto_parameterize, but handles additional types when converting IN/NOT IN to = ANY/!= ALL. +
  • +
  • pg_enum Adds support for PostgreSQL enums.