From d95458c063c309402994ecdac71a1b519cd15601 Mon Sep 17 00:00:00 2001 From: Martin Hutchinson Date: Tue, 12 Dec 2023 16:35:07 +0000 Subject: [PATCH] Support for skipping subtree revisions to increase read performance and reduce disk usage (#3201) TL;DR: existing trees will continue to be stored and queried as they were before, but new trees created with the MySQL storage layer will be stored and queried in a way that uses less space and allows for simpler and faster queries. No schema changes are required by log operators. The Trillian MySQL implementation stores the internal state of the log as Subtrees in the database. These are essentially tiles as described by [tlog: Tiling a log](https://research.swtch.com/tlog). Trees created with previous versions of Trillian stored a different revision of each Subtree when the tree was updated. This is somewhat redundant for append-only logs because an earlier version of a Subtree can always be derived from a later one by simply removing entries from the right of the Subtree. This change removes this Subtree revision history, and updates Subtrees in place when they are updated. Measurements from @n-canter show that revisionless storage saves around 75% storage costs for the Subtree table, and queries over this table are more than 15% faster. The same schema is used for both revisioned and unrevisioned subtrees. The difference is that we always write a revision of 0 in the unrevisioned case, which still means that there will only be a single entry per subtree. Support is maintained for the old way of revisioning Subtrees in order to avoid breaking changes to existing trees. There is no simple code change that would safely allow a previously revisioned tree to start becoming a revisionless tree. This new revisionless Subtree feature is only available for trees created with new versions of Trillian. Users with legacy revisioned trees that wish to take advantage of smaller storage costs and faster queries of the new revisionless storage should come speak to us on [transparency-dev Slack](https://join.slack.com/t/transparency-dev/shared_invite/zt-27pkqo21d-okUFhur7YZ0rFoJVIOPznQ). The safest option we have available is to use [migrillian](https://github.com/google/certificate-transparency-go/tree/master/trillian/migrillian) to create a new copy of trees, but this will be quite a manual process and will only work for CT logs. Other migration options are conceivable and we're eager to work with the community to develop and test tools for upgrading trees in place. --- CHANGELOG.md | 36 +++++ integration/admin/admin_integration_test.go | 11 +- storage/mysql/admin_storage.go | 75 +++++++-- storage/mysql/admin_storage_test.go | 124 ++++++++++++++- storage/mysql/mysqlpb/gen.go | 18 +++ storage/mysql/mysqlpb/options.pb.go | 165 ++++++++++++++++++++ storage/mysql/mysqlpb/options.proto | 27 ++++ storage/mysql/schema/storage.sql | 4 +- storage/mysql/sql.go | 29 ++++ storage/mysql/storage_test.go | 132 +++++++++++----- storage/mysql/tree_storage.go | 65 ++++++-- storage/testonly/admin_storage_tester.go | 13 +- 12 files changed, 616 insertions(+), 83 deletions(-) create mode 100644 storage/mysql/mysqlpb/gen.go create mode 100644 storage/mysql/mysqlpb/options.pb.go create mode 100644 storage/mysql/mysqlpb/options.proto diff --git a/CHANGELOG.md b/CHANGELOG.md index 032b0693b3..569648944a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,42 @@ * Bump CI version of MySQL from 5.7 to 8.0 * Bump golangci-lint from 1.51.1 to 1.55.1 (developers should update to this version) +### MySQL: Changes to Subtree Revisions + +Support for skipping subtree revisions to increase read performance and reduce disk usage: added in #3201 + +TL;DR: existing trees will continue to be stored and queried as they were before, but new trees +created with the MySQL storage layer will be stored and queried in a way that uses less space +and allows for simpler and faster queries. No schema changes are required by log operators. + +The Trillian MySQL implementation stores the internal state of the log as Subtrees in the database. +These are essentially tiles as described by [tlog: Tiling a log](https://research.swtch.com/tlog). +Trees created with previous versions of Trillian stored a different revision of each Subtree when +the tree was updated. This is somewhat redundant for append-only logs because an earlier version +of a Subtree can always be derived from a later one by simply removing entries from the right of +the Subtree. PR #3201 removes this Subtree revision history, and updates Subtrees in place when +they are updated. + +Measurements from @n-canter show that revisionless storage saves around 75% storage costs for the +Subtree table, and queries over this table are more than 15% faster. + +The same schema is used for both revisioned and unrevisioned subtrees. The difference is that we +always write a revision of 0 in the unrevisioned case, which still means that there will only be +a single entry per subtree. + +Support is maintained for the old way of revisioning Subtrees in order to avoid breaking changes +to existing trees. There is no simple code change that would safely allow a previously revisioned +tree to start becoming a revisionless tree. This new revisionless Subtree feature is only available +for trees created with new versions of Trillian. + +Users with legacy revisioned trees that wish to take advantage of smaller storage costs and faster +queries of the new revisionless storage should come speak to us on +[transparency-dev Slack](https://join.slack.com/t/transparency-dev/shared_invite/zt-27pkqo21d-okUFhur7YZ0rFoJVIOPznQ). +The safest option we have available is to use [migrillian](https://github.com/google/certificate-transparency-go/tree/master/trillian/migrillian) to create a new copy of trees, but this will be quite a manual +process and will only work for CT logs. +Other migration options are conceivable and we're eager to work with the community to develop +and test tools for upgrading trees in place. + ## v1.5.3 * Recommended go version for development: 1.20 diff --git a/integration/admin/admin_integration_test.go b/integration/admin/admin_integration_test.go index ff24b9b304..2fdf434868 100644 --- a/integration/admin/admin_integration_test.go +++ b/integration/admin/admin_integration_test.go @@ -220,8 +220,9 @@ func TestAdminServer_UpdateTree(t *testing.T) { want.TreeId = tree.TreeId want.CreateTime = tree.CreateTime want.UpdateTime = tree.UpdateTime + want.StorageSettings = tree.StorageSettings if !proto.Equal(tree, want) { - diff := cmp.Diff(tree, want) + diff := cmp.Diff(tree, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-UpdateTree diff:\n%v", test.desc, diff) } } @@ -357,7 +358,7 @@ func TestAdminServer_DeleteTree(t *testing.T) { want.Deleted = true want.DeleteTime = deletedTree.DeleteTime if got := deletedTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-DeleteTree() diff (-got +want):\n%v", test.desc, diff) } @@ -366,7 +367,7 @@ func TestAdminServer_DeleteTree(t *testing.T) { t.Fatalf("%v: GetTree() returned err = %v", test.desc, err) } if got, want := storedTree, deletedTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-GetTree() diff (-got +want):\n%v", test.desc, diff) } } @@ -447,7 +448,7 @@ func TestAdminServer_UndeleteTree(t *testing.T) { continue } if got, want := undeletedTree, createdTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-UndeleteTree() diff (-got +want):\n%v", test.desc, diff) } @@ -456,7 +457,7 @@ func TestAdminServer_UndeleteTree(t *testing.T) { t.Fatalf("%v: GetTree() returned err = %v", test.desc, err) } if got, want := storedTree, createdTree; !proto.Equal(got, want) { - diff := cmp.Diff(got, want) + diff := cmp.Diff(got, want, cmp.Comparer(proto.Equal)) t.Errorf("%v: post-GetTree() diff (-got +want):\n%v", test.desc, diff) } } diff --git a/storage/mysql/admin_storage.go b/storage/mysql/admin_storage.go index 16ed7c911b..0b267f6732 100644 --- a/storage/mysql/admin_storage.go +++ b/storage/mysql/admin_storage.go @@ -15,17 +15,21 @@ package mysql import ( + "bytes" "context" "database/sql" + "encoding/gob" "fmt" "sync" "time" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "google.golang.org/grpc/codes" "google.golang.org/grpc/status" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/timestamppb" "k8s.io/klog/v2" ) @@ -47,8 +51,8 @@ const ( Description, CreateTimeMillis, UpdateTimeMillis, - PrivateKey, - PublicKey, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings MaxRootDurationMillis, Deleted, DeleteTimeMillis @@ -62,7 +66,7 @@ const ( ) // NewAdminStorage returns a MySQL storage.AdminStorage implementation backed by DB. -func NewAdminStorage(db *sql.DB) storage.AdminStorage { +func NewAdminStorage(db *sql.DB) *mysqlAdminStorage { return &mysqlAdminStorage{db} } @@ -223,6 +227,39 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia } rootDuration := newTree.MaxRootDuration.AsDuration() + // When creating a new tree we automatically add StorageSettings to allow us to + // determine that this tree can support newer storage features. When reading + // trees that do not have this StorageSettings populated, it must be assumed that + // the tree was created with the oldest settings. + // The gist of this code is super simple: create a new StorageSettings with the most + // modern defaults if the created tree does not have one, and then create a struct that + // represents this to store in the DB. Unfortunately because this involves anypb, struct + // copies, marshalling, and proper error handling this turns into a scary amount of code. + if tree.StorageSettings != nil { + newTree.StorageSettings = proto.Clone(tree.StorageSettings).(*anypb.Any) + } else { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: false, // Default behaviour for new trees is to skip writing subtree revisions. + } + a, err := anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to create new StorageOptions: %v", err) + } + newTree.StorageSettings = a + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(newTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return nil, fmt.Errorf("failed to unmarshal StorageOptions: %v", err) + } + ss := storageSettings{ + Revisioned: o.SubtreeRevisions, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + return nil, fmt.Errorf("failed to encode storageSettings: %v", err) + } + insertTreeStmt, err := t.tx.PrepareContext( ctx, `INSERT INTO Trees( @@ -236,8 +273,8 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia Description, CreateTimeMillis, UpdateTimeMillis, - PrivateKey, - PublicKey, + PrivateKey, -- Unused + PublicKey, -- Used to store StorageSettings MaxRootDurationMillis) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`) if err != nil { @@ -261,8 +298,8 @@ func (t *adminTX) CreateTree(ctx context.Context, tree *trillian.Tree) (*trillia newTree.Description, nowMillis, nowMillis, - []byte{}, // Unused, filling in for backward compatibility. - []byte{}, // Unused, filling in for backward compatibility. + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + buff.Bytes(), // Using the otherwise unused PublicKey for storing StorageSettings. rootDuration/time.Millisecond, ) if err != nil { @@ -356,7 +393,10 @@ func (t *adminTX) UpdateTree(ctx context.Context, treeID int64, updateFunc func( tree.Description, nowMillis, rootDuration/time.Millisecond, - []byte{}, // Unused, filling in for backward compatibility. + []byte{}, // PrivateKey: Unused, filling in for backward compatibility. + // PublicKey should not be updated with any storageSettings here without + // a lot of thought put into it. At the moment storageSettings are inferred + // when reading the tree, even if no value is stored in the database. tree.TreeId); err != nil { return nil, err } @@ -419,8 +459,21 @@ func validateDeleted(ctx context.Context, tx *sql.Tx, treeID int64, wantDeleted } func validateStorageSettings(tree *trillian.Tree) error { - if tree.StorageSettings != nil { - return fmt.Errorf("storage_settings not supported, but got %v", tree.StorageSettings) + if tree.StorageSettings.MessageIs(&mysqlpb.StorageOptions{}) { + return nil } - return nil + if tree.StorageSettings == nil { + // No storage settings is OK, we'll just use the defaults for new trees + return nil + } + return fmt.Errorf("storage_settings must be nil or mysqlpb.StorageOptions, but got %v", tree.StorageSettings) +} + +// storageSettings allows us to persist storage settings to the DB. +// It is a tempting trap to use protos for this, but the way they encode +// makes it impossible to tell the difference between no value ever written +// and a value that was written with the default values for each field. +// Using an explicit struct and gob encoding allows us to tell the difference. +type storageSettings struct { + Revisioned bool } diff --git a/storage/mysql/admin_storage_test.go b/storage/mysql/admin_storage_test.go index 54d636d551..d1a7cfd81d 100644 --- a/storage/mysql/admin_storage_test.go +++ b/storage/mysql/admin_storage_test.go @@ -15,13 +15,16 @@ package mysql import ( + "bytes" "context" "database/sql" + "encoding/gob" "fmt" "testing" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/testonly" "google.golang.org/protobuf/proto" "google.golang.org/protobuf/types/known/anypb" @@ -128,12 +131,16 @@ func TestAdminTX_TreeWithNulls(t *testing.T) { } } -func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { +func TestAdminTX_StorageSettings(t *testing.T) { cleanTestDB(DB) s := NewAdminStorage(DB) ctx := context.Background() - settings, err := anypb.New(&trillian.Tree{}) + badSettings, err := anypb.New(&trillian.Tree{}) + if err != nil { + t.Fatalf("Error marshaling proto: %v", err) + } + goodSettings, err := anypb.New(&mysqlpb.StorageOptions{}) if err != nil { t.Fatalf("Error marshaling proto: %v", err) } @@ -142,16 +149,38 @@ func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { desc string // fn attempts to either create or update a tree with a non-nil, valid Any proto // on Tree.StorageSettings. It's expected to return an error. - fn func(storage.AdminStorage) error + fn func(storage.AdminStorage) error + wantErr bool }{ { - desc: "CreateTree", + desc: "CreateTree Bad Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = badSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: true, + }, + { + desc: "CreateTree nil Settings", fn: func(s storage.AdminStorage) error { tree := proto.Clone(testonly.LogTree).(*trillian.Tree) - tree.StorageSettings = settings + tree.StorageSettings = nil _, err := storage.CreateTree(ctx, s, tree) return err }, + wantErr: false, + }, + { + desc: "CreateTree StorageOptions Settings", + fn: func(s storage.AdminStorage) error { + tree := proto.Clone(testonly.LogTree).(*trillian.Tree) + tree.StorageSettings = goodSettings + _, err := storage.CreateTree(ctx, s, tree) + return err + }, + wantErr: false, }, { desc: "UpdateTree", @@ -160,14 +189,93 @@ func TestAdminTX_StorageSettingsNotSupported(t *testing.T) { if err != nil { t.Fatalf("CreateTree() failed with err = %v", err) } - _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = settings }) + _, err = storage.UpdateTree(ctx, s, tree.TreeId, func(tree *trillian.Tree) { tree.StorageSettings = badSettings }) return err }, + wantErr: true, }, } for _, test := range tests { - if err := test.fn(s); err == nil { - t.Errorf("%v: err = nil, want non-nil", test.desc) + if err := test.fn(s); (err != nil) != test.wantErr { + t.Errorf("err: %v, wantErr = %v", err, test.wantErr) + } + } +} + +// Test reading variants of trees that could have been created by old versions +// of Trillian to check we infer the correct storage options. +func TestAdminTX_GetTreeLegacies(t *testing.T) { + cleanTestDB(DB) + s := NewAdminStorage(DB) + ctx := context.Background() + + serializedStorageSettings := func(revisioned bool) []byte { + ss := storageSettings{ + Revisioned: revisioned, + } + buff := &bytes.Buffer{} + enc := gob.NewEncoder(buff) + if err := enc.Encode(ss); err != nil { + t.Fatalf("failed to encode storageSettings: %v", err) + } + return buff.Bytes() + } + tests := []struct { + desc string + key []byte + wantRevisioned bool + }{ + { + desc: "No data", + key: []byte{}, + wantRevisioned: true, + }, + { + desc: "Public key", + key: []byte("trustmethatthisisapublickey"), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisioned", + key: serializedStorageSettings(true), + wantRevisioned: true, + }, + { + desc: "StorageOptions revisionless", + key: serializedStorageSettings(false), + wantRevisioned: false, + }, + } + for _, tC := range tests { + // Create a tree with default settings, and then reach into the DB to override + // whatever was written into the persisted settings to align with the test case. + tree, err := storage.CreateTree(ctx, s, testonly.LogTree) + if err != nil { + t.Fatal(err) + } + // We are reaching really into the internals here, but it's the only way to set up + // archival state. Going through the Create/Update methods will change the storage + // options. + tx, err := s.db.BeginTx(ctx, nil /* opts */) + if err != nil { + t.Fatal(err) + } + if _, err := tx.Exec("UPDATE Trees SET PublicKey = ? WHERE TreeId = ?", tC.key, tree.TreeId); err != nil { + t.Fatal(err) + } + if err := tx.Commit(); err != nil { + t.Fatal(err) + } + readTree, err := storage.GetTree(ctx, s, tree.TreeId) + if err != nil { + t.Fatal(err) + } + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(readTree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + t.Fatal(err) + } + if got, want := o.SubtreeRevisions, tC.wantRevisioned; got != want { + t.Errorf("%s SubtreeRevisions: got %t, wanted %t", tC.desc, got, want) } } } diff --git a/storage/mysql/mysqlpb/gen.go b/storage/mysql/mysqlpb/gen.go new file mode 100644 index 0000000000..875c14cbf4 --- /dev/null +++ b/storage/mysql/mysqlpb/gen.go @@ -0,0 +1,18 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Package mysqlpb contains protobuf definitions used by the mysql implementation. +package mysqlpb + +//go:generate protoc -I=. --go_out=paths=source_relative:. options.proto diff --git a/storage/mysql/mysqlpb/options.pb.go b/storage/mysql/mysqlpb/options.pb.go new file mode 100644 index 0000000000..2016a31838 --- /dev/null +++ b/storage/mysql/mysqlpb/options.pb.go @@ -0,0 +1,165 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by protoc-gen-go. DO NOT EDIT. +// versions: +// protoc-gen-go v1.31.0 +// protoc v3.20.1 +// source: options.proto + +package mysqlpb + +import ( + protoreflect "google.golang.org/protobuf/reflect/protoreflect" + protoimpl "google.golang.org/protobuf/runtime/protoimpl" + reflect "reflect" + sync "sync" +) + +const ( + // Verify that this generated code is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion) + // Verify that runtime/protoimpl is sufficiently up-to-date. + _ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20) +) + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +type StorageOptions struct { + state protoimpl.MessageState + sizeCache protoimpl.SizeCache + unknownFields protoimpl.UnknownFields + + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. + // https://github.com/google/trillian/pull/3201 + SubtreeRevisions bool `protobuf:"varint,1,opt,name=subtreeRevisions,proto3" json:"subtreeRevisions,omitempty"` +} + +func (x *StorageOptions) Reset() { + *x = StorageOptions{} + if protoimpl.UnsafeEnabled { + mi := &file_options_proto_msgTypes[0] + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + ms.StoreMessageInfo(mi) + } +} + +func (x *StorageOptions) String() string { + return protoimpl.X.MessageStringOf(x) +} + +func (*StorageOptions) ProtoMessage() {} + +func (x *StorageOptions) ProtoReflect() protoreflect.Message { + mi := &file_options_proto_msgTypes[0] + if protoimpl.UnsafeEnabled && x != nil { + ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x)) + if ms.LoadMessageInfo() == nil { + ms.StoreMessageInfo(mi) + } + return ms + } + return mi.MessageOf(x) +} + +// Deprecated: Use StorageOptions.ProtoReflect.Descriptor instead. +func (*StorageOptions) Descriptor() ([]byte, []int) { + return file_options_proto_rawDescGZIP(), []int{0} +} + +func (x *StorageOptions) GetSubtreeRevisions() bool { + if x != nil { + return x.SubtreeRevisions + } + return false +} + +var File_options_proto protoreflect.FileDescriptor + +var file_options_proto_rawDesc = []byte{ + 0x0a, 0x0d, 0x6f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x12, + 0x07, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x22, 0x3c, 0x0a, 0x0e, 0x53, 0x74, 0x6f, 0x72, + 0x61, 0x67, 0x65, 0x4f, 0x70, 0x74, 0x69, 0x6f, 0x6e, 0x73, 0x12, 0x2a, 0x0a, 0x10, 0x73, 0x75, + 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x18, 0x01, + 0x20, 0x01, 0x28, 0x08, 0x52, 0x10, 0x73, 0x75, 0x62, 0x74, 0x72, 0x65, 0x65, 0x52, 0x65, 0x76, + 0x69, 0x73, 0x69, 0x6f, 0x6e, 0x73, 0x42, 0x32, 0x5a, 0x30, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, + 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2f, 0x74, 0x72, 0x69, 0x6c, + 0x6c, 0x69, 0x61, 0x6e, 0x2f, 0x73, 0x74, 0x6f, 0x72, 0x61, 0x67, 0x65, 0x2f, 0x6d, 0x79, 0x73, + 0x71, 0x6c, 0x2f, 0x6d, 0x79, 0x73, 0x71, 0x6c, 0x70, 0x62, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, +} + +var ( + file_options_proto_rawDescOnce sync.Once + file_options_proto_rawDescData = file_options_proto_rawDesc +) + +func file_options_proto_rawDescGZIP() []byte { + file_options_proto_rawDescOnce.Do(func() { + file_options_proto_rawDescData = protoimpl.X.CompressGZIP(file_options_proto_rawDescData) + }) + return file_options_proto_rawDescData +} + +var file_options_proto_msgTypes = make([]protoimpl.MessageInfo, 1) +var file_options_proto_goTypes = []interface{}{ + (*StorageOptions)(nil), // 0: mysqlpb.StorageOptions +} +var file_options_proto_depIdxs = []int32{ + 0, // [0:0] is the sub-list for method output_type + 0, // [0:0] is the sub-list for method input_type + 0, // [0:0] is the sub-list for extension type_name + 0, // [0:0] is the sub-list for extension extendee + 0, // [0:0] is the sub-list for field type_name +} + +func init() { file_options_proto_init() } +func file_options_proto_init() { + if File_options_proto != nil { + return + } + if !protoimpl.UnsafeEnabled { + file_options_proto_msgTypes[0].Exporter = func(v interface{}, i int) interface{} { + switch v := v.(*StorageOptions); i { + case 0: + return &v.state + case 1: + return &v.sizeCache + case 2: + return &v.unknownFields + default: + return nil + } + } + } + type x struct{} + out := protoimpl.TypeBuilder{ + File: protoimpl.DescBuilder{ + GoPackagePath: reflect.TypeOf(x{}).PkgPath(), + RawDescriptor: file_options_proto_rawDesc, + NumEnums: 0, + NumMessages: 1, + NumExtensions: 0, + NumServices: 0, + }, + GoTypes: file_options_proto_goTypes, + DependencyIndexes: file_options_proto_depIdxs, + MessageInfos: file_options_proto_msgTypes, + }.Build() + File_options_proto = out.File + file_options_proto_rawDesc = nil + file_options_proto_goTypes = nil + file_options_proto_depIdxs = nil +} diff --git a/storage/mysql/mysqlpb/options.proto b/storage/mysql/mysqlpb/options.proto new file mode 100644 index 0000000000..2ebdc670ae --- /dev/null +++ b/storage/mysql/mysqlpb/options.proto @@ -0,0 +1,27 @@ +// Copyright 2023 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +syntax = "proto3"; +option go_package = "github.com/google/trillian/storage/mysql/mysqlpb"; + +package mysqlpb; + +// StorageOptions contains configuration parameters for MySQL implementation +// of the storage backend. This is envisioned only to be used for changes that +// would be breaking, but need to support old behaviour for backwards compatibility. +message StorageOptions { + // subtreeRevisions being explicitly set to false will skip writing subtree revisions. + // https://github.com/google/trillian/pull/3201 + bool subtreeRevisions = 1; +} diff --git a/storage/mysql/schema/storage.sql b/storage/mysql/schema/storage.sql index 8de7447ef2..0d571b24fa 100644 --- a/storage/mysql/schema/storage.sql +++ b/storage/mysql/schema/storage.sql @@ -18,8 +18,8 @@ CREATE TABLE IF NOT EXISTS Trees( CreateTimeMillis BIGINT NOT NULL, UpdateTimeMillis BIGINT NOT NULL, MaxRootDurationMillis BIGINT NOT NULL, - PrivateKey MEDIUMBLOB NOT NULL, - PublicKey MEDIUMBLOB NOT NULL, + PrivateKey MEDIUMBLOB NOT NULL, -- Unused. + PublicKey MEDIUMBLOB NOT NULL, -- This is now used to store settings. Deleted BOOLEAN, DeleteTimeMillis BIGINT, PRIMARY KEY(TreeId) diff --git a/storage/mysql/sql.go b/storage/mysql/sql.go index a929952727..a48e2a554f 100644 --- a/storage/mysql/sql.go +++ b/storage/mysql/sql.go @@ -15,11 +15,15 @@ package mysql import ( + "bytes" "database/sql" + "encoding/gob" "fmt" "time" "github.com/google/trillian" + "github.com/google/trillian/storage/mysql/mysqlpb" + "google.golang.org/protobuf/types/known/anypb" "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/timestamppb" ) @@ -124,5 +128,30 @@ func readTree(r row) (*trillian.Tree, error) { } } + // We're going to try to interpret PublicKey as storageSettings, but it could be a + // public key from a really old tree, or an empty column from a tree created in the + // period between Trillian key material being removed and this column being used for + // storing settings. + buff := bytes.NewBuffer(publicKey) + dec := gob.NewDecoder(buff) + ss := &storageSettings{} + var o *mysqlpb.StorageOptions + if err := dec.Decode(ss); err != nil { + // If there are no storageSettings then this tree was created before settings + // were supported, and thus we have to populate the settings with the oldest + // settings for features. + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + } else { + o = &mysqlpb.StorageOptions{ + SubtreeRevisions: ss.Revisioned, + } + } + tree.StorageSettings, err = anypb.New(o) + if err != nil { + return nil, fmt.Errorf("failed to put StorageSettings into tree: %w", err) + } + return tree, nil } diff --git a/storage/mysql/storage_test.go b/storage/mysql/storage_test.go index 13479c1445..c08aa757f2 100644 --- a/storage/mysql/storage_test.go +++ b/storage/mysql/storage_test.go @@ -25,18 +25,47 @@ import ( "fmt" "os" "testing" + "time" "github.com/google/trillian" "github.com/google/trillian/storage" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/testdb" storageto "github.com/google/trillian/storage/testonly" stree "github.com/google/trillian/storage/tree" "github.com/google/trillian/types" "github.com/transparency-dev/merkle/compact" "github.com/transparency-dev/merkle/rfc6962" + "google.golang.org/protobuf/types/known/anypb" + "google.golang.org/protobuf/types/known/durationpb" "k8s.io/klog/v2" ) +var ( + // LogTree is a valid, LOG-type trillian.Tree for tests. + // This tree is configured to write revisions for each subtree. + // This matches the legacy behaviour before revisions were removed. + RevisionedLogTree = &trillian.Tree{ + TreeState: trillian.TreeState_ACTIVE, + TreeType: trillian.TreeType_LOG, + DisplayName: "Llamas Log", + Description: "Registry of publicly-owned llamas", + MaxRootDuration: durationpb.New(0 * time.Millisecond), + StorageSettings: mustCreateRevisionedStorage(), + } +) + +func mustCreateRevisionedStorage() *anypb.Any { + o := &mysqlpb.StorageOptions{ + SubtreeRevisions: true, + } + a, err := anypb.New(o) + if err != nil { + panic(err) + } + return a +} + func TestNodeRoundTrip(t *testing.T) { nodes := createSomeNodes(256) nodeIDs := make([]compact.NodeID, len(nodes)) @@ -58,11 +87,11 @@ func TestNodeRoundTrip(t *testing.T) { {desc: "store-all-read-all", store: nodes, read: nodeIDs, want: nodes}, {desc: "store-all-read-none", store: nodes, read: nil, want: nil}, } { - t.Run(tc.desc, func(t *testing.T) { + testbody := func(treeDef *trillian.Tree) { ctx := context.Background() cleanTestDB(DB) as := NewAdminStorage(DB) - tree := mustCreateTree(ctx, t, as, storageto.LogTree) + tree := mustCreateTree(ctx, t, as, treeDef) s := NewLogStorage(DB, nil) const writeRev = int64(100) @@ -86,6 +115,12 @@ func TestNodeRoundTrip(t *testing.T) { } return nil }) + } + t.Run(tc.desc+"-norevisions", func(t *testing.T) { + testbody(storageto.LogTree) + }) + t.Run(tc.desc+"-revisions", func(t *testing.T) { + testbody(RevisionedLogTree) }) } } @@ -93,50 +128,67 @@ func TestNodeRoundTrip(t *testing.T) { // This test ensures that node writes cross subtree boundaries so this edge case in the subtree // cache gets exercised. Any tree size > 256 will do this. func TestLogNodeRoundTripMultiSubtree(t *testing.T) { - ctx := context.Background() - cleanTestDB(DB) - as := NewAdminStorage(DB) - tree := mustCreateTree(ctx, t, as, storageto.LogTree) - s := NewLogStorage(DB, nil) - - const writeRev = int64(100) - const size = 871 - nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) - if err != nil { - t.Fatalf("failed to create test tree: %v", err) - } - nodeIDsToRead := make([]compact.NodeID, len(nodesToStore)) - for i := range nodesToStore { - nodeIDsToRead[i] = nodesToStore[i].ID + testCases := []struct { + desc string + tree *trillian.Tree + }{ + { + desc: "Revisionless", + tree: storageto.LogTree, + }, + { + desc: "Revisions", + tree: RevisionedLogTree, + }, } + for _, tC := range testCases { + t.Run(tC.desc, func(t *testing.T) { + ctx := context.Background() + cleanTestDB(DB) + as := NewAdminStorage(DB) + tree := mustCreateTree(ctx, t, as, tC.tree) + s := NewLogStorage(DB, nil) - { - runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - forceWriteRevision(writeRev, tx) - if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { - t.Fatalf("Failed to store nodes: %s", err) + const writeRev = int64(100) + const size = 871 + nodesToStore, err := createLogNodesForTreeAtSize(t, size, writeRev) + if err != nil { + t.Fatalf("failed to create test tree: %v", err) + } + nodeIDsToRead := make([]compact.NodeID, len(nodesToStore)) + for i := range nodesToStore { + nodeIDsToRead[i] = nodesToStore[i].ID } - return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) - }) - } - { - runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { - readNodes, err := tx.GetMerkleNodes(ctx, nodeIDsToRead) - if err != nil { - t.Fatalf("Failed to retrieve nodes: %s", err) + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + forceWriteRevision(writeRev, tx) + if err := tx.SetMerkleNodes(ctx, nodesToStore); err != nil { + t.Fatalf("Failed to store nodes: %s", err) + } + return storeLogRoot(ctx, tx, uint64(size), uint64(writeRev), []byte{1, 2, 3}) + }) } - if err := nodesAreEqual(readNodes, nodesToStore); err != nil { - missing, extra := diffNodes(readNodes, nodesToStore) - for _, n := range missing { - t.Errorf("Missing: %v", n.ID) - } - for _, n := range extra { - t.Errorf("Extra : %v", n.ID) - } - t.Fatalf("Read back different nodes from the ones stored: %s", err) + + { + runLogTX(s, tree, t, func(ctx context.Context, tx storage.LogTreeTX) error { + readNodes, err := tx.GetMerkleNodes(ctx, nodeIDsToRead) + if err != nil { + t.Fatalf("Failed to retrieve nodes: %s", err) + } + if err := nodesAreEqual(readNodes, nodesToStore); err != nil { + missing, extra := diffNodes(readNodes, nodesToStore) + for _, n := range missing { + t.Errorf("Missing: %v", n.ID) + } + for _, n := range extra { + t.Errorf("Extra : %v", n.ID) + } + t.Fatalf("Read back different nodes from the ones stored: %s", err) + } + return nil + }) } - return nil }) } } diff --git a/storage/mysql/tree_storage.go b/storage/mysql/tree_storage.go index c9bae1b5a4..7fb5ed8d0d 100644 --- a/storage/mysql/tree_storage.go +++ b/storage/mysql/tree_storage.go @@ -26,15 +26,17 @@ import ( "github.com/google/trillian" "github.com/google/trillian/storage/cache" + "github.com/google/trillian/storage/mysql/mysqlpb" "github.com/google/trillian/storage/storagepb" "github.com/google/trillian/storage/tree" "google.golang.org/protobuf/proto" + "google.golang.org/protobuf/types/known/anypb" "k8s.io/klog/v2" ) // These statements are fixed const ( - insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + insertSubtreeMultiSQL = `INSERT INTO Subtree(TreeId, SubtreeId, Nodes, SubtreeRevision) ` + placeholderSQL + ` ON DUPLICATE KEY UPDATE Nodes=VALUES(Nodes)` insertTreeHeadSQL = `INSERT INTO TreeHead(TreeId,TreeHeadTimestamp,TreeSize,RootHash,TreeRevision,RootSignature) VALUES(?,?,?,?,?,?)` @@ -52,6 +54,12 @@ const ( AND Subtree.SubtreeRevision = x.MaxRevision AND Subtree.TreeId = x.TreeId AND Subtree.TreeId = ?` + + selectSubtreeSQLNoRev = ` + SELECT SubtreeId, Subtree.Nodes + FROM Subtree + WHERE Subtree.TreeId = ? + AND SubtreeId IN (` + placeholderSQL + `)` placeholderSQL = "" ) @@ -133,8 +141,12 @@ func (m *mySQLTreeStorage) getStmt(ctx context.Context, statement string, num in return s, nil } -func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { - return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") +func (m *mySQLTreeStorage) getSubtreeStmt(ctx context.Context, subtreeRevs bool, num int) (*sql.Stmt, error) { + if subtreeRevs { + return m.getStmt(ctx, selectSubtreeSQL, num, "?", "?") + } else { + return m.getStmt(ctx, selectSubtreeSQLNoRev, num, "?", "?") + } } func (m *mySQLTreeStorage) setSubtreeStmt(ctx context.Context, num int) (*sql.Stmt, error) { @@ -147,6 +159,12 @@ func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, klog.Warningf("Could not start tree TX: %s", err) return treeTX{}, err } + var subtreeRevisions bool + o := &mysqlpb.StorageOptions{} + if err := anypb.UnmarshalTo(tree.StorageSettings, o, proto.UnmarshalOptions{}); err != nil { + return treeTX{}, fmt.Errorf("failed to unmarshal StorageSettings: %v", err) + } + subtreeRevisions = o.SubtreeRevisions return treeTX{ tx: t, mu: &sync.Mutex{}, @@ -156,6 +174,7 @@ func (m *mySQLTreeStorage) beginTreeTx(ctx context.Context, tree *trillian.Tree, hashSizeBytes: hashSizeBytes, subtreeCache: subtreeCache, writeRevision: -1, + subtreeRevs: subtreeRevisions, }, nil } @@ -170,6 +189,7 @@ type treeTX struct { hashSizeBytes int subtreeCache *cache.SubtreeCache writeRevision int64 + subtreeRevs bool } func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]byte) ([]*storagepb.SubtreeProto, error) { @@ -179,7 +199,7 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by return nil, nil } - tmpl, err := t.ts.getSubtreeStmt(ctx, len(ids)) + tmpl, err := t.ts.getSubtreeStmt(ctx, t.subtreeRevs, len(ids)) if err != nil { return nil, err } @@ -190,18 +210,28 @@ func (t *treeTX) getSubtrees(ctx context.Context, treeRevision int64, ids [][]by } }() - args := make([]interface{}, 0, len(ids)+3) + var args []interface{} + if t.subtreeRevs { + args = make([]interface{}, 0, len(ids)+3) + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } + args = append(args, t.treeID) + args = append(args, treeRevision) + args = append(args, t.treeID) + } else { + args = make([]interface{}, 0, len(ids)+1) + args = append(args, t.treeID) - // populate args with ids. - for _, id := range ids { - klog.V(4).Infof(" id: %x", id) - args = append(args, id) + // populate args with ids. + for _, id := range ids { + klog.V(4).Infof(" id: %x", id) + args = append(args, id) + } } - args = append(args, t.treeID) - args = append(args, treeRevision) - args = append(args, t.treeID) - rows, err := stx.QueryContext(ctx, args...) if err != nil { klog.Warningf("Failed to get merkle subtrees: %s", err) @@ -283,6 +313,13 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre // a really large number of subtrees to store. args := make([]interface{}, 0, len(subtrees)) + // If not using subtree revisions then default value of 0 is fine. There is no + // significance to this value, other than it cannot be NULL in the DB. + var subtreeRev int64 + if t.subtreeRevs { + // We're using subtree revisions, so ensure we write at the correct revision + subtreeRev = t.writeRevision + } for _, s := range subtrees { s := s if s.Prefix == nil { @@ -295,7 +332,7 @@ func (t *treeTX) storeSubtrees(ctx context.Context, subtrees []*storagepb.Subtre args = append(args, t.treeID) args = append(args, s.Prefix) args = append(args, subtreeBytes) - args = append(args, t.writeRevision) + args = append(args, subtreeRev) } tmpl, err := t.ts.setSubtreeStmt(ctx, len(subtrees)) diff --git a/storage/testonly/admin_storage_tester.go b/storage/testonly/admin_storage_tester.go index aa734fe5e0..351a5252e5 100644 --- a/storage/testonly/admin_storage_tester.go +++ b/storage/testonly/admin_storage_tester.go @@ -328,8 +328,10 @@ func runListTreesTest(ctx context.Context, tx storage.ReadOnlyAdminTX, includeDe sort.Slice(want, func(i, j int) bool { return want[i].TreeId < want[j].TreeId }) for i, wantTree := range want { + // Ignore storage_settings changes (OK to vary between implementations) + wantTree.StorageSettings = got[i].StorageSettings if !proto.Equal(got[i], wantTree) { - return fmt.Errorf("post-ListTrees() diff (-got +want):\n%v", cmp.Diff(got, want)) + return fmt.Errorf("post-ListTrees() diff (-got +want):\n%v", cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } } return nil @@ -362,8 +364,10 @@ func (tester *AdminStorageTester) TestSoftDeleteTree(t *testing.T) { wantTree := proto.Clone(test.tree).(*trillian.Tree) wantTree.Deleted = true wantTree.DeleteTime = deletedTree.DeleteTime + // Ignore storage_settings changes (OK to vary between implementations) + wantTree.StorageSettings = deletedTree.StorageSettings if got, want := deletedTree, wantTree; !proto.Equal(got, want) { - t.Errorf("%v: post-SoftDeleteTree diff (-got +want):\n%v", test.desc, cmp.Diff(got, want)) + t.Errorf("%v: post-SoftDeleteTree diff (-got +want):\n%v", test.desc, cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } if err := assertStoredTree(ctx, s, deletedTree); err != nil { @@ -557,8 +561,11 @@ func assertStoredTree(ctx context.Context, s storage.AdminStorage, want *trillia if err != nil { return fmt.Errorf("GetTree() returned err = %v", err) } + + // Ignore storage_settings changes (OK to vary between implementations) + want.StorageSettings = got.StorageSettings if !proto.Equal(got, want) { - return fmt.Errorf("post-GetTree() diff (-got +want):\n%v", cmp.Diff(got, want)) + return fmt.Errorf("post-GetTree() diff (-got +want):\n%v", cmp.Diff(got, want, cmp.Comparer(proto.Equal))) } return nil }