Skip to content
Draft
Show file tree
Hide file tree
Changes from 29 commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
a0516f9
feat: phase 1 of major refactor - move fullnode functionality to SDK
Jun 11, 2025
eb145f2
comments
Jun 11, 2025
2f7029b
make validate relay request a method on the full node
Jun 11, 2025
ca3d365
feat: implement GatewayClients and move relevant code from PATH to Sh…
Jun 11, 2025
8cea41c
move FullNode interface to sdk package
Jun 11, 2025
8585235
make gateway client structs private
Jun 11, 2025
6d88a19
gateway mode method on gateway client
Jun 11, 2025
d6dd265
remove unused queryCodec
Jun 12, 2025
a94f720
Merge branch 'shannon-sdk-refactor' into shannon-sdk-refactor-pt2
Jun 12, 2025
c19c78a
update to sync comments with PATH PR #298
Jun 13, 2025
88ebd61
chore: merge conflicts
Jun 13, 2025
a549c5f
chore: merge conflicts
Jun 13, 2025
0a86437
updated to incorporate PR 298 changes
Jun 13, 2025
de5522f
move error handling for observations for delegated mode
Jun 13, 2025
71f0037
update comments in full node config
Jun 13, 2025
3489fb4
Merge branch 'shannon-sdk-refactor' into shannon-sdk-refactor-pt2
Jun 13, 2025
d81e4bd
chore: merge conflicts
Jun 16, 2025
9740238
fix: implement review comments
Jun 16, 2025
b37b272
fix: implement review comments
Jun 16, 2025
f4fdaac
make 'getAccountPubKey' private
Jun 17, 2025
544fd19
review comments and minor refactor
Jun 17, 2025
b2d5a08
fix observation errors
Jun 17, 2025
1106f83
fix: implement review comments
Jun 18, 2025
8b09728
fix: implement review comments
Jun 18, 2025
e715f0c
fix: implement review comments
Jun 18, 2025
c6e7b6b
improve logs and errors in gateway client
Jun 18, 2025
990759a
add comments make pub key fetch private
Jun 18, 2025
545fc0e
fix: implement review comments
Jun 18, 2025
a3a227f
reintroduce "lazy" fetching concept as GRPC client
Jun 19, 2025
6dc1456
fix comments
Jun 19, 2025
b556cff
fix comment
Jun 19, 2025
4f67b8a
fix nil logger
Jun 19, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 0 additions & 40 deletions account.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,18 +3,11 @@ package sdk
import (
"context"

"github.com/cosmos/cosmos-sdk/codec"
cdctypes "github.com/cosmos/cosmos-sdk/codec/types"
cryptocodec "github.com/cosmos/cosmos-sdk/crypto/codec"
cryptotypes "github.com/cosmos/cosmos-sdk/crypto/types"
"github.com/cosmos/cosmos-sdk/types"
accounttypes "github.com/cosmos/cosmos-sdk/x/auth/types"
grpc "github.com/cosmos/gogoproto/grpc"
grpcoptions "google.golang.org/grpc"
)

// queryCodec is the package-level proto codec for account queries.
//
// - Assigned once in init()
// - Read by GetPubKeyFromAddress to unpack the account returned by the query client
var queryCodec *codec.ProtoCodec

// -----------------------------
// Interfaces and Structs
// -----------------------------
Expand All @@ -39,39 +32,6 @@ type AccountClient struct {
PoktNodeAccountFetcher
}

// -----------------------------
// Functions
// -----------------------------

// init prepares the package-level queryCodec.
//
// - Creates a new interface registry
// - Registers the auth (account) and crypto interfaces on it
// - Stores a ProtoCodec built from that registry in queryCodec
func init() {
	interfaceRegistry := cdctypes.NewInterfaceRegistry()

	accounttypes.RegisterInterfaces(interfaceRegistry)
	cryptocodec.RegisterInterfaces(interfaceRegistry)

	queryCodec = codec.NewProtoCodec(interfaceRegistry)
}

// GetPubKeyFromAddress looks up the account for the given address and returns its public key.
//
// - Sends a QueryAccountRequest through the embedded PoktNodeAccountFetcher
// - Unpacks the returned Any into an AccountI using queryCodec
// - Returns any query or unpack error unchanged
func (ac *AccountClient) GetPubKeyFromAddress(
	ctx context.Context,
	address string,
) (pubKey cryptotypes.PubKey, err error) {
	res, err := ac.PoktNodeAccountFetcher.Account(
		ctx,
		&accounttypes.QueryAccountRequest{Address: address},
	)
	if err != nil {
		return nil, err
	}

	var account types.AccountI
	if unpackErr := queryCodec.UnpackAny(res.Account, &account); unpackErr != nil {
		return nil, unpackErr
	}

	return account.GetPubKey(), nil
}

// NewPoktNodeAccountFetcher returns the default implementation of the PoktNodeAccountFetcher interface.
//
// - Connects to a POKT full node through the account module's query client to get account data.
Expand Down
43 changes: 0 additions & 43 deletions application.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,14 @@ package sdk

import (
"context"
"errors"
"fmt"
"slices"

cryptotypes "github.com/cosmos/cosmos-sdk/crypto/types"
query "github.com/cosmos/cosmos-sdk/types/query"
"github.com/pokt-network/poktroll/pkg/crypto/rings"
"github.com/pokt-network/poktroll/x/application/types"
"github.com/pokt-network/ring-go"
)

// ApplicationRing pairs an onchain application with a PublicKeyFetcher.
//
// - Embeds types.Application (the application whose ring is built)
// - Embeds PublicKeyFetcher (used by GetRing to resolve addresses to public keys)
type ApplicationRing struct {
types.Application
PublicKeyFetcher
}

// ApplicationClient is the interface to interact with the on-chain application-module.
//
// - Used to get the list of applications and the details of a specific application
Expand Down Expand Up @@ -128,38 +120,3 @@ func (ac *ApplicationClient) GetApplicationsDelegatingToGateway(

return gatewayDelegatingApplications, nil
}

// GetRing builds the ring for the application as of the given session end height.
//
// - First ring member is always the application's own address
// - Remaining members are the gateway addresses returned by rings.GetRingAddressesAtSessionEndHeight
// - If no gateway addresses are returned, the application address is added a second time
// - Returns an error if PublicKeyFetcher is not set or any public key lookup fails
func (a ApplicationRing) GetRing(
	ctx context.Context,
	sessionEndHeight uint64,
) (addressRing *ring.Ring, err error) {
	if a.PublicKeyFetcher == nil {
		return nil, errors.New("GetRing: Public Key Fetcher not set")
	}

	gatewayAddrs := rings.GetRingAddressesAtSessionEndHeight(&a.Application, sessionEndHeight)

	// The application address always occupies the first slot.
	members := []string{a.Application.Address}
	if len(gatewayAddrs) > 0 {
		members = append(members, gatewayAddrs...)
	} else {
		// No delegated gateways: repeat the application address.
		// NOTE(review): presumably because a ring needs at least two keys; confirm.
		members = append(members, a.Application.Address)
	}

	// Resolve every member address to its public key, preserving order.
	memberPubKeys := make([]cryptotypes.PubKey, len(members))
	for i, member := range members {
		memberPubKey, fetchErr := a.PublicKeyFetcher.GetPubKeyFromAddress(ctx, member)
		if fetchErr != nil {
			return nil, fetchErr
		}
		memberPubKeys[i] = memberPubKey
	}

	return rings.GetRingFromPubKeys(memberPubKeys)
}
266 changes: 266 additions & 0 deletions client/cache.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,266 @@
package client

import (
"context"
"fmt"
"math"
"time"

cryptotypes "github.com/cosmos/cosmos-sdk/crypto/types"
"github.com/pokt-network/poktroll/pkg/polylog"
apptypes "github.com/pokt-network/poktroll/x/application/types"
sessiontypes "github.com/pokt-network/poktroll/x/session/types"
"github.com/viccon/sturdyc"

sdk "github.com/pokt-network/shannon-sdk"
)

// Compile-time check that *GatewayClientCache satisfies the OnchainDataFetcher interface.
var _ OnchainDataFetcher = (*GatewayClientCache)(nil)

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[linter-name (fail-on-found)] reported by reviewdog 🐶
var _ OnchainDataFetcher = &GatewayClientCache{}


// ---------------- Cache Configuration ----------------
const (
// retryBaseDelay:
// - Base delay for exponential backoff on failed refreshes
// - Passed to sturdyc.WithEarlyRefreshes in getCache
retryBaseDelay = 100 * time.Millisecond

// cacheCapacity:
// - Max entries across all shards (not per-shard)
// - Exceeding capacity triggers LRU eviction per shard
// - 100k supports most large deployments
// - TODO_TECHDEBT(@commoddity): Revisit based on real-world usage; consider making configurable
cacheCapacity = 100_000

// numShards:
// - Number of independent cache shards for concurrency
// - Reduces lock contention, improves parallelism
// - 10 is a good balance for most workloads
numShards = 10

// evictionPercentage:
// - % of LRU entries evicted per shard when full
// - 10% = incremental cleanup, avoids memory spikes
// - SturdyC also evicts expired entries in background
evictionPercentage = 10

// TODO_TECHDEBT(@commoddity): See Issue #291 for improvements to refresh logic
// minEarlyRefreshPercentage:
// - Earliest point (as a fraction of TTL) to start background refresh
// - 0.75 = 75% of TTL
// - getCacheDelays rounds the result to the nearest second (e.g. 22.5s -> 23s for a 30s TTL)
minEarlyRefreshPercentage = 0.75

// maxEarlyRefreshPercentage:
// - Latest point (as a fraction of TTL) to start background refresh
// - 0.9 = 90% of TTL (e.g. 27s for a 30s TTL)
// - Intended to leave time for the refresh to complete before expiry
maxEarlyRefreshPercentage = 0.9
)

// pubKeyCacheTTL is the TTL used for the account public key cache.
//
// - Set to the largest representable duration, so entries effectively never expire
// - time.Duration(math.MaxInt64) equals ~292 years
// - Rationale (per the original author): account data never changes
const pubKeyCacheTTL = time.Duration(math.MaxInt64)

// Prefixes for cache keys, used to avoid collisions with other keys.
const (
// sessionCacheKeyPrefix is the first segment of keys built by getSessionCacheKey.
sessionCacheKeyPrefix = "session"
// accountPubKeyCacheKeyPrefix is the first segment of keys built by getAccountPubKeyCacheKey.
accountPubKeyCacheKeyPrefix = "pubkey"
)

// GatewayClientCache provides a caching layer for the gateway client.
// It is the primary fetching/caching layer that caches onchain data using SturdyC.
// It delegates to an underlying OnchainDataFetcher to fetch data before caching it.
//
// - Early refresh: background updates before expiry (prevents thundering herd/latency spikes)
// - Example: 30s TTL, refresh window at roughly 75–90% of the TTL
// - Benefits: zero-latency reads, graceful degradation, auto load balancing
//
// Docs: https://github.com/viccon/sturdyc
type GatewayClientCache struct {
logger polylog.Logger

// onchainDataFetcher is the underlying data source.
// It is called directly by GetApp and on cache misses by GetSession and GetAccountPubKey.
onchainDataFetcher OnchainDataFetcher

// Session cache
// TODO_MAINNET_MIGRATION(@Olshansk): Revisit after mainnet
// TODO_NEXT(@commoddity): Session refresh handling should be significantly reworked as part of the next changes following PATH PR #297.
// The proposed change is to align session refreshes with actual session expiry time,
// using the session expiry block and the Shannon SDK's block client.
// When this is done, session cache TTL can be removed altogether.
sessionCache *sturdyc.Client[sessiontypes.Session]

// The account public key cache; used to cache account public keys indefinitely.
// It is created with pubKeyCacheTTL and early refreshes disabled.
// Entries are fetched on the first request for an address (see GetAccountPubKey).
accountPubKeyCache *sturdyc.Client[cryptotypes.PubKey]
}

// NewGatewayClientCache wraps the given OnchainDataFetcher in a SturdyC-backed caching layer.
//
// - Fills unset cacheConfig fields via hydrateDefaults and logs the resulting configuration
// - Session cache: uses cacheConfig.SessionTTL; early refresh follows cacheConfig.EarlyRefreshEnabled
// - Account public key cache: uses pubKeyCacheTTL with early refresh disabled
// - The returned error is currently always nil
func NewGatewayClientCache(
	logger polylog.Logger,
	dataFetcher OnchainDataFetcher,
	cacheConfig CacheConfig,
) (*GatewayClientCache, error) {
	cacheLogger := logger.With("client", "gateway_client_cache")

	cacheConfig.hydrateDefaults()
	cacheLogger.Info().
		Bool("use_cache", *cacheConfig.UseCache).
		Dur("session_ttl", cacheConfig.SessionTTL).
		Bool("early_refresh_enabled", *cacheConfig.EarlyRefreshEnabled).
		Msg("Cache configuration")

	// Sessions expire after the configured TTL and may be refreshed early.
	sessions := getCache[sessiontypes.Session](
		cacheConfig.SessionTTL,
		*cacheConfig.EarlyRefreshEnabled,
	)

	// Public keys are kept for pubKeyCacheTTL and are never refreshed early.
	const refreshPubKeysEarly = false
	pubKeys := getCache[cryptotypes.PubKey](pubKeyCacheTTL, refreshPubKeysEarly)

	gatewayClientCache := &GatewayClientCache{
		logger:             cacheLogger,
		onchainDataFetcher: dataFetcher,
		sessionCache:       sessions,
		accountPubKeyCache: pubKeys,
	}
	return gatewayClientCache, nil
}

// getCache builds a SturdyC cache client for values of type T.
//
// - Every cache shares cacheCapacity, numShards and evictionPercentage
// - earlyRefreshEnabled == false: plain cache with the given TTL
// - earlyRefreshEnabled == true: additionally configured with sturdyc.WithEarlyRefreshes,
// using the window from getCacheDelays, the TTL as the third argument, and retryBaseDelay
//
// See: https://github.com/viccon/sturdyc?tab=readme-ov-file#early-refreshes
func getCache[T any](ttl time.Duration, earlyRefreshEnabled bool) *sturdyc.Client[T] {
	if !earlyRefreshEnabled {
		return sturdyc.New[T](cacheCapacity, numShards, ttl, evictionPercentage)
	}

	// Window within which SturdyC schedules the background refresh.
	earliestRefresh, latestRefresh := getCacheDelays(ttl)
	earlyRefreshOpt := sturdyc.WithEarlyRefreshes(
		earliestRefresh,
		latestRefresh,
		ttl,
		retryBaseDelay,
	)

	return sturdyc.New[T](cacheCapacity, numShards, ttl, evictionPercentage, earlyRefreshOpt)
}

// getCacheDelays returns the min/max delays for SturdyC's Early Refresh strategy.
//
// - Window is minEarlyRefreshPercentage–maxEarlyRefreshPercentage of the TTL
// - Each bound is rounded to the nearest whole second (e.g. 23s–27s for a 30s TTL)
// - For short TTLs, rounding can collapse the window (min delay of 0, or max delay
// at or beyond the TTL); in that case the unrounded fractions are returned instead
//
// Parameters:
// - ttl: the cache entry TTL the window is derived from
//
// Returns:
// - minDelay: earliest delay after which a background refresh may start
// - maxDelay: latest delay after which a background refresh may start
//
// See: https://github.com/viccon/sturdyc?tab=readme-ov-file#early-refreshes
func getCacheDelays(ttl time.Duration) (minDelay, maxDelay time.Duration) {
	minExact := float64(ttl) * minEarlyRefreshPercentage
	maxExact := float64(ttl) * maxEarlyRefreshPercentage

	// Round to the nearest second.
	minDelay = time.Duration(minExact/float64(time.Second)+0.5) * time.Second
	maxDelay = time.Duration(maxExact/float64(time.Second)+0.5) * time.Second

	// Whole-second rounding only makes sense when the result is still a usable
	// window inside the TTL. Otherwise keep the exact fractions so the refresh
	// is neither immediate nor scheduled at/after expiry.
	if minDelay <= 0 || maxDelay >= ttl {
		minDelay = time.Duration(minExact)
		maxDelay = time.Duration(maxExact)
	}

	return minDelay, maxDelay
}

// GetApp returns the application for the given address without caching.
//
// - Delegates straight to the underlying OnchainDataFetcher on every call
// - Apps are only needed on gateway startup, to determine the staked services
// information for apps owned by the gateway
// - Everywhere else (e.g. sending relay requests) the application is obtained
// from the session, which contains it
func (gcc *GatewayClientCache) GetApp(ctx context.Context, appAddr string) (apptypes.Application, error) {
	fetcher := gcc.onchainDataFetcher
	return fetcher.GetApp(ctx, appAddr)
}

// GetSession returns the session for a service/app pair, served from the session cache.
//
// - The cache key is built by getSessionCacheKey
// - When SturdyC needs a value, it invokes the fetch function, which logs at debug
// level and requests the session from the underlying OnchainDataFetcher
//
// See: https://github.com/viccon/sturdyc?tab=readme-ov-file#get-or-fetch
func (gcc *GatewayClientCache) GetSession(
	ctx context.Context,
	serviceID sdk.ServiceID,
	appAddr string,
) (sessiontypes.Session, error) {
	sessionKey := getSessionCacheKey(serviceID, appAddr)

	fetchFromFullNode := func(fetchCtx context.Context) (sessiontypes.Session, error) {
		gcc.logger.Debug().
			Str("session_key", sessionKey).
			Msgf("GetSession: GatewayClientCache making request to full node for service %s", serviceID)

		return gcc.onchainDataFetcher.GetSession(fetchCtx, serviceID, appAddr)
	}

	return gcc.sessionCache.GetOrFetch(ctx, sessionKey, fetchFromFullNode)
}

// getSessionCacheKey returns the session cache key for a service/app pair.
//
// Format: <sessionCacheKeyPrefix>:<serviceID>:<appAddr>
func getSessionCacheKey(serviceID sdk.ServiceID, appAddr string) string {
	const keyFormat = "%s:%s:%s"
	return fmt.Sprintf(keyFormat, sessionCacheKeyPrefix, serviceID, appAddr)
}

// GetAccountPubKey returns the public key of the account with the given address.
//
// - Served from the account public key cache, keyed by getAccountPubKeyCacheKey
// - When SturdyC needs a value, it invokes the fetch function, which logs at debug
// level and requests the key from the underlying OnchainDataFetcher
// - The cache is created with pubKeyCacheTTL, so a fetched key is kept indefinitely
//
// See: https://github.com/viccon/sturdyc?tab=readme-ov-file#get-or-fetch
func (gcc *GatewayClientCache) GetAccountPubKey(
	ctx context.Context,
	address string,
) (pubKey cryptotypes.PubKey, err error) {
	accountKey := getAccountPubKeyCacheKey(address)

	fetchFromFullNode := func(fetchCtx context.Context) (cryptotypes.PubKey, error) {
		gcc.logger.Debug().
			Str("account_key", accountKey).
			Msg("GetAccountPubKey: GatewayClientCache making request to full node")

		return gcc.onchainDataFetcher.GetAccountPubKey(fetchCtx, address)
	}

	return gcc.accountPubKeyCache.GetOrFetch(ctx, accountKey, fetchFromFullNode)
}

// getAccountPubKeyCacheKey returns the account public key cache key for an address.
//
// Format: <accountPubKeyCacheKeyPrefix>:<address>
//
// eg. "pubkey:pokt1up7zlytnmvlsuxzpzvlrta95347w322adsxslw"
func getAccountPubKeyCacheKey(address string) string {
	const keyFormat = "%s:%s"
	return fmt.Sprintf(keyFormat, accountPubKeyCacheKeyPrefix, address)
}

// IsHealthy reports whether the cache is healthy; it currently always returns true.
//
// Satisfies the interface required by the ShannonFullNode interface.
// TODO_IMPROVE(@commoddity):
// - Add smarter health checks (e.g. verify cached apps/sessions)
// - Currently always true (cache fills as needed)
func (gcc *GatewayClientCache) IsHealthy() bool {
	const alwaysHealthy = true
	return alwaysHealthy
}
Loading