pokt-network
diff --git a/‎Makefile‎
Lines changed: 1 addition & 1 deletion b/‎Makefile‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎config/config_test.go‎
Lines changed: 5 additions & 0 deletions b/‎config/config_test.go‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎config/examples/config.shannon_example.yaml‎
Lines changed: 18 additions & 0 deletions b/‎config/examples/config.shannon_example.yaml‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎metrics/reputation/metrics.go‎
Lines changed: 89 additions & 0 deletions b/‎metrics/reputation/metrics.go‎
Lines changed: 89 additions & 0 deletions
@@ -185,4 +185,4 @@ include ./makefiles/helpers.mk
 		fi; \
 		echo ""; \
 		exit 1; \
-	fi
+	fi
@@ -94,6 +94,11 @@ func Test_LoadGatewayConfigFromYAML(t *testing.T) {
 							MinThreshold:    30,
 							RecoveryTimeout: 5 * time.Minute,
 							KeyGranularity:  reputation.KeyGranularityEndpoint,
+							TieredSelection: reputation.TieredSelectionConfig{
+								Enabled:        true,
+								Tier1Threshold: 70,
+								Tier2Threshold: 50,
+							},
 						},
 					},
 				},
 
@@ -121,6 +121,24 @@ shannon_config:
       #   sol:
       #     # Use per-supplier for sol to group by supplier
       #     key_granularity: "per-supplier"
+      # Tiered endpoint selection configuration
+      # When enabled, endpoints are grouped into tiers by reputation score, and
+      # selection cascades down the tiers: Tier 1 first, then Tier 2, then Tier 3
+      tiered_selection:
+        # Enable/disable tiered selection
+        # Default: true (when reputation is enabled)
+        enabled: true
+        # Minimum score for Tier 1 (Premium tier)
+        # Endpoints with scores >= tier1_threshold are selected first
+        # Default: 70
+        tier1_threshold: 70
+        # Minimum score for Tier 2 (Good tier)
+        # Endpoints with scores >= tier2_threshold but < tier1_threshold
+        # are selected only if no Tier 1 endpoints are available
+        # Default: 50
+        tier2_threshold: 50
+        # Tier 3 (Fair tier) uses min_threshold as its minimum score
+        # Endpoints in Tier 3 are selected only if Tier 1 and 2 are empty
       # Redis configuration (only used when storage_type is "redis")
       # redis:
       #   address: "localhost:6379"
 
@@ -20,13 +20,21 @@ const (
 
 	// Reputation service health metrics
 	reputationErrorsTotalMetric = "shannon_reputation_errors_total"
+
+	// Tiered selection metrics
+	reputationTierSelectionMetric = "shannon_reputation_tier_selection_total"
+
+	// Tier distribution metrics (gauge showing endpoints per tier)
+	reputationTierDistributionMetric = "shannon_reputation_tier_distribution"
 )
 
+// init registers the reputation metric collectors with Prometheus when the package is loaded.
+// NOTE(review): prometheus.MustRegister is expected to panic if a collector cannot be
+// registered (e.g. a duplicate name) — confirm against the client library documentation.
 func init() {
 	prometheus.MustRegister(reputationSignalsTotal)
 	prometheus.MustRegister(reputationEndpointsFiltered)
 	prometheus.MustRegister(reputationScoreDistribution)
 	prometheus.MustRegister(reputationErrorsTotal)
+	prometheus.MustRegister(reputationTierSelection)
+	prometheus.MustRegister(reputationTierDistribution)
 }
 
 var (
@@ -36,6 +44,10 @@ var (
 	//   - signal_type: Type of signal (success, minor_error, major_error, critical_error, fatal_error)
 	//   - endpoint_domain: Effective TLD+1 domain extracted from endpoint URL
 	//
+	// CARDINALITY WARNING: The endpoint_domain label can have high cardinality
+	// in deployments with many unique supplier domains. Monitor Prometheus memory
+	// usage and consider aggregating metrics if cardinality exceeds ~1000 unique domains.
+	//
 	// Use to analyze:
 	//   - Signal distribution by type and service
 	//   - Endpoint reliability patterns
@@ -109,6 +121,42 @@ var (
 		},
 		[]string{"operation", "error_type"},
 	)
+
+	// reputationTierSelection tracks endpoint selections by tier.
+	// It is incremented once per call to RecordTierSelection.
+	// Labels:
+	//   - service_id: Target service identifier
+	//   - tier: Selected tier (1=Premium, 2=Good, 3=Fair, 0=Random/disabled)
+	//
+	// The tier label value is produced by tierToString, so any tier number
+	// outside 1-3 is reported under the "0" label.
+	//
+	// Use to analyze:
+	//   - Tier distribution across services
+	//   - How often cascade-down occurs (tier 2/3 selections)
+	//   - Effectiveness of tiered selection
+	reputationTierSelection = prometheus.NewCounterVec(
+		prometheus.CounterOpts{
+			Subsystem: pathProcess,
+			Name:      reputationTierSelectionMetric,
+			Help:      "Total endpoint selections by tier",
+		},
+		[]string{"service_id", "tier"},
+	)
+
+	// reputationTierDistribution tracks the current distribution of endpoints across tiers.
+	// It is written by RecordTierDistribution, which overwrites the "1", "2" and "3"
+	// tier values for a service on every call.
+	// Labels:
+	//   - service_id: Target service identifier
+	//   - tier: Tier number (1=Premium, 2=Good, 3=Fair)
+	//
+	// Use to analyze:
+	//   - Real-time health of endpoint pool
+	//   - How endpoints are distributed across reputation tiers
+	//   - Identify services with poor endpoint quality
+	reputationTierDistribution = prometheus.NewGaugeVec(
+		prometheus.GaugeOpts{
+			Subsystem: pathProcess,
+			Name:      reputationTierDistributionMetric,
+			Help:      "Current number of endpoints in each tier",
+		},
+		[]string{"service_id", "tier"},
+	)
 )
 
 // RecordSignal records a reputation signal metric.
@@ -152,3 +200,44 @@ func RecordError(operation, errorType string) {
 		"error_type": errorType,
 	}).Inc()
 }
+
+// RecordTierSelection increments the tier-selection counter for serviceID.
+// The tier number is converted to its label value by tierToString, so any
+// tier outside 1-3 is counted under the "0" label.
+func RecordTierSelection(serviceID string, tier int) {
+	selectionLabels := prometheus.Labels{
+		"service_id": serviceID,
+		"tier":       tierToString(tier),
+	}
+	reputationTierSelection.With(selectionLabels).Inc()
+}
+
+// RecordTierDistribution publishes how many endpoints of serviceID currently sit in each tier.
+// It sets the gauge for tier labels "1", "2" and "3" to tier1Count, tier2Count and
+// tier3Count respectively, replacing whatever value was recorded before.
+// Intended to be called each time tiered selection runs so the gauges show real-time tier health.
+func RecordTierDistribution(serviceID string, tier1Count, tier2Count, tier3Count int) {
+	perTier := [...]struct {
+		label string
+		count int
+	}{
+		{label: "1", count: tier1Count},
+		{label: "2", count: tier2Count},
+		{label: "3", count: tier3Count},
+	}
+
+	// Tiers are written in ascending order, one gauge sample per tier.
+	for _, entry := range perTier {
+		gauge := reputationTierDistribution.With(prometheus.Labels{
+			"service_id": serviceID,
+			"tier":       entry.label,
+		})
+		gauge.Set(float64(entry.count))
+	}
+}
+
+// tierToString maps a tier number to the value used for the "tier" metric label.
+// Tiers 1, 2 and 3 map to "1", "2" and "3"; every other value maps to "0".
+func tierToString(tier int) string {
+	labels := [...]string{"0", "1", "2", "3"}
+	// Anything that is not a known tier index falls back to the "0" label.
+	if tier < 1 || tier >= len(labels) {
+		return labels[0]
+	}
+	return labels[tier]
+}