@@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm
6666// ─── Semantic Hashing for Telegram Posts ─────────────────────────────────────
6767
6868/**
69- * Produce a normalized hash of a post's content.
70- * Strips timestamps, normalizes numbers, lowercases — so "BREAKING: 5 missiles at 14:32"
71- * and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals) .
69+ * Produce a normalized semantic hash of a post's content.
70+ * This is intentionally lossy and is only safe as a fallback when a stable
71+ * post identity is unavailable .
7272 */
7373function contentHash ( text ) {
7474 if ( ! text ) return '' ;
@@ -83,14 +83,34 @@ function contentHash(text) {
8383 return createHash ( 'sha256' ) . update ( normalized ) . digest ( 'hex' ) . substring ( 0 , 12 ) ;
8484}
8585
/**
 * Build a dedup key for a Telegram post, preferring stable identity over content.
 *
 * Resolution order:
 *   1. `postId` / `messageId`  -> `id:<channel>:<id>`, or `id:<id>` when the post
 *      carries no channel/chat field.
 *   2. `channel`/`chat` + `date` -> first 16 hex chars of
 *      SHA-256(`channel|date|text`), with text trimmed and capped at 200 chars.
 *   3. Otherwise -> `semantic:<contentHash(text)>` (lossy, last resort).
 *
 * @param {object|null|undefined} post - post with any of: postId, messageId,
 *   channel, chat, date, text.
 * @returns {string} The key, or '' when the post offers nothing to key on, so
 *   callers can drop it with a simple truthiness check.
 */
function stablePostKey(post) {
  if (!post) return '';

  const sourceId = post.postId || post.messageId || '';
  const channelId = post.channel || post.chat || '';

  if (sourceId) {
    // NOTE(review): message ids are assumed to be unique only within a channel,
    // so the id is scoped by channel when one is available — confirm upstream.
    // Without scoping, equal ids from different channels would collide and
    // suppress a genuinely new post.
    return channelId ? `id:${channelId}:${sourceId}` : `id:${sourceId}`;
  }

  // A non-string text is treated as absent instead of throwing on .trim().
  const rawText = typeof post.text === 'string' ? post.text : '';
  const date = post.date || '';

  if (channelId && date) {
    const text = rawText.trim().substring(0, 200);
    return createHash('sha256')
      .update(`${channelId}|${date}|${text}`)
      .digest('hex')
      .substring(0, 16);
  }

  // No identity and no text: return '' rather than the truthy "semantic:" so
  // empty posts neither raise signals nor all share one key.
  if (!rawText) return '';

  const semantic = contentHash(rawText);
  return semantic ? `semantic:${semantic}` : '';
}
104+
86105// ─── Core Delta Computation ──────────────────────────────────────────────────
87106
88107/**
89108 * @param {object } current - current sweep's synthesized data
90109 * @param {object|null } previous - previous sweep's synthesized data (null on first run)
91110 * @param {object } [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
111+ * @param {Array<object> } [priorRuns] - optional compacted prior runs for broader dedup
92112 */
93- export function computeDelta ( current , previous , thresholdOverrides = { } ) {
113+ export function computeDelta ( current , previous , thresholdOverrides = { } , priorRuns = [ ] ) {
94114 if ( ! previous ) return null ;
95115 if ( ! current ) return null ;
96116
@@ -152,16 +172,21 @@ export function computeDelta(current, previous, thresholdOverrides = {}) {
152172
153173 // ─── New urgent Telegram posts (semantic dedup) ──────────────────────
154174
175+ // Dedup against all recent runs (not just the last one) to catch posts that
176+ // drop out of one sweep but reappear in a later one. Use stable post identity
177+ // where possible so updated posts are not collapsed into earlier alerts just
178+ // because their text is semantically similar.
179+ const sources = priorRuns . length > 0 ? priorRuns : [ previous ] ;
155180 const prevHashes = new Set (
156- ( previous . tg ?. urgent || [ ] ) . map ( p => contentHash ( p . text ) )
181+ sources . flatMap ( run => ( run ? .tg ?. urgent || [ ] ) . map ( stablePostKey ) ) . filter ( Boolean )
157182 ) ;
158183
159184 for ( const post of ( current . tg ?. urgent || [ ] ) ) {
160- const hash = contentHash ( post . text ) ;
185+ const hash = stablePostKey ( post ) ;
161186 if ( hash && ! prevHashes . has ( hash ) ) {
162187 signals . new . push ( {
163188 key : `tg_urgent:${ hash } ` ,
164- text : post . text ?. substring ( 0 , 120 ) ,
189+ text : post . text ,
165190 item : post ,
166191 reason : 'New urgent OSINT post' ,
167192 } ) ;
0 commit comments