Skip to content

Commit 2b5be54

Browse files
committed
Squashed commit of the following:
commit 53f6d81 Merge: 8c1ea37 5c08355 Author: Calesthio <celesthioailabs@gmail.com> Date: Wed Mar 25 10:21:02 2026 -0700 Merge pull request calesthio#68 from schergr/fix/osint-signal-truncation Fix remaining OSINT signal text truncation commit 5c08355 Author: calesthio <celesthioailabs@gmail.com> Date: Tue Mar 24 18:48:55 2026 -0700 Fix Telegram dedup identity and legacy Markdown escaping commit b7322f1 Author: Greg Scher <greg@212southadvisors.com> Date: Mon Mar 23 13:01:32 2026 -0400 Fix HTML entity decoding and broaden OSINT dedup window - Replace single &#039; handler with generic numeric/hex entity decoder so &#39; and other unpadded entities are properly converted - Dedup urgent OSINT posts against all hot memory runs (last 3 sweeps) instead of only the previous sweep, preventing posts that drop out of one sweep from reappearing as "new" in the next Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> commit 31c305c Author: Greg Scher <greg@212southadvisors.com> Date: Mon Mar 23 12:57:37 2026 -0400 Escape Markdown in alert signals and cap OSINT text in ideas prompt Addresses PR review: escape Markdown-sensitive characters in _formatTieredAlert signal bullets to prevent Telegram Bot API rejections, and add a 1500-char budget for URGENT_OSINT in compactSweepForLLM to bound prompt size while keeping full text upstream. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> commit 2d166c2 Author: Greg Scher <greg@212southadvisors.com> Date: Sat Mar 21 12:59:30 2026 -0400 Remove remaining text truncation across delta engine, memory, and ideas The prior fix (753c676) only removed truncation at source ingestion and alert formatting. Signals were still being cut to 120 chars in the delta engine, 80 chars in memory snapshots, and 120 chars in the ideas LLM context — so OSINT posts arrived at the alerter already truncated. 
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> commit 753c676 Author: Greg Scher <greg@212southadvisors.com> Date: Fri Mar 20 16:49:58 2026 -0400 Remove text truncation limits from Telegram posts Posts were being cut to 300 chars (source ingestion) and 150 chars (alert evaluation), losing valuable OSINT context. The sendMessage chunker already handles the 4096-char Telegram API limit. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 1684255 commit 2b5be54

File tree

5 files changed

+76
-19
lines changed

5 files changed

+76
-19
lines changed

apis/sources/telegram.mjs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ export async function getChat(chatId) {
9494
// Compact a Bot API message for briefing output
9595
function compactBotMessage(msg) {
9696
return {
97-
text: (msg.text || msg.caption || '').slice(0, 300),
97+
text: msg.text || msg.caption || '',
9898
date: msg.date ? new Date(msg.date * 1000).toISOString() : null,
9999
chat: msg.chat?.title || msg.chat?.username || 'unknown',
100100
views: msg.views || 0,
@@ -169,10 +169,12 @@ function parseWebPreview(html, channelId) {
169169
.replace(/&lt;/g, '<')
170170
.replace(/&gt;/g, '>')
171171
.replace(/&quot;/g, '"')
172-
.replace(/&#039;/g, "'")
172+
.replace(/&#0*39;/g, "'")
173+
.replace(/&#x0*27;/gi, "'")
174+
.replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n)))
175+
.replace(/&#x([0-9a-f]+);/gi, (_, h) => String.fromCharCode(parseInt(h, 16)))
173176
.replace(/&nbsp;/g, ' ')
174-
.trim()
175-
.slice(0, 300);
177+
.trim();
176178
}
177179

178180
// Extract view count

lib/alerts/telegram.mjs

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -273,7 +273,7 @@ export class TelegramAlerter {
273273
headline: `OSINT Surge: ${osintNew.length} New Urgent Posts`,
274274
reason: `${osintNew.length} new urgent OSINT signals detected. Elevated conflict reporting tempo.`,
275275
actionable: 'Review OSINT stream for pattern. Cross-check with satellite and ACLED data.',
276-
signals: osintNew.map(s => (s.text || '').substring(0, 40)).slice(0, 3),
276+
signals: osintNew.map(s => s.text || s.label || s.key).slice(0, 5),
277277
crossCorrelation: 'telegram OSINT',
278278
};
279279
}
@@ -684,7 +684,7 @@ Respond with ONLY valid JSON:
684684
if (osintSignals.length > 0) {
685685
sections.push('📡 OSINT SIGNALS:\n' + osintSignals.map(s => {
686686
const post = s.item || s;
687-
return ` [${post.channel || 'UNKNOWN'}] ${(post.text || s.reason || '').substring(0, 150)}`;
687+
return ` [${post.channel || 'UNKNOWN'}] ${post.text || s.reason || ''}`;
688688
}).join('\n'));
689689
}
690690

@@ -731,7 +731,10 @@ Respond with ONLY valid JSON:
731731
}
732732

733733
if (evaluation.signals?.length) {
734-
lines.push('', `Signals: ${evaluation.signals.join(' · ')}`);
734+
lines.push('', `*Signals:*`);
735+
for (const sig of evaluation.signals) {
736+
lines.push(`• ${escapeMd(sig)}`);
737+
}
735738
}
736739

737740
lines.push('', `_${new Date().toISOString().replace('T', ' ').substring(0, 19)} UTC_`);
@@ -742,7 +745,14 @@ Respond with ONLY valid JSON:
742745

743746
// ─── Helpers ──────────────────────────────────────────────────────────────
744747

745-
export function parseJSON(text) {
748+
/**
 * Escape text for Telegram's legacy Markdown parse mode.
 *
 * The bot sends alerts with legacy Markdown parse mode, not MarkdownV2, so
 * only the characters that legacy Markdown actually treats as markup are
 * escaped: underscore, asterisk, backtick and opening square bracket.
 *
 * @param {*} text - Value to escape. Non-string values are stringified first,
 *   so a numeric or otherwise non-string signal cannot throw during formatting.
 * @returns {string} The escaped text, or '' for any falsy input.
 */
function escapeMd(text) {
  if (!text) return '';
  // Coerce before calling .replace(): signals are not guaranteed to be strings.
  return String(text).replace(/([_*`\[])/g, '\\$1');
}
754+
755+
function parseJSON(text) {
746756
if (!text) return null;
747757
let cleaned = text.trim();
748758
if (cleaned.startsWith('```')) {

lib/delta/engine.mjs

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm
6666
// ─── Semantic Hashing for Telegram Posts ─────────────────────────────────────
6767

6868
/**
69-
* Produce a normalized hash of a post's content.
70-
* Strips timestamps, normalizes numbers, lowercases — so "BREAKING: 5 missiles at 14:32"
71-
* and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals).
69+
* Produce a normalized semantic hash of a post's content.
70+
* This is intentionally lossy and is only safe as a fallback when a stable
71+
* post identity is unavailable.
7272
*/
7373
function contentHash(text) {
7474
if (!text) return '';
@@ -83,14 +83,34 @@ function contentHash(text) {
8383
return createHash('sha256').update(normalized).digest('hex').substring(0, 12);
8484
}
8585

86+
/**
 * Build a dedup key for an urgent post, preferring stable identity over content.
 *
 * Resolution order:
 *   1. Source id (`postId` or `messageId`), scoped by channel when one is known.
 *   2. Truncated SHA-256 of `channel|date|text`, using the first 200 characters
 *      of the trimmed text.
 *   3. `semantic:` + `contentHash(text)` as a lossy last resort.
 *
 * @param {object|null|undefined} post - Post record; reads `postId`, `messageId`,
 *   `channel`, `chat`, `date` and `text`.
 * @returns {string} The key, or '' when the post carries nothing to identify it,
 *   so callers can drop it with a simple truthiness check.
 */
function stablePostKey(post) {
  if (!post) return '';

  const sourceId = post.postId || post.messageId || '';
  const channelId = post.channel || post.chat || '';

  if (sourceId) {
    // Source ids such as Telegram message ids are only unique within one chat,
    // so scope the key by channel to stop different channels from colliding.
    return channelId ? `id:${channelId}:${sourceId}` : `id:${sourceId}`;
  }

  const date = post.date || '';
  if (channelId && date) {
    const text = (post.text || '').trim().substring(0, 200);
    return createHash('sha256')
      .update(`${channelId}|${date}|${text}`)
      .digest('hex')
      .substring(0, 16);
  }

  // Without text there is nothing to hash. Return '' rather than the bare
  // (truthy) prefix "semantic:", which would slip past callers' empty-key guards.
  if (!post.text) return '';
  const semantic = contentHash(post.text);
  return semantic ? `semantic:${semantic}` : '';
}
104+
86105
// ─── Core Delta Computation ──────────────────────────────────────────────────
87106

88107
/**
89108
* @param {object} current - current sweep's synthesized data
90109
* @param {object|null} previous - previous sweep's synthesized data (null on first run)
91110
* @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
111+
* @param {Array<object>} [priorRuns] - optional compacted prior runs for broader dedup
92112
*/
93-
export function computeDelta(current, previous, thresholdOverrides = {}) {
113+
export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) {
94114
if (!previous) return null;
95115
if (!current) return null;
96116

@@ -152,16 +172,21 @@ export function computeDelta(current, previous, thresholdOverrides = {}) {
152172

153173
// ─── New urgent Telegram posts (semantic dedup) ──────────────────────
154174

175+
// Dedup against all recent runs (not just the last one) to catch posts that
176+
// drop out of one sweep but reappear in a later one. Use stable post identity
177+
// where possible so updated posts are not collapsed into earlier alerts just
178+
// because their text is semantically similar.
179+
const sources = priorRuns.length > 0 ? priorRuns : [previous];
155180
const prevHashes = new Set(
156-
(previous.tg?.urgent || []).map(p => contentHash(p.text))
181+
sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean)
157182
);
158183

159184
for (const post of (current.tg?.urgent || [])) {
160-
const hash = contentHash(post.text);
185+
const hash = stablePostKey(post);
161186
if (hash && !prevHashes.has(hash)) {
162187
signals.new.push({
163188
key: `tg_urgent:${hash}`,
164-
text: post.text?.substring(0, 120),
189+
text: post.text,
165190
item: post,
166191
reason: 'New urgent OSINT post',
167192
});

lib/delta/memory.mjs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,9 @@ export class MemoryManager {
7474
// Add a new run to hot memory
7575
addRun(synthesizedData) {
7676
const previous = this.getLastRun();
77-
const delta = computeDelta(synthesizedData, previous);
77+
// Collect urgent post hashes from all hot runs for broader dedup window
78+
const priorRuns = this.hot.runs.map(r => r.data);
79+
const delta = computeDelta(synthesizedData, previous, {}, priorRuns);
7880

7981
// Compact the data for storage (strip large arrays)
8082
const compact = this._compactForStorage(synthesizedData);
@@ -199,7 +201,15 @@ export class MemoryManager {
199201
bls: data.bls,
200202
treasury: data.treasury,
201203
gscpi: data.gscpi,
202-
tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text?.substring(0, 80), date: p.date })) },
204+
tg: {
205+
posts: data.tg?.posts,
206+
urgent: (data.tg?.urgent || []).map(p => ({
207+
text: p.text,
208+
date: p.date,
209+
channel: p.channel || p.chat || null,
210+
postId: p.postId || null,
211+
})),
212+
},
203213
thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })),
204214
air: (data.air || []).map(a => ({ region: a.region, total: a.total })),
205215
nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })),

lib/llm/ideas.mjs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,20 @@ function compactSweepForLLM(data, delta, previousIdeas) {
128128
sections.push(`SUPPLY_CHAIN: GSCPI=${data.gscpi.value} (${data.gscpi.interpretation})`);
129129
}
130130

131-
// Geopolitical signals
131+
// Geopolitical signals (cap total OSINT text to ~1500 chars to keep prompt compact)
132132
const urgentPosts = (data.tg?.urgent || []).slice(0, 5);
133133
if (urgentPosts.length) {
134-
sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${(p.text || '').substring(0, 120)}`).join('\n')}`);
134+
const MAX_OSINT_CHARS = 1500;
135+
let remaining = MAX_OSINT_CHARS;
136+
const lines = [];
137+
for (const p of urgentPosts) {
138+
const text = p.text || '';
139+
if (remaining <= 0) break;
140+
const trimmed = text.length > remaining ? text.substring(0, remaining) + '…' : text;
141+
lines.push(`- ${trimmed}`);
142+
remaining -= trimmed.length;
143+
}
144+
sections.push(`URGENT_OSINT:\n${lines.join('\n')}`);
135145
}
136146

137147
// Thermal / fire detections

0 commit comments

Comments
 (0)