Skip to content

Commit 53f6d81

Browse files
authored
Merge pull request #68 from schergr/fix/osint-signal-truncation
Fix remaining OSINT signal text truncation
2 parents 8c1ea37 + 5c08355 commit 53f6d81

File tree

5 files changed

+75
-18
lines changed

5 files changed

+75
-18
lines changed

apis/sources/telegram.mjs

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@ export async function getChat(chatId) {
9494
// Compact a Bot API message for briefing output
9595
function compactBotMessage(msg) {
9696
return {
97-
text: (msg.text || msg.caption || '').slice(0, 300),
97+
text: msg.text || msg.caption || '',
9898
date: msg.date ? new Date(msg.date * 1000).toISOString() : null,
9999
chat: msg.chat?.title || msg.chat?.username || 'unknown',
100100
views: msg.views || 0,
@@ -169,10 +169,12 @@ function parseWebPreview(html, channelId) {
169169
.replace(/&lt;/g, '<')
170170
.replace(/&gt;/g, '>')
171171
.replace(/&quot;/g, '"')
172-
.replace(/&#039;/g, "'")
172+
.replace(/&#0*39;/g, "'")
173+
.replace(/&#x0*27;/gi, "'")
174+
.replace(/&#(\d+);/g, (_, n) => String.fromCharCode(Number(n)))
175+
.replace(/&#x([0-9a-f]+);/gi, (_, h) => String.fromCharCode(parseInt(h, 16)))
173176
.replace(/&nbsp;/g, ' ')
174-
.trim()
175-
.slice(0, 300);
177+
.trim();
176178
}
177179

178180
// Extract view count

lib/alerts/telegram.mjs

Lines changed: 13 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -271,7 +271,7 @@ export class TelegramAlerter {
271271
headline: `OSINT Surge: ${osintNew.length} New Urgent Posts`,
272272
reason: `${osintNew.length} new urgent OSINT signals detected. Elevated conflict reporting tempo.`,
273273
actionable: 'Review OSINT stream for pattern. Cross-check with satellite and ACLED data.',
274-
signals: osintNew.map(s => (s.text || '').substring(0, 40)).slice(0, 3),
274+
signals: osintNew.map(s => s.text || s.label || s.key).slice(0, 5),
275275
crossCorrelation: 'telegram OSINT',
276276
};
277277
}
@@ -681,7 +681,7 @@ Respond with ONLY valid JSON:
681681
if (osintSignals.length > 0) {
682682
sections.push('📡 OSINT SIGNALS:\n' + osintSignals.map(s => {
683683
const post = s.item || s;
684-
return ` [${post.channel || 'UNKNOWN'}] ${(post.text || s.reason || '').substring(0, 150)}`;
684+
return ` [${post.channel || 'UNKNOWN'}] ${post.text || s.reason || ''}`;
685685
}).join('\n'));
686686
}
687687

@@ -728,7 +728,10 @@ Respond with ONLY valid JSON:
728728
}
729729

730730
if (evaluation.signals?.length) {
731-
lines.push('', `Signals: ${evaluation.signals.join(' · ')}`);
731+
lines.push('', `*Signals:*`);
732+
for (const sig of evaluation.signals) {
733+
lines.push(`• ${escapeMd(sig)}`);
734+
}
732735
}
733736

734737
lines.push('', `_${new Date().toISOString().replace('T', ' ').substring(0, 19)} UTC_`);
@@ -739,6 +742,13 @@ Respond with ONLY valid JSON:
739742

740743
// ─── Helpers ──────────────────────────────────────────────────────────────
741744

745+
/**
 * Escape text for inclusion in a Telegram message sent with legacy Markdown
 * parse mode (not MarkdownV2).
 *
 * Only the characters legacy Markdown treats as markup are escaped:
 * `_`, `*`, a backtick, and `[`.
 *
 * @param {*} text - value to escape; non-strings are coerced with String()
 * @returns {string} escaped text, or '' for any falsy input
 */
function escapeMd(text) {
  if (!text) return '';
  // Signals are not guaranteed to be strings (e.g. a numeric key or a value
  // from parsed JSON); coerce so `.replace` cannot throw mid-alert.
  return String(text).replace(/([_*`\[])/g, '\\$1');
}
751+
742752
function parseJSON(text) {
743753
if (!text) return null;
744754
let cleaned = text.trim();

lib/delta/engine.mjs

Lines changed: 32 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -66,9 +66,9 @@ const RISK_KEYS = ['vix', 'hy_spread', 'urgent_posts', 'conflict_events', 'therm
6666
// ─── Semantic Hashing for Telegram Posts ─────────────────────────────────────
6767

6868
/**
69-
* Produce a normalized hash of a post's content.
70-
* Strips timestamps, normalizes numbers, lowercases — so "BREAKING: 5 missiles at 14:32"
71-
* and "Breaking: 7 missiles at 15:01" produce the same hash (both are "missile strike" signals).
69+
* Produce a normalized semantic hash of a post's content.
70+
* This is intentionally lossy and is only safe as a fallback when a stable
71+
* post identity is unavailable.
7272
*/
7373
function contentHash(text) {
7474
if (!text) return '';
@@ -83,14 +83,34 @@ function contentHash(text) {
8383
return createHash('sha256').update(normalized).digest('hex').substring(0, 12);
8484
}
8585

86+
/**
 * Build a dedup key for an urgent post, preferring stable identity over content.
 *
 * Resolution order:
 *   1. `postId` / `messageId`, scoped by channel when one is known.
 *   2. A hash of channel + date + the first 200 chars of trimmed text.
 *   3. The lossy semantic content hash, only when the post has text.
 *
 * @param {object|null|undefined} post - post with optional `postId`,
 *   `messageId`, `channel`, `chat`, `date` and `text` fields
 * @returns {string} a key, or '' when the post carries nothing to identify it
 *   (callers treat '' as "skip this post")
 */
function stablePostKey(post) {
  if (!post) return '';

  const sourceId = post.postId || post.messageId || '';
  const channelId = post.channel || post.chat || '';

  if (sourceId) {
    // NOTE(review): Telegram message ids are presumably unique only within a
    // chat, so scope the id by channel to keep equal ids from different
    // channels from colliding — confirm against the source's postId format.
    return channelId ? `id:${channelId}:${sourceId}` : `id:${sourceId}`;
  }

  const date = post.date || '';
  if (channelId && date) {
    const text = (post.text || '').trim().substring(0, 200);
    return createHash('sha256')
      .update(`${channelId}|${date}|${text}`)
      .digest('hex')
      .substring(0, 16);
  }

  // No identity and no text: return '' rather than the truthy string
  // "semantic:", which would slip past the caller's `if (hash && ...)` guard.
  if (!post.text) return '';
  const semantic = contentHash(post.text);
  return semantic ? `semantic:${semantic}` : '';
}
104+
86105
// ─── Core Delta Computation ──────────────────────────────────────────────────
87106

88107
/**
89108
* @param {object} current - current sweep's synthesized data
90109
* @param {object|null} previous - previous sweep's synthesized data (null on first run)
91110
* @param {object} [thresholdOverrides] - optional: { numeric: {...}, count: {...} }
111+
* @param {Array<object>} [priorRuns] - optional compacted prior runs for broader dedup
92112
*/
93-
export function computeDelta(current, previous, thresholdOverrides = {}) {
113+
export function computeDelta(current, previous, thresholdOverrides = {}, priorRuns = []) {
94114
if (!previous) return null;
95115
if (!current) return null;
96116

@@ -152,16 +172,21 @@ export function computeDelta(current, previous, thresholdOverrides = {}) {
152172

153173
// ─── New urgent Telegram posts (semantic dedup) ──────────────────────
154174

175+
// Dedup against all recent runs (not just the last one) to catch posts that
176+
// drop out of one sweep but reappear in a later one. Use stable post identity
177+
// where possible so updated posts are not collapsed into earlier alerts just
178+
// because their text is semantically similar.
179+
const sources = priorRuns.length > 0 ? priorRuns : [previous];
155180
const prevHashes = new Set(
156-
(previous.tg?.urgent || []).map(p => contentHash(p.text))
181+
sources.flatMap(run => (run?.tg?.urgent || []).map(stablePostKey)).filter(Boolean)
157182
);
158183

159184
for (const post of (current.tg?.urgent || [])) {
160-
const hash = contentHash(post.text);
185+
const hash = stablePostKey(post);
161186
if (hash && !prevHashes.has(hash)) {
162187
signals.new.push({
163188
key: `tg_urgent:${hash}`,
164-
text: post.text?.substring(0, 120),
189+
text: post.text,
165190
item: post,
166191
reason: 'New urgent OSINT post',
167192
});

lib/delta/memory.mjs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,9 @@ export class MemoryManager {
7474
// Add a new run to hot memory
7575
addRun(synthesizedData) {
7676
const previous = this.getLastRun();
77-
const delta = computeDelta(synthesizedData, previous);
77+
// Pass the data of every hot run so computeDelta can dedup urgent posts across a broader window
78+
const priorRuns = this.hot.runs.map(r => r.data);
79+
const delta = computeDelta(synthesizedData, previous, {}, priorRuns);
7880

7981
// Compact the data for storage (strip large arrays)
8082
const compact = this._compactForStorage(synthesizedData);
@@ -199,7 +201,15 @@ export class MemoryManager {
199201
bls: data.bls,
200202
treasury: data.treasury,
201203
gscpi: data.gscpi,
202-
tg: { posts: data.tg?.posts, urgent: (data.tg?.urgent || []).map(p => ({ text: p.text?.substring(0, 80), date: p.date })) },
204+
tg: {
205+
posts: data.tg?.posts,
206+
urgent: (data.tg?.urgent || []).map(p => ({
207+
text: p.text,
208+
date: p.date,
209+
channel: p.channel || p.chat || null,
210+
postId: p.postId || null,
211+
})),
212+
},
203213
thermal: (data.thermal || []).map(t => ({ region: t.region, det: t.det, night: t.night, hc: t.hc })),
204214
air: (data.air || []).map(a => ({ region: a.region, total: a.total })),
205215
nuke: (data.nuke || []).map(n => ({ site: n.site, anom: n.anom, cpm: n.cpm })),

lib/llm/ideas.mjs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,10 +88,20 @@ function compactSweepForLLM(data, delta, previousIdeas) {
8888
sections.push(`SUPPLY_CHAIN: GSCPI=${data.gscpi.value} (${data.gscpi.interpretation})`);
8989
}
9090

91-
// Geopolitical signals
91+
// Geopolitical signals (cap total OSINT text to ~1500 chars to keep prompt compact)
9292
const urgentPosts = (data.tg?.urgent || []).slice(0, 5);
9393
if (urgentPosts.length) {
94-
sections.push(`URGENT_OSINT:\n${urgentPosts.map(p => `- ${(p.text || '').substring(0, 120)}`).join('\n')}`);
94+
const MAX_OSINT_CHARS = 1500;
95+
let remaining = MAX_OSINT_CHARS;
96+
const lines = [];
97+
for (const p of urgentPosts) {
98+
const text = p.text || '';
99+
if (remaining <= 0) break;
100+
const trimmed = text.length > remaining ? text.substring(0, remaining) + '…' : text;
101+
lines.push(`- ${trimmed}`);
102+
remaining -= trimmed.length;
103+
}
104+
sections.push(`URGENT_OSINT:\n${lines.join('\n')}`);
95105
}
96106

97107
// Thermal / fire detections

0 commit comments

Comments
 (0)