From 79c141d5f9be1323f788c70c5a15d9b87399f7d4 Mon Sep 17 00:00:00 2001 From: Flavien David Date: Fri, 21 Mar 2025 17:02:08 +0100 Subject: [PATCH] Support Searching Data Source Nodes Source URL --- core/src/search_stores/search_store.rs | 28 ++++++++++++++----- .../input_bar/InputBarAttachmentsPicker.tsx | 1 + front/lib/api/search.ts | 15 ++++++++-- front/lib/swr/spaces.ts | 12 +++++--- front/types/core/core_api.ts | 3 +- 5 files changed, 44 insertions(+), 15 deletions(-) diff --git a/core/src/search_stores/search_store.rs b/core/src/search_stores/search_store.rs index e5eb8584221e..acf5164c13c8 100644 --- a/core/src/search_stores/search_store.rs +++ b/core/src/search_stores/search_store.rs @@ -79,6 +79,8 @@ pub struct NodesSearchOptions { // It will sort by title desc, then by updated_at asc, as per // elasticsearch's sort syntax (although it's a small subset of it) sort: Option>, + // Whether to search within source URLs when matching the query. + search_source_urls: Option, } #[derive(serde::Deserialize, Clone, Debug)] @@ -161,6 +163,7 @@ impl Default for NodesSearchOptions { limit: Some(MAX_PAGE_SIZE), cursor: None, sort: None, + search_source_urls: Some(false), } } } @@ -292,7 +295,7 @@ impl SearchStore for ElasticsearchSearchStore { // Build search query with potential truncation. let (bool_query, indices_to_query, warning_code) = - self.build_search_node_query(query.clone(), filter)?; + self.build_search_node_query(query.clone(), filter, &options)?; let sort = match query { None => self.build_search_nodes_sort(options.sort)?, @@ -313,12 +316,12 @@ impl SearchStore for ElasticsearchSearchStore { let json_str = String::from_utf8(decoded)?; let search_after: Vec = serde_json::from_str(&json_str)?; - // We replace empty strings with a “high sort” sentinel so that documents with + // We replace empty strings with a "high sort" sentinel so that documents with // an originally empty title will appear at the end of ascending sort order. // - // Elasticsearch’s Rust client (or DSL) has trouble when search_after contains "". + // Elasticsearch's Rust client (or DSL) has trouble when search_after contains "". // By substituting a high-Unicode character ("\u{10FFFF}"), we ensure those items - // sort last without breaking the library’s internal validation. + // sort last without breaking the library's internal validation. // // Will be removed once we don't have empty strings titles anymore. let fixed_sort = search_after @@ -653,6 +656,7 @@ impl ElasticsearchSearchStore { &self, query: Option, filter: NodesSearchFilter, + options: &NodesSearchOptions, ) -> Result<(BoolQuery, Vec<&str>, Option)> { let mut indices_to_query = vec![]; @@ -692,7 +696,7 @@ impl ElasticsearchSearchStore { if !counter.is_full() { let nodes_query = Query::bool() .filter(Query::term("_index", DATA_SOURCE_NODE_INDEX_NAME)) - .filter(self.build_nodes_content_query(&query, &filter, &mut counter)?); + .filter(self.build_nodes_content_query(&query, &filter, options, &mut counter)?); should_queries.push(nodes_query); indices_to_query.push(DATA_SOURCE_NODE_INDEX_NAME); @@ -817,6 +821,7 @@ impl ElasticsearchSearchStore { &self, query: &Option, filter: &NodesSearchFilter, + options: &NodesSearchOptions, counter: &mut QueryClauseCounter, ) -> Result { let mut bool_query = Query::bool().filter(self.build_shared_permission_filter( @@ -862,8 +867,17 @@ impl ElasticsearchSearchStore { // Add search term if present. if let Some(query_string) = query.clone() { counter.add(1); - bool_query = - bool_query.must(self.build_match_query("title", &query_string, counter)?); + let mut search_bool = + Query::bool().should(self.build_match_query("title", &query_string, counter)?); + + // Only add source_url filter if search_source_urls is true + // This creates an OR between title and source_url matches. + if options.search_source_urls.unwrap_or(false) { + counter.add(1); + search_bool = search_bool.should(Query::term("source_url", query_string)); + } + + bool_query = bool_query.must(search_bool.minimum_should_match(1)); } Ok(bool_query) diff --git a/front/components/assistant/conversation/input_bar/InputBarAttachmentsPicker.tsx b/front/components/assistant/conversation/input_bar/InputBarAttachmentsPicker.tsx index 12be35413e0c..cdab7302ea2b 100644 --- a/front/components/assistant/conversation/input_bar/InputBarAttachmentsPicker.tsx +++ b/front/components/assistant/conversation/input_bar/InputBarAttachmentsPicker.tsx @@ -63,6 +63,7 @@ export const InputBarAttachmentsPicker = ({ viewType: "all", disabled: isSpacesLoading || !searchQuery, spaceIds: spaces.map((s) => s.sId), + searchSourceUrls: true, }); const attachedNodeIds = useMemo(() => { diff --git a/front/lib/api/search.ts b/front/lib/api/search.ts index 5547cfd51721..e65d0c0bd01e 100644 --- a/front/lib/api/search.ts +++ b/front/lib/api/search.ts @@ -56,6 +56,7 @@ const TextSearchBody = t.intersection([ }), t.partial({ nodeIds: t.undefined, + searchSourceUrls: t.boolean, }), ]); @@ -66,6 +67,7 @@ const NodeIdSearchBody = t.intersection([ }), t.partial({ query: t.undefined, + searchSourceUrls: t.undefined, }), ]); @@ -78,8 +80,14 @@ export async function handleSearch( auth: Authenticator, searchParams: SearchRequestBodyType ): Promise> { - const { query, includeDataSources, viewType, spaceIds, nodeIds } = - searchParams; + const { + query, + includeDataSources, + viewType, + spaceIds, + nodeIds, + searchSourceUrls, + } = searchParams; const spaces = await SpaceResource.listWorkspaceSpacesAsMember(auth); if (!spaces.length) { @@ -149,8 +157,9 @@ export async function handleSearch( query, filter: searchFilterResult, options: { - limit: paginationRes.value?.limit, cursor: paginationRes.value?.cursor ?? undefined, + limit: paginationRes.value?.limit, + search_source_urls: searchSourceUrls, }, }); diff --git a/front/lib/swr/spaces.ts b/front/lib/swr/spaces.ts index 97625cdd7762..4a6e9ed9f760 100644 --- a/front/lib/swr/spaces.ts +++ b/front/lib/swr/spaces.ts @@ -699,12 +699,14 @@ type BaseSearchParams = { type TextSearchParams = BaseSearchParams & { search: string; nodeIds?: undefined; + searchSourceUrls?: boolean; }; // Node ID search variant type NodeIdSearchParams = BaseSearchParams & { search?: undefined; nodeIds: string[]; + searchSourceUrls?: undefined; }; type SpacesSearchParams = TextSearchParams | NodeIdSearchParams; @@ -718,6 +720,7 @@ export function useSpacesSearch({ spaceIds, viewType, pagination, + searchSourceUrls = false, }: SpacesSearchParams): { isSearchLoading: boolean; isSearchError: boolean; @@ -736,12 +739,13 @@ export function useSpacesSearch({ } const body = { - query: search, - viewType, - nodeIds, - spaceIds, includeDataSources, limit: pagination?.limit ?? DEFAULT_SEARCH_LIMIT, + nodeIds, + query: search, + searchSourceUrls, + spaceIds, + viewType, }; // Only perform a query if we have a valid search diff --git a/front/types/core/core_api.ts b/front/types/core/core_api.ts index 5de9841cccbc..e25b124f40bc 100644 --- a/front/types/core/core_api.ts +++ b/front/types/core/core_api.ts @@ -219,8 +219,9 @@ export type CoreAPISortSpec = { }; export type CoreAPISearchOptions = { - limit?: number; cursor?: string; + limit?: number; + search_source_urls?: boolean; sort?: CoreAPISortSpec[]; };