fix: children of trash views and other user's private space should not appear in search (#1146)

This commit is contained in:
Khor Shu Heng 2025-01-10 16:30:35 +08:00 committed by GitHub
parent fa9d53461b
commit 832bd7f9c9
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
6 changed files with 187 additions and 67 deletions

View File

@ -17,7 +17,8 @@ use collab_database::database::{Database, DatabaseContext};
use collab_database::workspace_database::WorkspaceDatabase;
use collab_document::document::Document;
use collab_entity::CollabType;
use collab_folder::Folder;
use collab_folder::hierarchy_builder::NestedChildViewBuilder;
use collab_folder::{Folder, ViewLayout};
use collab_user::core::UserAwareness;
use mime::Mime;
use serde::Deserialize;
@ -33,7 +34,7 @@ use client_api::collab_sync::{SinkConfig, SyncObject, SyncPlugin};
use client_api::entity::id::user_awareness_object_id;
use client_api::entity::{
PublishCollabItem, PublishCollabMetadata, QueryWorkspaceMember, QuestionStream,
QuestionStreamValue,
QuestionStreamValue, UpdateCollabWebParams,
};
use client_api::ws::{WSClient, WSClientConfig};
use database_entity::dto::{
@ -140,6 +141,50 @@ impl TestClient {
Self::new(registered_user, false).await
}
pub async fn insert_view_to_general_space(
&self,
workspace_id: &str,
view_id: &str,
view_name: &str,
view_layout: ViewLayout,
) {
let mut folder = self.get_folder(workspace_id).await;
let general_space_id = folder
.get_view(workspace_id)
.unwrap()
.children
.first()
.unwrap()
.clone();
let view = NestedChildViewBuilder::new(self.uid().await, general_space_id.id.clone())
.with_view_id(view_id.to_string())
.with_name(view_name)
.with_layout(view_layout)
.build()
.view;
{
let mut txn = folder.collab.transact_mut();
folder.body.views.insert(&mut txn, view, None);
}
let folder_collab_type = CollabType::Folder;
self
.api_client
.update_web_collab(
workspace_id,
workspace_id,
UpdateCollabWebParams {
doc_state: folder
.encode_collab_v1(|c| folder_collab_type.validate_require_data(c))
.unwrap()
.doc_state
.to_vec(),
collab_type: CollabType::Folder,
},
)
.await
.unwrap();
}
pub async fn get_folder(&self, workspace_id: &str) -> Folder {
let uid = self.uid().await;
let folder_collab = self

View File

@ -37,7 +37,7 @@ pub async fn search_documents<'a, E: Executor<'a, Database = Postgres>>(
FROM af_collab_embeddings em
JOIN af_collab collab ON em.oid = collab.oid AND em.partition_key = collab.partition_key
JOIN af_user u ON collab.owner_uid = u.uid
WHERE collab.workspace_id = $2 AND NOT(collab.oid = ANY($7::text[]))
WHERE collab.workspace_id = $2 AND (collab.oid = ANY($7::text[]))
ORDER BY em.embedding <=> $3
LIMIT $5
"#,
@ -48,7 +48,7 @@ pub async fn search_documents<'a, E: Executor<'a, Database = Postgres>>(
.bind(params.preview)
.bind(params.limit)
.bind(tokens_used as i64)
.bind(params.non_viewable_view_ids);
.bind(params.searchable_view_ids);
let rows = query.fetch_all(executor).await?;
Ok(rows)
}
@ -66,7 +66,7 @@ pub struct SearchDocumentParams {
/// Embedding of the query - generated by OpenAI embedder.
pub embedding: Vec<f32>,
/// List of view ids which is not supposed to be returned in the search results.
pub non_viewable_view_ids: Vec<String>,
pub searchable_view_ids: Vec<String>,
}
#[derive(Debug, Clone, sqlx::FromRow)]

View File

@ -9,43 +9,43 @@ use shared_entity::dto::workspace_dto::{
};
use uuid::Uuid;
pub struct PrivateAndNonviewableViews {
pub my_private_view_ids: HashSet<String>,
pub nonviewable_view_ids: HashSet<String>,
pub struct PrivateSpaceAndTrashViews {
pub my_private_space_ids: HashSet<String>,
pub other_private_space_ids: HashSet<String>,
pub view_ids_in_trash: HashSet<String>,
}
pub fn private_and_nonviewable_view_ids(folder: &Folder) -> PrivateAndNonviewableViews {
let mut nonviewable_view_ids = HashSet::new();
let mut my_private_view_ids = HashSet::new();
pub fn private_space_and_trash_view_ids(folder: &Folder) -> PrivateSpaceAndTrashViews {
let mut view_ids_in_trash = HashSet::new();
let mut my_private_space_ids = HashSet::new();
let mut other_private_space_ids = HashSet::new();
for private_section in folder.get_my_private_sections() {
my_private_view_ids.insert(private_section.id);
match folder.get_view(&private_section.id) {
Some(private_view) if check_if_view_is_space(&private_view) => {
my_private_space_ids.insert(private_section.id.clone());
},
_ => (),
}
}
for private_section in folder.get_all_private_sections() {
if let Some(private_view) = folder.get_view(&private_section.id) {
if check_if_view_is_space(&private_view) && !my_private_view_ids.contains(&private_section.id)
match folder.get_view(&private_section.id) {
Some(private_view)
if check_if_view_is_space(&private_view)
&& !my_private_space_ids.contains(&private_section.id) =>
{
nonviewable_view_ids.insert(private_section.id);
let private_view_ids_in_space: HashSet<String> = folder
.get_views_belong_to(&private_view.id)
.iter()
.map(|v| v.id.clone())
.collect();
nonviewable_view_ids.extend(private_view_ids_in_space);
}
other_private_space_ids.insert(private_section.id.clone());
},
_ => (),
}
}
for trash_view in folder.get_all_trash_sections() {
nonviewable_view_ids.insert(trash_view.id.clone());
let child_views_for_trash: HashSet<String> = folder
.get_views_belong_to(&trash_view.id)
.iter()
.map(|v| v.id.clone())
.collect();
nonviewable_view_ids.extend(child_views_for_trash);
view_ids_in_trash.insert(trash_view.id.clone());
}
PrivateAndNonviewableViews {
my_private_view_ids,
nonviewable_view_ids,
PrivateSpaceAndTrashViews {
my_private_space_ids,
other_private_space_ids,
view_ids_in_trash,
}
}
@ -57,15 +57,14 @@ pub fn collab_folder_to_folder_view(
max_depth: u32,
pubished_view_ids: &HashSet<String>,
) -> Result<FolderView, AppError> {
let private_and_nonviewable_views = private_and_nonviewable_view_ids(folder);
let private_space_and_trash_view_ids = private_space_and_trash_view_ids(folder);
to_folder_view(
workspace_id,
"",
root_view_id,
folder,
&private_and_nonviewable_views.nonviewable_view_ids,
&private_and_nonviewable_views.my_private_view_ids,
&private_space_and_trash_view_ids,
pubished_view_ids,
false,
0,
@ -83,14 +82,23 @@ fn to_folder_view(
parent_view_id: &str,
view_id: &str,
folder: &Folder,
unviewable: &HashSet<String>,
private_view_ids: &HashSet<String>,
private_space_and_trash_views: &PrivateSpaceAndTrashViews,
published_view_ids: &HashSet<String>,
parent_is_private: bool,
depth: u32,
max_depth: u32,
) -> Option<FolderView> {
if depth > max_depth || unviewable.contains(view_id) {
let is_trash = private_space_and_trash_views
.view_ids_in_trash
.contains(view_id);
let is_my_private_space = private_space_and_trash_views
.my_private_space_ids
.contains(view_id);
let is_other_private_space = private_space_and_trash_views
.other_private_space_ids
.contains(view_id);
if depth > max_depth || is_other_private_space || is_trash {
return None;
}
@ -114,7 +122,7 @@ fn to_folder_view(
return None;
}
let is_private = parent_is_private || (view_is_space && private_view_ids.contains(view_id));
let is_private = parent_is_private || is_my_private_space;
let extra = view.extra.as_deref().map(|extra| {
serde_json::from_str::<serde_json::Value>(extra).unwrap_or_else(|e| {
tracing::warn!("failed to parse extra field({}): {}", extra, e);
@ -130,8 +138,7 @@ fn to_folder_view(
view_id,
&child_view_id.id,
folder,
unviewable,
private_view_ids,
private_space_and_trash_views,
published_view_ids,
is_private,
depth + 1,
@ -267,7 +274,7 @@ pub fn check_if_view_ancestors_fulfil_condition(
return true;
}
current_view_id = view.parent_view_id.clone();
if current_view_id.is_empty() {
if current_view_id.is_empty() || current_view_id == view.id {
return false;
}
}

View File

@ -1,15 +1,18 @@
use crate::api::metrics::RequestMetrics;
use crate::biz::collab::folder_view::private_and_nonviewable_view_ids;
use crate::biz::collab::folder_view::PrivateSpaceAndTrashViews;
use crate::biz::collab::utils::get_latest_collab_folder;
use crate::{
api::metrics::RequestMetrics, biz::collab::folder_view::private_space_and_trash_view_ids,
};
use app_error::AppError;
use appflowy_ai_client::dto::{
EmbeddingEncodingFormat, EmbeddingInput, EmbeddingModel, EmbeddingOutput, EmbeddingRequest,
};
use appflowy_collaborate::collab::storage::CollabAccessControlStorage;
use collab_folder::{Folder, View};
use database::collab::GetCollabOrigin;
use itertools::Itertools;
use std::collections::HashSet;
use std::sync::Arc;
use tracing::info;
use database::index::{search_documents, SearchDocumentParams};
use shared_entity::dto::search_dto::{
@ -20,12 +23,60 @@ use sqlx::PgPool;
use indexer::scheduler::IndexerScheduler;
use uuid::Uuid;
static MAX_SEARCH_DEPTH: i32 = 10;
fn is_view_searchable(view: &View, workspace_id: &str) -> bool {
view.id != workspace_id && view.parent_view_id != workspace_id && view.layout.is_document()
}
fn populate_searchable_view_ids(
folder: &Folder,
private_space_and_trash_views: &PrivateSpaceAndTrashViews,
searchable_view_ids: &mut HashSet<String>,
workspace_id: &str,
current_view_id: &str,
depth: i32,
max_depth: i32,
) {
if depth > max_depth {
return;
}
let is_other_private_space = private_space_and_trash_views
.other_private_space_ids
.contains(current_view_id);
let is_trash = private_space_and_trash_views
.view_ids_in_trash
.contains(current_view_id);
if is_other_private_space || is_trash {
return;
}
let view = match folder.get_view(current_view_id) {
Some(view) => view,
None => return,
};
if is_view_searchable(&view, workspace_id) {
searchable_view_ids.insert(current_view_id.to_string());
}
for child in view.children.iter() {
populate_searchable_view_ids(
folder,
private_space_and_trash_views,
searchable_view_ids,
workspace_id,
&child.id,
depth + 1,
max_depth,
);
}
}
pub async fn search_document(
pg_pool: &PgPool,
collab_storage: &CollabAccessControlStorage,
indexer_scheduler: &Arc<IndexerScheduler>,
uid: i64,
workspace_id: Uuid,
workspace_uuid: Uuid,
request: SearchDocumentRequest,
metrics: &RequestMetrics,
) -> Result<Vec<SearchDocumentResponseItem>, AppError> {
@ -38,10 +89,10 @@ pub async fn search_document(
})
.await?;
let total_tokens = embeddings.usage.total_tokens as u32;
metrics.record_search_tokens_used(&workspace_id, total_tokens);
metrics.record_search_tokens_used(&workspace_uuid, total_tokens);
tracing::info!(
"workspace {} OpenAI API search tokens used: {}",
workspace_id,
workspace_uuid,
total_tokens
);
@ -61,32 +112,30 @@ pub async fn search_document(
let folder = get_latest_collab_folder(
collab_storage,
GetCollabOrigin::User { uid },
&workspace_id.to_string(),
&workspace_uuid.to_string(),
)
.await?;
let private_and_nonviewable_views = private_and_nonviewable_view_ids(&folder);
let space_ids: HashSet<String> = folder
.get_view(&workspace_id.to_string())
.ok_or_else(|| AppError::Internal(anyhow::anyhow!("Workspace view not found in folder")))?
.children
.iter()
.map(|c| c.id.clone())
.collect();
let mut non_searchable_view_ids = private_and_nonviewable_views.nonviewable_view_ids;
non_searchable_view_ids.extend(space_ids);
let private_space_and_trash_views = private_space_and_trash_view_ids(&folder);
let mut searchable_view_ids = HashSet::new();
populate_searchable_view_ids(
&folder,
&private_space_and_trash_views,
&mut searchable_view_ids,
&workspace_uuid.to_string(),
&workspace_uuid.to_string(),
0,
MAX_SEARCH_DEPTH,
);
info!("{:?}", searchable_view_ids);
let results = search_documents(
pg_pool,
SearchDocumentParams {
user_id: uid,
workspace_id,
workspace_id: workspace_uuid,
limit: request.limit.unwrap_or(10) as i32,
preview: request.preview_size.unwrap_or(500) as i32,
embedding,
non_viewable_view_ids: non_searchable_view_ids
.iter()
.map(|uuid| uuid.to_string())
.collect_vec(),
searchable_view_ids: searchable_view_ids.into_iter().collect(),
},
total_tokens,
)
@ -94,7 +143,7 @@ pub async fn search_document(
tracing::trace!(
"user {} search request in workspace {} returned {} results for query: `{}`",
uid,
workspace_id,
workspace_uuid,
results.len(),
request.query
);

View File

@ -50,6 +50,14 @@ async fn document_full_sync_then_search_test() {
collab_type: CollabType::Document,
};
test_client.api_client.create_collab(params).await.unwrap();
test_client
.insert_view_to_general_space(
&workspace_id,
&object_id,
"AppFlowy",
collab_folder::ViewLayout::Document,
)
.await;
let contents = vec![
"AppFlowy is an open-source project.",

View File

@ -7,6 +7,7 @@ use collab::preclude::Collab;
use collab_document::document::Document;
use collab_document::importer::md_importer::MDImporter;
use collab_entity::CollabType;
use collab_folder::ViewLayout;
use shared_entity::dto::chat_dto::{CreateChatMessageParams, CreateChatParams};
use tokio::time::sleep;
use workspace_template::document::getting_started::getting_started_document_data;
@ -15,7 +16,6 @@ use workspace_template::document::getting_started::getting_started_document_data
async fn test_embedding_when_create_document() {
let mut test_client = TestClient::new_user().await;
let workspace_id = test_client.workspace_id().await;
let object_id_1 = uuid::Uuid::new_v4().to_string();
let the_five_dysfunctions_of_a_team =
create_document_collab(&object_id_1, "the_five_dysfunctions_of_a_team.md").await;
@ -29,6 +29,14 @@ async fn test_embedding_when_create_document() {
)
.await
.unwrap();
test_client
.insert_view_to_general_space(
&workspace_id,
&object_id_1,
"five dysfunctional",
ViewLayout::Document,
)
.await;
test_client
.wait_until_get_embedding(&workspace_id, &object_id_1)
@ -46,6 +54,9 @@ async fn test_embedding_when_create_document() {
)
.await
.unwrap();
test_client
.insert_view_to_general_space(&workspace_id, &object_id_2, "tennis", ViewLayout::Document)
.await;
test_client
.wait_until_get_embedding(&workspace_id, &object_id_2)