fix: children of trash views and other user's private space should not appear in search (#1146)
This commit is contained in:
parent
fa9d53461b
commit
832bd7f9c9
|
|
@ -17,7 +17,8 @@ use collab_database::database::{Database, DatabaseContext};
|
|||
use collab_database::workspace_database::WorkspaceDatabase;
|
||||
use collab_document::document::Document;
|
||||
use collab_entity::CollabType;
|
||||
use collab_folder::Folder;
|
||||
use collab_folder::hierarchy_builder::NestedChildViewBuilder;
|
||||
use collab_folder::{Folder, ViewLayout};
|
||||
use collab_user::core::UserAwareness;
|
||||
use mime::Mime;
|
||||
use serde::Deserialize;
|
||||
|
|
@ -33,7 +34,7 @@ use client_api::collab_sync::{SinkConfig, SyncObject, SyncPlugin};
|
|||
use client_api::entity::id::user_awareness_object_id;
|
||||
use client_api::entity::{
|
||||
PublishCollabItem, PublishCollabMetadata, QueryWorkspaceMember, QuestionStream,
|
||||
QuestionStreamValue,
|
||||
QuestionStreamValue, UpdateCollabWebParams,
|
||||
};
|
||||
use client_api::ws::{WSClient, WSClientConfig};
|
||||
use database_entity::dto::{
|
||||
|
|
@ -140,6 +141,50 @@ impl TestClient {
|
|||
Self::new(registered_user, false).await
|
||||
}
|
||||
|
||||
pub async fn insert_view_to_general_space(
|
||||
&self,
|
||||
workspace_id: &str,
|
||||
view_id: &str,
|
||||
view_name: &str,
|
||||
view_layout: ViewLayout,
|
||||
) {
|
||||
let mut folder = self.get_folder(workspace_id).await;
|
||||
let general_space_id = folder
|
||||
.get_view(workspace_id)
|
||||
.unwrap()
|
||||
.children
|
||||
.first()
|
||||
.unwrap()
|
||||
.clone();
|
||||
let view = NestedChildViewBuilder::new(self.uid().await, general_space_id.id.clone())
|
||||
.with_view_id(view_id.to_string())
|
||||
.with_name(view_name)
|
||||
.with_layout(view_layout)
|
||||
.build()
|
||||
.view;
|
||||
{
|
||||
let mut txn = folder.collab.transact_mut();
|
||||
folder.body.views.insert(&mut txn, view, None);
|
||||
}
|
||||
let folder_collab_type = CollabType::Folder;
|
||||
self
|
||||
.api_client
|
||||
.update_web_collab(
|
||||
workspace_id,
|
||||
workspace_id,
|
||||
UpdateCollabWebParams {
|
||||
doc_state: folder
|
||||
.encode_collab_v1(|c| folder_collab_type.validate_require_data(c))
|
||||
.unwrap()
|
||||
.doc_state
|
||||
.to_vec(),
|
||||
collab_type: CollabType::Folder,
|
||||
},
|
||||
)
|
||||
.await
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
pub async fn get_folder(&self, workspace_id: &str) -> Folder {
|
||||
let uid = self.uid().await;
|
||||
let folder_collab = self
|
||||
|
|
|
|||
|
|
@ -37,7 +37,7 @@ pub async fn search_documents<'a, E: Executor<'a, Database = Postgres>>(
|
|||
FROM af_collab_embeddings em
|
||||
JOIN af_collab collab ON em.oid = collab.oid AND em.partition_key = collab.partition_key
|
||||
JOIN af_user u ON collab.owner_uid = u.uid
|
||||
WHERE collab.workspace_id = $2 AND NOT(collab.oid = ANY($7::text[]))
|
||||
WHERE collab.workspace_id = $2 AND (collab.oid = ANY($7::text[]))
|
||||
ORDER BY em.embedding <=> $3
|
||||
LIMIT $5
|
||||
"#,
|
||||
|
|
@ -48,7 +48,7 @@ pub async fn search_documents<'a, E: Executor<'a, Database = Postgres>>(
|
|||
.bind(params.preview)
|
||||
.bind(params.limit)
|
||||
.bind(tokens_used as i64)
|
||||
.bind(params.non_viewable_view_ids);
|
||||
.bind(params.searchable_view_ids);
|
||||
let rows = query.fetch_all(executor).await?;
|
||||
Ok(rows)
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ pub struct SearchDocumentParams {
|
|||
/// Embedding of the query - generated by OpenAI embedder.
|
||||
pub embedding: Vec<f32>,
|
||||
/// List of view ids which is not supposed to be returned in the search results.
|
||||
pub non_viewable_view_ids: Vec<String>,
|
||||
pub searchable_view_ids: Vec<String>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, sqlx::FromRow)]
|
||||
|
|
|
|||
|
|
@ -9,43 +9,43 @@ use shared_entity::dto::workspace_dto::{
|
|||
};
|
||||
use uuid::Uuid;
|
||||
|
||||
pub struct PrivateAndNonviewableViews {
|
||||
pub my_private_view_ids: HashSet<String>,
|
||||
pub nonviewable_view_ids: HashSet<String>,
|
||||
pub struct PrivateSpaceAndTrashViews {
|
||||
pub my_private_space_ids: HashSet<String>,
|
||||
pub other_private_space_ids: HashSet<String>,
|
||||
pub view_ids_in_trash: HashSet<String>,
|
||||
}
|
||||
|
||||
pub fn private_and_nonviewable_view_ids(folder: &Folder) -> PrivateAndNonviewableViews {
|
||||
let mut nonviewable_view_ids = HashSet::new();
|
||||
let mut my_private_view_ids = HashSet::new();
|
||||
pub fn private_space_and_trash_view_ids(folder: &Folder) -> PrivateSpaceAndTrashViews {
|
||||
let mut view_ids_in_trash = HashSet::new();
|
||||
let mut my_private_space_ids = HashSet::new();
|
||||
let mut other_private_space_ids = HashSet::new();
|
||||
for private_section in folder.get_my_private_sections() {
|
||||
my_private_view_ids.insert(private_section.id);
|
||||
match folder.get_view(&private_section.id) {
|
||||
Some(private_view) if check_if_view_is_space(&private_view) => {
|
||||
my_private_space_ids.insert(private_section.id.clone());
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
|
||||
for private_section in folder.get_all_private_sections() {
|
||||
if let Some(private_view) = folder.get_view(&private_section.id) {
|
||||
if check_if_view_is_space(&private_view) && !my_private_view_ids.contains(&private_section.id)
|
||||
match folder.get_view(&private_section.id) {
|
||||
Some(private_view)
|
||||
if check_if_view_is_space(&private_view)
|
||||
&& !my_private_space_ids.contains(&private_section.id) =>
|
||||
{
|
||||
nonviewable_view_ids.insert(private_section.id);
|
||||
let private_view_ids_in_space: HashSet<String> = folder
|
||||
.get_views_belong_to(&private_view.id)
|
||||
.iter()
|
||||
.map(|v| v.id.clone())
|
||||
.collect();
|
||||
nonviewable_view_ids.extend(private_view_ids_in_space);
|
||||
}
|
||||
other_private_space_ids.insert(private_section.id.clone());
|
||||
},
|
||||
_ => (),
|
||||
}
|
||||
}
|
||||
for trash_view in folder.get_all_trash_sections() {
|
||||
nonviewable_view_ids.insert(trash_view.id.clone());
|
||||
let child_views_for_trash: HashSet<String> = folder
|
||||
.get_views_belong_to(&trash_view.id)
|
||||
.iter()
|
||||
.map(|v| v.id.clone())
|
||||
.collect();
|
||||
nonviewable_view_ids.extend(child_views_for_trash);
|
||||
view_ids_in_trash.insert(trash_view.id.clone());
|
||||
}
|
||||
PrivateAndNonviewableViews {
|
||||
my_private_view_ids,
|
||||
nonviewable_view_ids,
|
||||
PrivateSpaceAndTrashViews {
|
||||
my_private_space_ids,
|
||||
other_private_space_ids,
|
||||
view_ids_in_trash,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -57,15 +57,14 @@ pub fn collab_folder_to_folder_view(
|
|||
max_depth: u32,
|
||||
pubished_view_ids: &HashSet<String>,
|
||||
) -> Result<FolderView, AppError> {
|
||||
let private_and_nonviewable_views = private_and_nonviewable_view_ids(folder);
|
||||
let private_space_and_trash_view_ids = private_space_and_trash_view_ids(folder);
|
||||
|
||||
to_folder_view(
|
||||
workspace_id,
|
||||
"",
|
||||
root_view_id,
|
||||
folder,
|
||||
&private_and_nonviewable_views.nonviewable_view_ids,
|
||||
&private_and_nonviewable_views.my_private_view_ids,
|
||||
&private_space_and_trash_view_ids,
|
||||
pubished_view_ids,
|
||||
false,
|
||||
0,
|
||||
|
|
@ -83,14 +82,23 @@ fn to_folder_view(
|
|||
parent_view_id: &str,
|
||||
view_id: &str,
|
||||
folder: &Folder,
|
||||
unviewable: &HashSet<String>,
|
||||
private_view_ids: &HashSet<String>,
|
||||
private_space_and_trash_views: &PrivateSpaceAndTrashViews,
|
||||
published_view_ids: &HashSet<String>,
|
||||
parent_is_private: bool,
|
||||
depth: u32,
|
||||
max_depth: u32,
|
||||
) -> Option<FolderView> {
|
||||
if depth > max_depth || unviewable.contains(view_id) {
|
||||
let is_trash = private_space_and_trash_views
|
||||
.view_ids_in_trash
|
||||
.contains(view_id);
|
||||
let is_my_private_space = private_space_and_trash_views
|
||||
.my_private_space_ids
|
||||
.contains(view_id);
|
||||
let is_other_private_space = private_space_and_trash_views
|
||||
.other_private_space_ids
|
||||
.contains(view_id);
|
||||
|
||||
if depth > max_depth || is_other_private_space || is_trash {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
|
@ -114,7 +122,7 @@ fn to_folder_view(
|
|||
return None;
|
||||
}
|
||||
|
||||
let is_private = parent_is_private || (view_is_space && private_view_ids.contains(view_id));
|
||||
let is_private = parent_is_private || is_my_private_space;
|
||||
let extra = view.extra.as_deref().map(|extra| {
|
||||
serde_json::from_str::<serde_json::Value>(extra).unwrap_or_else(|e| {
|
||||
tracing::warn!("failed to parse extra field({}): {}", extra, e);
|
||||
|
|
@ -130,8 +138,7 @@ fn to_folder_view(
|
|||
view_id,
|
||||
&child_view_id.id,
|
||||
folder,
|
||||
unviewable,
|
||||
private_view_ids,
|
||||
private_space_and_trash_views,
|
||||
published_view_ids,
|
||||
is_private,
|
||||
depth + 1,
|
||||
|
|
@ -267,7 +274,7 @@ pub fn check_if_view_ancestors_fulfil_condition(
|
|||
return true;
|
||||
}
|
||||
current_view_id = view.parent_view_id.clone();
|
||||
if current_view_id.is_empty() {
|
||||
if current_view_id.is_empty() || current_view_id == view.id {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,15 +1,18 @@
|
|||
use crate::api::metrics::RequestMetrics;
|
||||
use crate::biz::collab::folder_view::private_and_nonviewable_view_ids;
|
||||
use crate::biz::collab::folder_view::PrivateSpaceAndTrashViews;
|
||||
use crate::biz::collab::utils::get_latest_collab_folder;
|
||||
use crate::{
|
||||
api::metrics::RequestMetrics, biz::collab::folder_view::private_space_and_trash_view_ids,
|
||||
};
|
||||
use app_error::AppError;
|
||||
use appflowy_ai_client::dto::{
|
||||
EmbeddingEncodingFormat, EmbeddingInput, EmbeddingModel, EmbeddingOutput, EmbeddingRequest,
|
||||
};
|
||||
use appflowy_collaborate::collab::storage::CollabAccessControlStorage;
|
||||
use collab_folder::{Folder, View};
|
||||
use database::collab::GetCollabOrigin;
|
||||
use itertools::Itertools;
|
||||
use std::collections::HashSet;
|
||||
use std::sync::Arc;
|
||||
use tracing::info;
|
||||
|
||||
use database::index::{search_documents, SearchDocumentParams};
|
||||
use shared_entity::dto::search_dto::{
|
||||
|
|
@ -20,12 +23,60 @@ use sqlx::PgPool;
|
|||
use indexer::scheduler::IndexerScheduler;
|
||||
use uuid::Uuid;
|
||||
|
||||
static MAX_SEARCH_DEPTH: i32 = 10;
|
||||
|
||||
fn is_view_searchable(view: &View, workspace_id: &str) -> bool {
|
||||
view.id != workspace_id && view.parent_view_id != workspace_id && view.layout.is_document()
|
||||
}
|
||||
|
||||
fn populate_searchable_view_ids(
|
||||
folder: &Folder,
|
||||
private_space_and_trash_views: &PrivateSpaceAndTrashViews,
|
||||
searchable_view_ids: &mut HashSet<String>,
|
||||
workspace_id: &str,
|
||||
current_view_id: &str,
|
||||
depth: i32,
|
||||
max_depth: i32,
|
||||
) {
|
||||
if depth > max_depth {
|
||||
return;
|
||||
}
|
||||
let is_other_private_space = private_space_and_trash_views
|
||||
.other_private_space_ids
|
||||
.contains(current_view_id);
|
||||
let is_trash = private_space_and_trash_views
|
||||
.view_ids_in_trash
|
||||
.contains(current_view_id);
|
||||
if is_other_private_space || is_trash {
|
||||
return;
|
||||
}
|
||||
let view = match folder.get_view(current_view_id) {
|
||||
Some(view) => view,
|
||||
None => return,
|
||||
};
|
||||
|
||||
if is_view_searchable(&view, workspace_id) {
|
||||
searchable_view_ids.insert(current_view_id.to_string());
|
||||
}
|
||||
for child in view.children.iter() {
|
||||
populate_searchable_view_ids(
|
||||
folder,
|
||||
private_space_and_trash_views,
|
||||
searchable_view_ids,
|
||||
workspace_id,
|
||||
&child.id,
|
||||
depth + 1,
|
||||
max_depth,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
pub async fn search_document(
|
||||
pg_pool: &PgPool,
|
||||
collab_storage: &CollabAccessControlStorage,
|
||||
indexer_scheduler: &Arc<IndexerScheduler>,
|
||||
uid: i64,
|
||||
workspace_id: Uuid,
|
||||
workspace_uuid: Uuid,
|
||||
request: SearchDocumentRequest,
|
||||
metrics: &RequestMetrics,
|
||||
) -> Result<Vec<SearchDocumentResponseItem>, AppError> {
|
||||
|
|
@ -38,10 +89,10 @@ pub async fn search_document(
|
|||
})
|
||||
.await?;
|
||||
let total_tokens = embeddings.usage.total_tokens as u32;
|
||||
metrics.record_search_tokens_used(&workspace_id, total_tokens);
|
||||
metrics.record_search_tokens_used(&workspace_uuid, total_tokens);
|
||||
tracing::info!(
|
||||
"workspace {} OpenAI API search tokens used: {}",
|
||||
workspace_id,
|
||||
workspace_uuid,
|
||||
total_tokens
|
||||
);
|
||||
|
||||
|
|
@ -61,32 +112,30 @@ pub async fn search_document(
|
|||
let folder = get_latest_collab_folder(
|
||||
collab_storage,
|
||||
GetCollabOrigin::User { uid },
|
||||
&workspace_id.to_string(),
|
||||
&workspace_uuid.to_string(),
|
||||
)
|
||||
.await?;
|
||||
let private_and_nonviewable_views = private_and_nonviewable_view_ids(&folder);
|
||||
let space_ids: HashSet<String> = folder
|
||||
.get_view(&workspace_id.to_string())
|
||||
.ok_or_else(|| AppError::Internal(anyhow::anyhow!("Workspace view not found in folder")))?
|
||||
.children
|
||||
.iter()
|
||||
.map(|c| c.id.clone())
|
||||
.collect();
|
||||
|
||||
let mut non_searchable_view_ids = private_and_nonviewable_views.nonviewable_view_ids;
|
||||
non_searchable_view_ids.extend(space_ids);
|
||||
let private_space_and_trash_views = private_space_and_trash_view_ids(&folder);
|
||||
let mut searchable_view_ids = HashSet::new();
|
||||
populate_searchable_view_ids(
|
||||
&folder,
|
||||
&private_space_and_trash_views,
|
||||
&mut searchable_view_ids,
|
||||
&workspace_uuid.to_string(),
|
||||
&workspace_uuid.to_string(),
|
||||
0,
|
||||
MAX_SEARCH_DEPTH,
|
||||
);
|
||||
info!("{:?}", searchable_view_ids);
|
||||
let results = search_documents(
|
||||
pg_pool,
|
||||
SearchDocumentParams {
|
||||
user_id: uid,
|
||||
workspace_id,
|
||||
workspace_id: workspace_uuid,
|
||||
limit: request.limit.unwrap_or(10) as i32,
|
||||
preview: request.preview_size.unwrap_or(500) as i32,
|
||||
embedding,
|
||||
non_viewable_view_ids: non_searchable_view_ids
|
||||
.iter()
|
||||
.map(|uuid| uuid.to_string())
|
||||
.collect_vec(),
|
||||
searchable_view_ids: searchable_view_ids.into_iter().collect(),
|
||||
},
|
||||
total_tokens,
|
||||
)
|
||||
|
|
@ -94,7 +143,7 @@ pub async fn search_document(
|
|||
tracing::trace!(
|
||||
"user {} search request in workspace {} returned {} results for query: `{}`",
|
||||
uid,
|
||||
workspace_id,
|
||||
workspace_uuid,
|
||||
results.len(),
|
||||
request.query
|
||||
);
|
||||
|
|
|
|||
|
|
@ -50,6 +50,14 @@ async fn document_full_sync_then_search_test() {
|
|||
collab_type: CollabType::Document,
|
||||
};
|
||||
test_client.api_client.create_collab(params).await.unwrap();
|
||||
test_client
|
||||
.insert_view_to_general_space(
|
||||
&workspace_id,
|
||||
&object_id,
|
||||
"AppFlowy",
|
||||
collab_folder::ViewLayout::Document,
|
||||
)
|
||||
.await;
|
||||
|
||||
let contents = vec![
|
||||
"AppFlowy is an open-source project.",
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ use collab::preclude::Collab;
|
|||
use collab_document::document::Document;
|
||||
use collab_document::importer::md_importer::MDImporter;
|
||||
use collab_entity::CollabType;
|
||||
use collab_folder::ViewLayout;
|
||||
use shared_entity::dto::chat_dto::{CreateChatMessageParams, CreateChatParams};
|
||||
use tokio::time::sleep;
|
||||
use workspace_template::document::getting_started::getting_started_document_data;
|
||||
|
|
@ -15,7 +16,6 @@ use workspace_template::document::getting_started::getting_started_document_data
|
|||
async fn test_embedding_when_create_document() {
|
||||
let mut test_client = TestClient::new_user().await;
|
||||
let workspace_id = test_client.workspace_id().await;
|
||||
|
||||
let object_id_1 = uuid::Uuid::new_v4().to_string();
|
||||
let the_five_dysfunctions_of_a_team =
|
||||
create_document_collab(&object_id_1, "the_five_dysfunctions_of_a_team.md").await;
|
||||
|
|
@ -29,6 +29,14 @@ async fn test_embedding_when_create_document() {
|
|||
)
|
||||
.await
|
||||
.unwrap();
|
||||
test_client
|
||||
.insert_view_to_general_space(
|
||||
&workspace_id,
|
||||
&object_id_1,
|
||||
"five dysfunctional",
|
||||
ViewLayout::Document,
|
||||
)
|
||||
.await;
|
||||
|
||||
test_client
|
||||
.wait_until_get_embedding(&workspace_id, &object_id_1)
|
||||
|
|
@ -46,6 +54,9 @@ async fn test_embedding_when_create_document() {
|
|||
)
|
||||
.await
|
||||
.unwrap();
|
||||
test_client
|
||||
.insert_view_to_general_space(&workspace_id, &object_id_2, "tennis", ViewLayout::Document)
|
||||
.await;
|
||||
|
||||
test_client
|
||||
.wait_until_get_embedding(&workspace_id, &object_id_2)
|
||||
|
|
|
|||
Loading…
Reference in New Issue