Avoid leaking private documents in search endpoint (#1113)

* fix: auto patch workspace folder without space

* fix: avoid leaking private document contents in search endpoint

---------

Co-authored-by: weidong fu <nathan@appflowy.io>
This commit is contained in:
Khor Shu Heng 2025-01-05 15:28:52 +08:00 committed by GitHub
parent ab0fa6e7fc
commit 327e184e28
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 72 additions and 16 deletions

View File

@ -32,6 +32,7 @@ async fn document_search(
let metrics = &*state.metrics.request_metrics;
let resp = search_document(
&state.pg_pool,
&state.collab_access_control_storage,
&state.indexer_scheduler,
uid,
workspace_id,

View File

@ -9,15 +9,13 @@ use shared_entity::dto::workspace_dto::{
};
use uuid::Uuid;
/// Return all folders belonging to a workspace, excluding private sections which the user does not have access to.
pub fn collab_folder_to_folder_view(
workspace_id: Uuid,
root_view_id: &str,
folder: &Folder,
max_depth: u32,
pubished_view_ids: &HashSet<String>,
) -> Result<FolderView, AppError> {
let mut unviewable = HashSet::new();
/// View-id sets derived from a workspace folder, used to decide which views
/// the requesting user may see.
///
/// Produced by `private_and_nonviewable_view_ids`.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct PrivateAndNonviewableViews {
  /// Ids of the sections returned by `Folder::get_my_private_sections`,
  /// i.e. the private sections that belong to the current user.
  pub my_private_view_ids: HashSet<String>,
  /// Ids of views that must be hidden from the current user: private
  /// sections that are spaces and are not among `my_private_view_ids`,
  /// plus every view listed in the trash sections.
  pub nonviewable_view_ids: HashSet<String>,
}
pub fn private_and_nonviewable_view_ids(folder: &Folder) -> PrivateAndNonviewableViews {
let mut nonviewable_view_ids = HashSet::new();
let mut my_private_view_ids = HashSet::new();
for private_section in folder.get_my_private_sections() {
my_private_view_ids.insert(private_section.id);
@ -26,21 +24,36 @@ pub fn collab_folder_to_folder_view(
if let Some(private_view) = folder.get_view(&private_section.id) {
if check_if_view_is_space(&private_view) && !my_private_view_ids.contains(&private_section.id)
{
unviewable.insert(private_section.id);
nonviewable_view_ids.insert(private_section.id);
}
}
}
for trash_view in folder.get_all_trash_sections() {
unviewable.insert(trash_view.id);
nonviewable_view_ids.insert(trash_view.id);
}
PrivateAndNonviewableViews {
my_private_view_ids,
nonviewable_view_ids,
}
}
/// Return all folders belonging to a workspace, excluding private sections which the user does not have access to.
pub fn collab_folder_to_folder_view(
workspace_id: Uuid,
root_view_id: &str,
folder: &Folder,
max_depth: u32,
pubished_view_ids: &HashSet<String>,
) -> Result<FolderView, AppError> {
let private_and_nonviewable_views = private_and_nonviewable_view_ids(folder);
to_folder_view(
workspace_id,
"",
root_view_id,
folder,
&unviewable,
&my_private_view_ids,
&private_and_nonviewable_views.nonviewable_view_ids,
&private_and_nonviewable_views.my_private_view_ids,
pubished_view_ids,
false,
0,
@ -227,6 +240,27 @@ pub fn section_items_to_trash_folder_view(
.collect()
}
pub fn check_if_view_ancestors_fulfil_condition(
view_id: &str,
collab_folder: &Folder,
condition: impl Fn(&collab_folder::View) -> bool,
) -> bool {
let mut current_view_id = view_id.to_string();
loop {
let view = match collab_folder.get_view(&current_view_id) {
Some(view) => view,
None => return false,
};
if condition(&view) {
return true;
}
current_view_id = view.parent_view_id.clone();
if current_view_id.is_empty() {
return false;
}
}
}
pub fn check_if_view_is_space(view: &collab_folder::View) -> bool {
let extra = match view.extra.as_ref() {
Some(extra) => extra,

View File

@ -26,10 +26,9 @@ use collab_document::document::Document;
use collab_entity::CollabType;
use collab_entity::EncodedCollab;
use collab_folder::hierarchy_builder::NestedChildViewBuilder;
use collab_folder::CollabOrigin;
use collab_folder::Folder;
use collab_folder::SectionItem;
use collab_folder::SpaceInfo;
use collab_folder::{CollabOrigin, SpaceInfo};
use collab_rt_entity::user::RealtimeUser;
use database::collab::select_last_updated_database_row_ids;
use database::collab::select_workspace_database_oid;

View File

@ -1,8 +1,14 @@
use crate::api::metrics::RequestMetrics;
use crate::biz::collab::folder_view::{
check_if_view_ancestors_fulfil_condition, private_and_nonviewable_view_ids,
};
use crate::biz::collab::utils::get_latest_collab_folder;
use app_error::ErrorCode;
use appflowy_ai_client::dto::{
EmbeddingEncodingFormat, EmbeddingInput, EmbeddingModel, EmbeddingOutput, EmbeddingRequest,
};
use appflowy_collaborate::collab::storage::CollabAccessControlStorage;
use database::collab::GetCollabOrigin;
use std::sync::Arc;
use database::index::{search_documents, SearchDocumentParams};
@ -17,6 +23,7 @@ use uuid::Uuid;
pub async fn search_document(
pg_pool: &PgPool,
collab_storage: &CollabAccessControlStorage,
indexer_scheduler: &Arc<IndexerScheduler>,
uid: i64,
workspace_id: Uuid,
@ -75,8 +82,23 @@ pub async fn search_document(
results.len(),
request.query
);
let folder = get_latest_collab_folder(
collab_storage,
GetCollabOrigin::User { uid },
&workspace_id.to_string(),
)
.await?;
let private_and_nonviewable_views = private_and_nonviewable_view_ids(&folder);
let non_searchable_view_ids = private_and_nonviewable_views.nonviewable_view_ids;
let filtered_results = results.into_iter().filter(|item| {
!check_if_view_ancestors_fulfil_condition(&item.object_id, &folder, |view| {
non_searchable_view_ids.contains(&view.id)
})
});
Ok(
results
filtered_results
.into_iter()
.map(|item| SearchDocumentResponseItem {
object_id: item.object_id,