AppFlowy-Cloud/libs/database/src/index/search_ops.rs

84 lines
2.8 KiB
Rust

use std::ops::DerefMut;
use chrono::{DateTime, Utc};
use pgvector::Vector;
use sqlx::Transaction;
use uuid::Uuid;
pub async fn search_documents(
tx: &mut Transaction<'_, sqlx::Postgres>,
params: SearchDocumentParams,
tokens_used: u32,
) -> Result<Vec<SearchDocumentItem>, sqlx::Error> {
let query = sqlx::query_as::<_, SearchDocumentItem>(
r#"
WITH workspace AS (
INSERT INTO af_workspace_ai_usage(created_at, workspace_id, search_requests, search_tokens_consumed, index_tokens_consumed)
VALUES (now()::date, $2, 1, $6, 0)
ON CONFLICT (created_at, workspace_id) DO UPDATE
SET search_requests = af_workspace_ai_usage.search_requests + 1,
search_tokens_consumed = af_workspace_ai_usage.search_tokens_consumed + $6
RETURNING workspace_id
)
SELECT
em.oid AS object_id,
collab.workspace_id,
em.partition_key AS collab_type,
em.content_type,
LEFT(em.content, $4) AS content_preview,
u.name AS created_by,
collab.created_at AS created_at,
em.embedding <=> $3 AS score
FROM af_collab_embeddings em
JOIN af_collab collab ON em.oid = collab.oid AND em.partition_key = collab.partition_key
JOIN af_collab_member member ON collab.oid = member.oid
JOIN af_user u ON collab.owner_uid = u.uid
WHERE member.uid = $1 AND collab.workspace_id = $2 AND collab.deleted_at IS NULL
ORDER BY em.embedding <=> $3
LIMIT $5
"#,
)
.bind(params.user_id)
.bind(params.workspace_id)
.bind(Vector::from(params.embedding))
.bind(params.preview)
.bind(params.limit)
.bind(tokens_used as i64);
let rows = query.fetch_all(tx.deref_mut()).await?;
Ok(rows)
}
/// Input parameters of a single [`search_documents`] call.
#[derive(Debug, Clone)]
pub struct SearchDocumentParams {
    /// ID of the searching user; only collabs this user is a member of are matched.
    pub user_id: i64,
    /// Workspace whose documents are searched (and whose AI usage is updated).
    pub workspace_id: Uuid,
    /// Maximum number of rows to return (bound to the query's `LIMIT`).
    pub limit: i32,
    /// Number of leading characters of the indexed content to return as a preview
    /// (bound to the length argument of SQL `LEFT`).
    pub preview: i32,
    /// Embedding of the query - generated by OpenAI embedder.
    pub embedding: Vec<f32>,
}
/// A single row returned by [`search_documents`]; fields are populated from the
/// query's result columns of the same name.
#[derive(Debug, Clone, sqlx::FromRow)]
pub struct SearchDocumentItem {
    /// Identifier of the matched document (the embedding's `oid`).
    pub object_id: String,
    /// Identifier of the workspace the document belongs to.
    pub workspace_id: Uuid,
    /// The embedding's partition key, which maps directly onto [collab_entity::CollabType].
    pub collab_type: i32,
    /// Kind of content to be presented. Maps directly onto
    /// [database_entity::dto::EmbeddingContentType].
    pub content_type: i32,
    /// Leading characters of the indexed content, cut to the requested preview length.
    pub content_preview: Option<String>,
    /// Name of the user who owns the document.
    pub created_by: String,
    /// Creation time of the document.
    pub created_at: DateTime<Utc>,
    /// Distance between the stored embedding and the query embedding. Lower is better.
    pub score: f64,
}