84 lines
2.8 KiB
Rust
84 lines
2.8 KiB
Rust
use std::ops::DerefMut;
|
|
|
|
use chrono::{DateTime, Utc};
|
|
use pgvector::Vector;
|
|
use sqlx::Transaction;
|
|
use uuid::Uuid;
|
|
|
|
pub async fn search_documents(
|
|
tx: &mut Transaction<'_, sqlx::Postgres>,
|
|
params: SearchDocumentParams,
|
|
tokens_used: u32,
|
|
) -> Result<Vec<SearchDocumentItem>, sqlx::Error> {
|
|
let query = sqlx::query_as::<_, SearchDocumentItem>(
|
|
r#"
|
|
WITH workspace AS (
|
|
INSERT INTO af_workspace_ai_usage(created_at, workspace_id, search_requests, search_tokens_consumed, index_tokens_consumed)
|
|
VALUES (now()::date, $2, 1, $6, 0)
|
|
ON CONFLICT (created_at, workspace_id) DO UPDATE
|
|
SET search_requests = af_workspace_ai_usage.search_requests + 1,
|
|
search_tokens_consumed = af_workspace_ai_usage.search_tokens_consumed + $6
|
|
RETURNING workspace_id
|
|
)
|
|
SELECT
|
|
em.oid AS object_id,
|
|
collab.workspace_id,
|
|
em.partition_key AS collab_type,
|
|
em.content_type,
|
|
LEFT(em.content, $4) AS content_preview,
|
|
u.name AS created_by,
|
|
collab.created_at AS created_at,
|
|
em.embedding <=> $3 AS score
|
|
FROM af_collab_embeddings em
|
|
JOIN af_collab collab ON em.oid = collab.oid AND em.partition_key = collab.partition_key
|
|
JOIN af_collab_member member ON collab.oid = member.oid
|
|
JOIN af_user u ON collab.owner_uid = u.uid
|
|
WHERE member.uid = $1 AND collab.workspace_id = $2 AND collab.deleted_at IS NULL
|
|
ORDER BY em.embedding <=> $3
|
|
LIMIT $5
|
|
"#,
|
|
)
|
|
.bind(params.user_id)
|
|
.bind(params.workspace_id)
|
|
.bind(Vector::from(params.embedding))
|
|
.bind(params.preview)
|
|
.bind(params.limit)
|
|
.bind(tokens_used as i64);
|
|
let rows = query.fetch_all(tx.deref_mut()).await?;
|
|
Ok(rows)
|
|
}
|
|
|
|
#[derive(Debug, Clone)]
|
|
pub struct SearchDocumentParams {
|
|
/// ID of the user who is searching.
|
|
pub user_id: i64,
|
|
/// Workspace ID to search for documents in.
|
|
pub workspace_id: Uuid,
|
|
/// How many results should be returned.
|
|
pub limit: i32,
|
|
/// How many characters of the content (starting from the beginning) should be returned.
|
|
pub preview: i32,
|
|
/// Embedding of the query - generated by OpenAI embedder.
|
|
pub embedding: Vec<f32>,
|
|
}
|
|
|
|
#[derive(Debug, Clone, sqlx::FromRow)]
|
|
pub struct SearchDocumentItem {
|
|
/// Document identifier.
|
|
pub object_id: String,
|
|
/// Workspace identifier, given document belongs to.
|
|
pub workspace_id: Uuid,
|
|
/// Partition key, which maps directly onto [collab_entity::CollabType].
|
|
pub collab_type: i32,
|
|
/// Type of the content to be presented. Maps directly onto [database_entity::dto::EmbeddingContentType].
|
|
pub content_type: i32,
|
|
/// First N character of the indexed content.
|
|
pub content_preview: Option<String>,
|
|
/// Name of the user who's an owner of the document.
|
|
pub created_by: String,
|
|
/// When the document was created.
|
|
pub created_at: DateTime<Utc>,
|
|
/// Similarity score to an original query. Lower is better.
|
|
pub score: f64,
|
|
}
|