chore: add pnpm workspace configuration for apps and packages

2026-02-23 21:04:19 +08:00
parent ab02758382
commit 932f384f0d
31 changed files with 9081 additions and 203 deletions
--- a/doc/database-schema.md
+++ b/doc/database-schema.md
@@ -23,35 +23,41 @@ CREATE EXTENSION IF NOT EXISTS "pgvector";
 │ username        │       │ name            │       │ user_id (FK)    │
 │ password_hash   │       │ avatar_url      │       │ title           │
 │ keycloak_sub    │       │ personality     │       │ created_at      │
-│ role            │       │ backstory       │       │ updated_at      │
-│ created_at      │       │ attributes      │       └────────┬────────┘
-│ updated_at      │       │ created_at      │                │
-└─────────────────┘       │ updated_at      │                │
-                          └─────────────────┘                │
-┌─────────────────┐                                          │
-│import_documents │                                          │
-├─────────────────┤                                          │
-│ id (PK)         │                                          │
-│ user_id (FK)    │                                          │
-│ source_type     │       ┌─────────────────┐                │
-│ source_name     │       │    messages     │◄───────────────┘
-│ content         │       ├─────────────────┤
-│ metadata        │       │ id (PK)         │
-│ vector_id       │       │ conversation_id │
-│ created_at      │       │ role            │
-└─────────────────┘       │ content         │
-                          │ tokens_used     │
-                          │ model           │
-┌─────────────────┐       │ metadata        │
-│vector_memories  │       │ created_at      │
-├─────────────────┤       └─────────────────┘
-│ id (PK)         │
-│ conversation_id │       ┌─────────────────┐
-│ content         │       │  story_branches │  (Phase 2)
-│ embedding       │       ├─────────────────┤
-│ metadata        │       │ id (PK)         │
-│ created_at      │       │ conversation_id │
-└─────────────────┘       │ parent_id (FK)  │
+│ role            │       │ attributes      │       │ updated_at      │
+│ created_at      │       │ created_at      │       └────────┬────────┘
+│ updated_at      │       │ updated_at      │                │
+└─────────────────┘       └────────┬────────┘                │
+                                   │                         │
+                          ┌────────┴────────┐                │
+                          │character_knowledge│              │
+                          ├─────────────────┤                │
+                          │ id (PK)         │◄───────────────┤
+                          │ character_id    │                │
+                          │ name            │                │
+                          │ source_type     │                │
+                          │ raw_content     │                │
+                          │ status          │                │
+                          └────────┬────────┘                │
+                                   │                         │
+┌─────────────────┐       ┌────────┴────────┐       ┌────────┴────────┐
+│import_documents │       │  vector_memories│       │    messages     │
+├─────────────────┤       ├─────────────────┤       ├─────────────────┤
+│ id (PK)         │       │ id (PK)         │       │ id (PK)         │
+│ user_id (FK)    │       │ content         │       │ conversation_id │
+│ source_type     │       │ embedding       │       │ role            │
+│ source_name     │       │ memory_type     │       │ content         │
+│ content         │       │ conversation_id │       │ tokens_used     │
+│ status          │       │ character_id    │       │ model           │
+└─────────────────┘       │ knowledge_id    │       │ metadata        │
+                          │ created_at      │       │ created_at      │
+                          └─────────────────┘       └─────────────────┘
+                                   │
+                          ┌────────┴────────┐
+                          │  story_branches │  (Phase 2)
+                          ├─────────────────┤
+                          │ id (PK)         │
+                          │ conversation_id │
+                          │ parent_id (FK)  │
                          │ content         │
                          │ direction       │
                          │ metadata        │
@@ -89,7 +95,7 @@ CREATE INDEX idx_users_keycloak_sub ON users(keycloak_sub);

 ### 2. characters

-Character definitions with complex attribute system (JSONB for flexibility).
+Character definitions with a complex attribute system. Character knowledge sources are stored separately in `character_knowledge`, and their chunked embeddings are stored in `vector_memories`.

 ```sql
 CREATE TABLE characters (
@@ -101,9 +107,6 @@ CREATE TABLE characters (
    -- Core personality prompt sent to LLM
    personality_prompt TEXT NOT NULL,
    
-    -- Backstory context for the character
-    backstory TEXT,
-    
    -- Complex attribute system (structured JSON)
    -- Example: {"traits": ["brave", "witty"], "age": 25, "species": "human"}
    attributes JSONB DEFAULT '{}',
@@ -181,19 +184,53 @@ CREATE INDEX idx_messages_created_at ON messages(created_at);
 CREATE INDEX idx_messages_conversation_created ON messages(conversation_id, created_at);
 ```

-### 5. vector_memories
+### 5. character_knowledge

-Vector embeddings for conversation memory using pgvector. Stores chunked content for semantic search.
+Multiple knowledge sources for characters. Each source is chunked and stored with embeddings in `vector_memories`.

 ```sql
-- Create vector extension first
-CREATE EXTENSION IF NOT EXISTS vector;
+CREATE TYPE import_source_type AS ENUM ('file', 'url', 'manual');
+CREATE TYPE import_status AS ENUM ('pending', 'processing', 'completed', 'failed');
+
+CREATE TABLE character_knowledge (
+    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
+    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,
+    
+    -- Knowledge source info
+    name VARCHAR(255) NOT NULL,  -- Display name
+    source_type import_source_type NOT NULL,
+    source_name VARCHAR(255) NOT NULL,  -- Original filename or URL
+    mime_type VARCHAR(100),
+    file_size BIGINT,
+    
+    -- Raw content (before chunking)
+    raw_content TEXT,
+    
+    -- Processing status
+    status import_status DEFAULT 'pending',
+    processing_info JSONB,  -- chunks count, errors, etc.
+    
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    updated_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+);
+
+CREATE INDEX idx_character_knowledge_character ON character_knowledge(character_id);
+CREATE INDEX idx_character_knowledge_status ON character_knowledge(status);
+```
+
+### 6. vector_memories
+
+Unified vector embeddings storage for:
+- **Character knowledge** - Background info, imported documents (linked to `character_knowledge`)
+- **Conversation history** - Chat context (linked to `conversations`)
+
+```sql
+CREATE TYPE memory_type AS ENUM ('conversation', 'character');

 CREATE TABLE vector_memories (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
-    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    
-    -- The text content
+    -- The text chunk
    content TEXT NOT NULL,
    
    -- Vector embedding (configurable dimension based on model)
@@ -201,14 +238,21 @@ CREATE TABLE vector_memories (
    -- Must match the EMBEDDING_DIMENSION env var
    embedding VECTOR({{EMBEDDING_DIMENSION}}),
    
-    -- Metadata for filtering
-    metadata JSONB DEFAULT '{
-        "chunk_index": 0,
-        "source": "conversation",
-        "timestamp": null
-    }',
+    -- Memory type determines the context
+    memory_type memory_type DEFAULT 'conversation',
    
-    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW()
+    -- Metadata (chunk_index, source_info, etc.)
+    metadata JSONB,
+    
+    created_at TIMESTAMP WITH TIME ZONE DEFAULT NOW(),
+    
+    -- Polymorphic relations (at least one should be set; no CHECK constraint enforces this here)
+    -- For conversation context
+    conversation_id UUID REFERENCES conversations(id) ON DELETE CASCADE,
+    
+    -- For character knowledge
+    character_id UUID REFERENCES characters(id) ON DELETE CASCADE,
+    knowledge_id UUID REFERENCES character_knowledge(id) ON DELETE CASCADE
 );

 -- HNSW index for efficient similarity search
@@ -216,22 +260,22 @@ CREATE TABLE vector_memories (
 -- CREATE INDEX idx_vector_memories_embedding ON vector_memories 
 --     USING hnsw (embedding vector_cosine_ops);

-CREATE INDEX idx_vector_memories_conversation ON vector_memories(conversation_id);
+CREATE INDEX idx_vector_memories_conversation ON vector_memories(conversation_id) WHERE conversation_id IS NOT NULL;
+CREATE INDEX idx_vector_memories_character ON vector_memories(character_id) WHERE character_id IS NOT NULL;
+CREATE INDEX idx_vector_memories_knowledge ON vector_memories(knowledge_id) WHERE knowledge_id IS NOT NULL;
+CREATE INDEX idx_vector_memories_type ON vector_memories(memory_type);
 ```

-### 6. import_documents
+### 7. import_documents

-Raw imported documents from files or web scraping.
+General-purpose imported documents (not linked to characters). For character knowledge, use `character_knowledge`.

 ```sql
-CREATE TYPE import_source_type AS ENUM ('file', 'url');
-CREATE TYPE import_status AS ENUM ('pending', 'processing', 'completed', 'failed');
-
 CREATE TABLE import_documents (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    
-    source_type import_source_type NOT NULL,
+    source_type import_source_type NOT NULL,  -- file, url, manual
    source_name VARCHAR(255) NOT NULL,  -- filename or URL
    
    -- Mime type for files
@@ -258,7 +302,7 @@ CREATE INDEX idx_import_documents_user_id ON import_documents(user_id);
 CREATE INDEX idx_import_documents_status ON import_documents(status);
 ```

-### 7. story_branches (Phase 2)
+### 8. story_branches (Phase 2)

 Tree structure for branching narratives.

@@ -291,7 +335,7 @@ CREATE INDEX idx_story_branches_conversation ON story_branches(conversation_id);
 CREATE INDEX idx_story_branches_parent ON story_branches(parent_id);
 ```

-### 8. conversation_participants (Phase 3 - Multi-Character)
+### 9. conversation_participants (Phase 3 - Multi-Character)

 Supports multiple characters in a single conversation.

@@ -313,12 +357,73 @@ CREATE TABLE conversation_participants (
 CREATE INDEX idx_participants_conversation ON conversation_participants(conversation_id);
 ```

+### Enums
+
+```sql
+-- User roles
+CREATE TYPE user_role AS ENUM ('USER', 'ADMIN');
+
+-- Message roles
+CREATE TYPE message_role AS ENUM ('user', 'assistant', 'system');
+
+-- Import/knowledge source types
+CREATE TYPE import_source_type AS ENUM ('file', 'url', 'manual');
+
+-- Processing status
+CREATE TYPE import_status AS ENUM ('pending', 'processing', 'completed', 'failed');
+
+-- Vector memory types
+CREATE TYPE memory_type AS ENUM ('conversation', 'character');
+```
+
 ## Prisma Schema (Reference)

+The Prisma schema uses the [multi-file schema](https://www.prisma.io/docs/orm/prisma-schema/overview/location) feature. Models are organized in the `prisma/models/` folder and are merged with `schema.prisma` into a single schema by Prisma.
+
+### Schema Structure
+
+```
+prisma/
+├── schema.prisma          # Main schema file with imports
+├── seed.ts                # Database seeding
+└── models/
+    ├── user.prisma        # User model + UserRole enum
+    ├── character.prisma   # Character + CharacterKnowledge models
+    ├── conversation.prisma # Conversation + ConversationParticipant
+    ├── message.prisma     # Message model + MessageRole enum
+    ├── vectorMemory.prisma # VectorMemory + MemoryType enum
+    ├── importDocument.prisma # ImportDocument model
+    └── storyBranch.prisma # StoryBranch model
+```
+
+### Main Schema (schema.prisma)
+
+```prisma
+// schema.prisma
+
+generator client {
+  provider = "prisma-client-js"
+}
+
+datasource db {
+  provider = "postgresql"
+  url      = env("DATABASE_URL")
+}
+
+// NOTE: the Prisma schema language has no `import` statement.
+// With multi-file schema support, Prisma loads every `.prisma` file in the
+// configured schema directory and merges them, so the model files below are
+// picked up automatically without being listed here:
+//   models/user.prisma, models/character.prisma, models/conversation.prisma,
+//   models/message.prisma, models/vectorMemory.prisma,
+//   models/importDocument.prisma, models/storyBranch.prisma
+// Models defined in different files can reference each other directly.
+```
+
 ### Full Schema Definition

 ```prisma
-// schema.prisma
+// Combined reference: contents of schema.prisma and all models/*.prisma files

 generator client {
  provider = "prisma-client-js"
@@ -344,6 +449,7 @@ enum MessageRole {
 enum ImportSourceType {
  file
  url
+  manual
 }

 enum ImportStatus {
@@ -353,6 +459,11 @@ enum ImportStatus {
  failed
 }

+enum MemoryType {
+  conversation
+  character
+}
+
 // Models
 model User {
  id            String    @id @default(uuid())
@@ -378,7 +489,6 @@ model Character {
  name                String
  avatarUrl           String?
  personalityPrompt   String
-  backstory           String?
  attributes          Json     @default("{}")
  config              Json     @default("{}")
  isPublic            Boolean  @default(false)
@@ -386,30 +496,53 @@ model Character {
  updatedAt           DateTime @updatedAt

  userId              String
-  user                User           @relation(fields: [userId], references: [id], onDelete: Cascade)
+  user                User                @relation(fields: [userId], references: [id], onDelete: Cascade)
  conversations       Conversation[]
+  knowledgeSources    CharacterKnowledge[]
+  vectorMemories      VectorMemory[]

  @@index([userId])
  @@index([name])
 }

-model Conversation {
-  id            String    @id @default(uuid())
-  title         String?
-  messageCount  Int       @default(0)
-  totalTokens   Int       @default(0)
-  settings      Json      @default("{}")
-  createdAt     DateTime  @default(now())
-  updatedAt     DateTime  @updatedAt
+model CharacterKnowledge {
+  id              String           @id @default(uuid())
+  name            String
+  sourceType      ImportSourceType
+  sourceName      String
+  mimeType        String?
+  fileSize        BigInt?
+  rawContent      String?
+  status          ImportStatus     @default(pending)
+  processingInfo  Json?
+  createdAt       DateTime         @default(now())
+  updatedAt       DateTime         @updatedAt

-  userId        String
-  user          User                 @relation(fields: [userId], references: [id], onDelete: Cascade)
-  characterId   String
-  character     Character            @relation(fields: [characterId], references: [id], onDelete: Cascade)
-  messages      Message[]
+  characterId     String
+  character       Character        @relation(fields: [characterId], references: [id], onDelete: Cascade)
+  vectorMemories  VectorMemory[]
+
+  @@index([characterId])
+  @@index([status])
+}
+
+model Conversation {
+  id             String    @id @default(uuid())
+  title          String?
+  messageCount   Int       @default(0)
+  totalTokens    Int       @default(0)
+  settings       Json      @default("{}")
+  createdAt      DateTime  @default(now())
+  updatedAt      DateTime  @updatedAt
+
+  userId         String
+  user           User                 @relation(fields: [userId], references: [id], onDelete: Cascade)
+  characterId    String
+  character      Character            @relation(fields: [characterId], references: [id], onDelete: Cascade)
+  messages       Message[]
  vectorMemories VectorMemory[]
-  storyBranches StoryBranch[]
-  participants  ConversationParticipant[]
+  storyBranches  StoryBranch[]
+  participants   ConversationParticipant[]

  @@index([userId])
  @@index([characterId])
@@ -436,14 +569,24 @@ model Message {
 model VectorMemory {
  id              String    @id @default(uuid())
  content         String
-  embedding       Unsupported("vector")?  // pgvector extension
+  embedding       Unsupported("vector")?
+  memoryType      MemoryType @default(conversation)
  metadata        Json?
  createdAt       DateTime  @default(now())

-  conversationId  String
-  conversation    Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)
+  conversationId  String?
+  conversation    Conversation? @relation(fields: [conversationId], references: [id], onDelete: Cascade)
+  
+  characterId     String?
+  character       Character?    @relation(fields: [characterId], references: [id], onDelete: Cascade)
+  
+  knowledgeId     String?
+  knowledge       CharacterKnowledge? @relation(fields: [knowledgeId], references: [id], onDelete: Cascade)

  @@index([conversationId])
+  @@index([characterId])
+  @@index([knowledgeId])
+  @@index([memoryType])
 }

 model ImportDocument {
@@ -546,11 +689,16 @@ export class CharacterRepository {
 ```typescript
 // Similarity search using pgvector with Prisma
 async similaritySearch(
-  conversationId: string,
+  targetId: string,
  queryEmbedding: number[],
+  memoryType: MemoryType,
  k: number = 5
 ) {
-  // Using raw query for pgvector-specific operations
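+  // Build the WHERE condition as a Prisma.sql fragment (import { Prisma } from '@prisma/client'); a plain object cannot be interpolated into $queryRaw
+  const whereClause = memoryType === 'conversation'
+    ? Prisma.sql`"conversationId" = ${targetId} AND "memoryType" = 'conversation'`
+    : Prisma.sql`"characterId" = ${targetId} AND "memoryType" = 'character'`;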
+
  const results = await this.prisma.$queryRaw`
    SELECT 
      id,
@@ -558,7 +706,7 @@ async similaritySearch(
      metadata,
      embedding <=> ${queryEmbedding}::vector as distance
    FROM "VectorMemory"
-    WHERE "conversationId" = ${conversationId}
+    WHERE ${whereClause}
    ORDER BY embedding <=> ${queryEmbedding}::vector
    LIMIT ${k}
  `;
@@ -566,20 +714,23 @@ async similaritySearch(
  return results;
 }

-// Alternative: using cosine similarity
-async similaritySearchCosine(
-  conversationId: string,
+// Search character knowledge
+async searchCharacterKnowledge(
+  characterId: string,
  queryEmbedding: number[],
  k: number = 5
 ) {
  const results = await this.prisma.$queryRaw`
    SELECT 
-      id,
-      content,
-      metadata,
-      1 - (embedding <=> ${queryEmbedding}::vector) as similarity
-    FROM "VectorMemory"
-    WHERE "conversationId" = ${conversationId}
+      vm.id,
+      vm.content,
+      vm.metadata,
+      ck.name as source_name,
+      1 - (vm.embedding <=> ${queryEmbedding}::vector) as similarity
+    FROM "VectorMemory" vm
+    JOIN "CharacterKnowledge" ck ON vm."knowledgeId" = ck.id
+    WHERE vm."characterId" = ${characterId}
+      AND vm."memoryType" = 'character'
    ORDER BY similarity DESC
    LIMIT ${k}
  `;