# DreamChat Database Schema

## Overview

PostgreSQL with pgvector extension for vector storage. All data is stored locally (offline-first).

## Extensions Required

```sql
-- Enable required extensions.
-- The pgvector project registers its extension under the name "vector";
-- CREATE EXTENSION "pgvector" fails because no extension has that name.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "vector";
```

## Entity Relationship Diagram

```
┌─────────────────┐       ┌─────────────────┐       ┌─────────────────┐
│      users      │       │   characters    │       │  conversations  │
├─────────────────┤       ├─────────────────┤       ├─────────────────┤
│ id (PK)         │◄──────│ user_id (FK)    │       │ id (PK)         │
│ email           │       │ id (PK)         │◄──────│ character_id(FK)│
│ username        │       │ name            │       │ user_id (FK)    │
│ password_hash   │       │ avatar_url      │       │ title           │
│ keycloak_sub    │       │ personality     │       │ created_at      │
│ role            │       │ backstory       │       │ updated_at      │
│ created_at      │       │ attributes      │       └────────┬────────┘
│ updated_at      │       │ created_at      │                │
└─────────────────┘       │ updated_at      │                │
                          └─────────────────┘                │
┌─────────────────┐                                          │
│import_documents │                                          │
├─────────────────┤                                          │
│ id (PK)         │                                          │
│ user_id (FK)    │                                          │
│ source_type     │       ┌─────────────────┐                │
│ source_name     │       │    messages     │◄───────────────┘
│ content         │       ├─────────────────┤
│ metadata        │       │ id (PK)         │
│ vector_id       │       │ conversation_id │
│ created_at      │       │ role            │
└─────────────────┘       │ content         │
                          │ tokens_used     │
                          │ model           │
┌─────────────────┐       │ metadata        │
│ vector_memories │       │ created_at      │
├─────────────────┤       └─────────────────┘
│ id (PK)         │
│ conversation_id │       ┌─────────────────┐
│ content         │       │ story_branches  │ (Phase 2)
│ embedding       │       ├─────────────────┤
│ metadata        │       │ id (PK)         │
│ created_at      │       │ conversation_id │
└─────────────────┘       │ parent_id (FK)  │
                          │ content         │
                          │ direction       │
                          │ metadata        │
                          │ created_at      │
                          └─────────────────┘
```

> **Note:** The diagram is a simplified overview; the DDL under *Table Definitions* is
> authoritative. In particular, `characters.personality` is `personality_prompt` in the DDL,
> `story_branches.direction` / `metadata` are `user_direction` / `generation_params`, and
> `import_documents.vector_id` is not defined by the current DDL.

## Table Definitions

### 1. users

Stores user account information. Supports both Keycloak (OIDC) and local password authentication.
```sql
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    email VARCHAR(255) UNIQUE NOT NULL,
    username VARCHAR(100) UNIQUE NOT NULL,
    password_hash VARCHAR(255),        -- NULL if using Keycloak
    keycloak_sub VARCHAR(255) UNIQUE,  -- NULL if using password auth

    -- NOT NULL is required here: a NULL role makes the CHECK evaluate to
    -- UNKNOWN, which PostgreSQL accepts, so the CHECK alone does not
    -- guarantee that every user has a role.
    role VARCHAR(20) NOT NULL DEFAULT 'USER' CHECK (role IN ('USER', 'ADMIN')),
    is_active BOOLEAN NOT NULL DEFAULT true,

    -- DEFAULT NOW() only applies on INSERT; nothing in this DDL refreshes
    -- updated_at on UPDATE, so the application must set it.
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),

    -- At least one auth method must be set
    CONSTRAINT auth_method_check CHECK (
        (password_hash IS NOT NULL) OR (keycloak_sub IS NOT NULL)
    )
);

-- No explicit indexes on email / keycloak_sub: their UNIQUE constraints
-- already create unique B-tree indexes, so separate CREATE INDEX statements
-- on the same columns would only duplicate them and slow down writes.
```

### 2. characters

Character definitions with complex attribute system (JSONB for flexibility).

```sql
CREATE TABLE characters (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    name VARCHAR(255) NOT NULL,
    avatar_url TEXT,

    -- Core personality prompt sent to LLM
    personality_prompt TEXT NOT NULL,

    -- Backstory context for the character
    backstory TEXT,

    -- Complex attribute system (structured JSON)
    -- Example: {"traits": ["brave", "witty"], "age": 25, "species": "human"}
    attributes JSONB NOT NULL DEFAULT '{}',

    -- Character configuration
    config JSONB NOT NULL DEFAULT '{
        "model": "openai/gpt-4o",
        "temperature": 0.7,
        "max_tokens": 2048,
        "memory_enabled": true
    }',

    is_public BOOLEAN NOT NULL DEFAULT false,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_characters_user_id ON characters(user_id);
CREATE INDEX idx_characters_name ON characters(name);
-- GIN index supports containment queries (@>, ?) on the JSONB attributes
CREATE INDEX idx_characters_attributes ON characters USING GIN(attributes);
```

### 3. conversations

Chat sessions between user and character.
```sql
CREATE TABLE conversations (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,
    title VARCHAR(255),  -- Auto-generated or user-defined

    -- Context window management.
    -- NOT NULL so arithmetic such as message_count + 1 can never yield NULL.
    message_count INTEGER NOT NULL DEFAULT 0,
    total_tokens INTEGER NOT NULL DEFAULT 0,

    -- Conversation settings
    settings JSONB NOT NULL DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_conversations_user_id ON conversations(user_id);
CREATE INDEX idx_conversations_character_id ON conversations(character_id);
CREATE INDEX idx_conversations_created_at ON conversations(created_at);
```

### 4. messages

Individual chat messages.

```sql
CREATE TYPE message_role AS ENUM ('user', 'assistant', 'system');

CREATE TABLE messages (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    role message_role NOT NULL,
    content TEXT NOT NULL,

    -- LLM metadata
    tokens_used INTEGER,
    model VARCHAR(100),

    -- Additional metadata (temperature, latency, etc.)
    metadata JSONB DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- The composite index also serves lookups on conversation_id alone (it is the
-- leading column), so a separate single-column index on conversation_id would
-- be redundant.
CREATE INDEX idx_messages_conversation_created ON messages(conversation_id, created_at);
CREATE INDEX idx_messages_created_at ON messages(created_at);
```

### 5. vector_memories

Vector embeddings for conversation memory using pgvector. Stores chunked content for semantic search.
```sql
-- Create vector extension first
CREATE EXTENSION IF NOT EXISTS vector;

CREATE TABLE vector_memories (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,

    -- The text content
    content TEXT NOT NULL,

    -- Vector embedding (configurable dimension based on model)
    -- Common sizes: 384 (all-MiniLM-L6-v2), 768 (all-mpnet-base-v2), 1024 (BGE)
    -- Must match the EMBEDDING_DIMENSION env var.
    -- {{EMBEDDING_DIMENSION}} is a template placeholder: substitute the integer
    -- dimension before running this statement; it is not valid SQL as written.
    embedding VECTOR({{EMBEDDING_DIMENSION}}),

    -- Metadata for filtering
    metadata JSONB DEFAULT '{
        "chunk_index": 0,
        "source": "conversation",
        "timestamp": null
    }',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

-- HNSW index for efficient similarity search
-- Note: Index is created after table creation based on actual dimension
-- CREATE INDEX idx_vector_memories_embedding ON vector_memories
--     USING hnsw (embedding vector_cosine_ops);

CREATE INDEX idx_vector_memories_conversation ON vector_memories(conversation_id);
```

### 6. import_documents

Raw imported documents from files or web scraping.

```sql
CREATE TYPE import_source_type AS ENUM ('file', 'url');
CREATE TYPE import_status AS ENUM ('pending', 'processing', 'completed', 'failed');

CREATE TABLE import_documents (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    source_type import_source_type NOT NULL,
    source_name VARCHAR(255) NOT NULL,  -- filename or URL

    -- Mime type for files
    mime_type VARCHAR(100),

    -- File size in bytes
    file_size BIGINT,

    -- Raw content (preprocessed)
    content TEXT,

    -- Processing status.
    -- NOT NULL so every document is always in exactly one known state.
    status import_status NOT NULL DEFAULT 'pending',
    error_message TEXT,

    -- Metadata (source info, extraction method, etc.)
    metadata JSONB DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_import_documents_user_id ON import_documents(user_id);
CREATE INDEX idx_import_documents_status ON import_documents(status);
```

### 7.
story_branches (Phase 2)

Tree structure for branching narratives.

```sql
CREATE TABLE story_branches (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,

    -- Self-referential for tree structure (NULL for a root branch)
    parent_id UUID REFERENCES story_branches(id) ON DELETE CASCADE,

    -- Branch content
    title VARCHAR(255),
    content TEXT NOT NULL,  -- The generated story content

    -- User direction that led to this branch
    user_direction TEXT,

    -- Branch metadata
    generation_params JSONB DEFAULT '{}',

    -- Tree position
    depth INTEGER NOT NULL DEFAULT 0,
    branch_order INTEGER NOT NULL DEFAULT 0,

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_story_branches_conversation ON story_branches(conversation_id);
CREATE INDEX idx_story_branches_parent ON story_branches(parent_id);
```

### 8. conversation_participants (Phase 3 - Multi-Character)

Supports multiple characters in a single conversation.

```sql
CREATE TABLE conversation_participants (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,

    -- Participant settings
    is_active BOOLEAN NOT NULL DEFAULT true,
    auto_respond BOOLEAN NOT NULL DEFAULT true,  -- Auto-generate responses

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),

    -- The unique index on (conversation_id, character_id) also serves lookups
    -- by conversation_id alone, so no separate index on that column is needed.
    UNIQUE(conversation_id, character_id)
);

-- Index the other foreign key so cascading deletes from characters and
-- "which conversations is this character in" lookups do not scan the table.
CREATE INDEX idx_participants_character ON conversation_participants(character_id);
```

## Prisma Schema (Reference)

> **Note:** As written, this schema makes Prisma create tables named after the models
> (e.g. `"VectorMemory"`) with camelCase columns and `TEXT` ids generated by the client.
> It does not produce the snake_case tables with `UUID` ids from the DDL above; add
> `@@map` / `@map` / `@db.Uuid` if the two definitions must describe the same tables.

### Full Schema Definition

```prisma
// schema.prisma

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

// Enums
enum UserRole {
  USER
  ADMIN
}

enum MessageRole {
  user
  assistant
  system
}

enum ImportSourceType {
  file
  url
}

enum ImportStatus {
  pending
  processing
  completed
  failed
}

// Models
model User {
  id           String   @id @default(uuid())
  email        String   @unique
  username     String   @unique
  passwordHash String?
  keycloakSub  String?  @unique
  role         UserRole @default(USER)
  isActive     Boolean  @default(true)
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt

  characters    Character[]
  conversations Conversation[]
  importDocs    ImportDocument[]

  // No @@index on email / keycloakSub: @unique already creates an index.
}

model Character {
  id                String   @id @default(uuid())
  name              String
  avatarUrl         String?
  personalityPrompt String
  backstory         String?
  attributes        Json     @default("{}")
  // Same default as the characters.config column in the SQL definition
  config            Json     @default("{\"model\": \"openai/gpt-4o\", \"temperature\": 0.7, \"max_tokens\": 2048, \"memory_enabled\": true}")
  isPublic          Boolean  @default(false)
  createdAt         DateTime @default(now())
  updatedAt         DateTime @updatedAt

  userId String
  user   User   @relation(fields: [userId], references: [id], onDelete: Cascade)

  conversations  Conversation[]
  // Back-relation for multi-character conversations (Phase 3)
  participations ConversationParticipant[]

  @@index([userId])
  @@index([name])
}

model Conversation {
  id           String   @id @default(uuid())
  title        String?
  messageCount Int      @default(0)
  totalTokens  Int      @default(0)
  settings     Json     @default("{}")
  createdAt    DateTime @default(now())
  updatedAt    DateTime @updatedAt

  userId      String
  user        User      @relation(fields: [userId], references: [id], onDelete: Cascade)
  characterId String
  character   Character @relation(fields: [characterId], references: [id], onDelete: Cascade)

  messages       Message[]
  vectorMemories VectorMemory[]
  storyBranches  StoryBranch[]
  participants   ConversationParticipant[]

  @@index([userId])
  @@index([characterId])
  @@index([createdAt])
}

model Message {
  id         String      @id @default(uuid())
  role       MessageRole
  content    String
  tokensUsed Int?
  model      String?
  metadata   Json?       @default("{}")
  createdAt  DateTime    @default(now())

  conversationId String
  conversation   Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)

  // The composite index also covers lookups on conversationId alone.
  @@index([conversationId, createdAt])
  @@index([createdAt])
}

model VectorMemory {
  id        String   @id @default(uuid())
  content   String
  // pgvector extension. An HNSW index can only be built on a column with a
  // fixed dimension, so declare it (e.g. Unsupported("vector(384)")) to match
  // the configured embedding model before creating that index.
  embedding Unsupported("vector")?
  // Same default as the vector_memories.metadata column in the SQL definition
  metadata  Json?    @default("{\"chunk_index\": 0, \"source\": \"conversation\", \"timestamp\": null}")
  createdAt DateTime @default(now())

  conversationId String
  conversation   Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)

  @@index([conversationId])
}

model ImportDocument {
  id           String           @id @default(uuid())
  sourceType   ImportSourceType
  sourceName   String
  mimeType     String?
  fileSize     BigInt?
  content      String?
  status       ImportStatus     @default(pending)
  errorMessage String?
  metadata     Json?            @default("{}")
  createdAt    DateTime         @default(now())
  updatedAt    DateTime         @updatedAt

  userId String
  user   User   @relation(fields: [userId], references: [id], onDelete: Cascade)

  @@index([userId])
  @@index([status])
}

model StoryBranch {
  id               String   @id @default(uuid())
  title            String?
  content          String
  // Optional, matching the nullable user_direction column in the SQL definition
  userDirection    String?
  generationParams Json?    @default("{}")
  depth            Int      @default(0)
  branchOrder      Int      @default(0)
  createdAt        DateTime @default(now())

  conversationId String
  conversation   Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)

  parentId String?
  parent   StoryBranch?  @relation("BranchTree", fields: [parentId], references: [id], onDelete: Cascade)
  children StoryBranch[] @relation("BranchTree")

  @@index([conversationId])
  @@index([parentId])
}

model ConversationParticipant {
  id          String   @id @default(uuid())
  isActive    Boolean  @default(true)
  autoRespond Boolean  @default(true)
  createdAt   DateTime @default(now())

  conversationId String
  conversation   Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)

  // Declared as a relation so Prisma emits the foreign key and cascade that
  // the SQL definition has; a bare scalar characterId would get neither.
  characterId String
  character   Character @relation(fields: [characterId], references: [id], onDelete: Cascade)

  // The unique index already covers lookups on conversationId alone.
  @@unique([conversationId, characterId])
  @@index([characterId])
}
```

### Prisma Client Usage Examples

```typescript
// src/shared/prisma/prisma.service.ts
import { Injectable, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { PrismaClient } from '@prisma/client';

/**
 * NestJS-injectable Prisma client that connects when the module is
 * initialised and disconnects when it is destroyed.
 */
@Injectable()
export class PrismaService
  extends PrismaClient
  implements OnModuleInit, OnModuleDestroy
{
  async onModuleInit() {
    await this.$connect();
  }

  async onModuleDestroy() {
    await this.$disconnect();
  }
}
```

```typescript
// Repository pattern with Prisma
@Injectable()
export class CharacterRepository {
  constructor(private prisma: PrismaService) {}

  /** Returns the user's characters, most recently updated first. */
  async findByUser(userId: string) {
    return this.prisma.character.findMany({
      where: { userId },
      orderBy: { updatedAt: 'desc' },
    });
  }

  /** Creates a character owned by `userId`. */
  async create(data: CreateCharacterDto, userId: string) {
    return this.prisma.character.create({
      data: { ...data, userId },
    });
  }
}
```

### Vector Memory Query with Prisma

```typescript
// Similarity search using pgvector with Prisma.
// Returns the k nearest memories of a conversation, closest first.
// `<=>` is pgvector's cosine-distance operator (smaller = more similar).
async similaritySearch(
  conversationId: string,
  queryEmbedding: number[],
  k: number = 5
) {
  // pgvector's text input format is "[x1,x2,...]". Binding that string and
  // casting it avoids depending on how the driver encodes a JS number[].
  const queryVector = `[${queryEmbedding.join(',')}]`;

  // Using raw query for pgvector-specific operations
  const results = await this.prisma.$queryRaw`
    SELECT
      id,
      content,
      metadata,
      embedding <=> ${queryVector}::vector AS distance
    FROM "VectorMemory"
    WHERE "conversationId" = ${conversationId}
      AND embedding IS NOT NULL
    ORDER BY embedding <=> ${queryVector}::vector
    LIMIT ${k}
  `;
  return results;
}

// Alternative: report cosine similarity (1 - cosine distance) instead of distance.
async similaritySearchCosine(
  conversationId: string,
  queryEmbedding: number[],
  k: number = 5
) {
  const queryVector = `[${queryEmbedding.join(',')}]`;

  // Order by the distance operator ascending rather than by the computed
  // similarity descending: the row order is the same, but
  //  - pgvector indexes are only used for ORDER BY <distance> ASC ... LIMIT, and
  //  - DESC would sort rows whose embedding is NULL first.
  const results = await this.prisma.$queryRaw`
    SELECT
      id,
      content,
      metadata,
      1 - (embedding <=> ${queryVector}::vector) AS similarity
    FROM "VectorMemory"
    WHERE "conversationId" = ${conversationId}
      AND embedding IS NOT NULL
    ORDER BY embedding <=> ${queryVector}::vector
    LIMIT ${k}
  `;
  return results;
}
```

### Embedding Configuration

```typescript
// Configuration for embedding providers
interface EmbeddingConfig {
  provider: 'local' | 'huggingface-api';
  model: string;
  dimension: number;

  // For local provider
  localModelPath?: string;

  // For HuggingFace API
  apiKey?: string;
  apiEndpoint?: string;
}

// Example configurations:
// Local:  { provider: 'local', model: 'Xenova/all-MiniLM-L6-v2', dimension: 384 }
// HF API: { provider: 'huggingface-api', model: 'sentence-transformers/all-mpnet-base-v2', dimension: 768 }
```

## Prisma Migration Strategy

### Initial Migration

```bash
# 1. Initialize Prisma
npx prisma init

# 2. Define schema in prisma/schema.prisma

# 3. Create first migration
npx prisma migrate dev --name init

# 4. Generate Prisma Client
npx prisma generate
```

### Migration Workflow

```bash
# After schema changes
npx prisma migrate dev --name descriptive_name

# Production deployment
npx prisma migrate deploy

# Generate client (in CI/CD)
npx prisma generate
```

### Migration File Example

```sql
-- migrations/20240223120000_init/migration.sql

-- Enable extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "vector";

-- CreateEnum
CREATE TYPE "UserRole" AS ENUM ('USER', 'ADMIN');

-- CreateEnum
CREATE TYPE "MessageRole" AS ENUM ('user', 'assistant', 'system');

-- CreateTable
-- NOTE(review): the reference schema declares `id String @id @default(uuid())`,
-- for which Prisma emits `"id" TEXT NOT NULL` with no database default. The
-- UUID / uuid_generate_v4() form shown here requires `@db.Uuid` and
-- `@default(dbgenerated("uuid_generate_v4()"))` in the schema; confirm which
-- one is intended.
CREATE TABLE "User" (
    "id" UUID NOT NULL DEFAULT uuid_generate_v4(),
    "email" TEXT NOT NULL,
    "username" TEXT NOT NULL,
    -- ... etc
);

-- CreateIndex
CREATE UNIQUE INDEX "User_email_key" ON "User"("email");

-- Vector index (HNSW) - Prisma does not generate this; append it to the
-- migration by hand. It requires the embedding column to be declared with a
-- fixed dimension (e.g. vector(384)).
CREATE INDEX idx_vector_memories_embedding ON "VectorMemory"
    USING hnsw (embedding vector_cosine_ops);
```

### Seeding

```typescript
// prisma/seed.ts
import { PrismaClient } from '@prisma/client';

const prisma = new PrismaClient();

async function main() {
  // Seed default admin or test data.
  // The users table requires at least one auth method (auth_method_check),
  // so the admin needs a password hash; supply a pre-computed hash via the
  // environment rather than committing one to the repository.
  const passwordHash = process.env.SEED_ADMIN_PASSWORD_HASH;
  if (!passwordHash) {
    throw new Error('SEED_ADMIN_PASSWORD_HASH must be set to seed the admin user');
  }

  // upsert keeps the seed idempotent: re-running it does not fail on the
  // unique email / username constraints.
  await prisma.user.upsert({
    where: { email: 'admin@dreamchat.local' },
    update: {},
    create: {
      email: 'admin@dreamchat.local',
      username: 'admin',
      role: 'ADMIN',
      passwordHash,
    },
  });
}

main()
  .catch((error) => {
    console.error(error);
    // Report failure to the caller (CI, `prisma db seed`) instead of exiting 0.
    process.exitCode = 1;
  })
  .finally(() => prisma.$disconnect());
```

```bash
# Run seed
npx prisma db seed
```

## Backup Strategy

```bash
# pg_dump with custom format
docker exec dreamchat-postgres pg_dump -U postgres -Fc dreamchat > backup.dump

# Restore: run pg_restore inside the same container and stream the dump over
# stdin (-i keeps stdin open). Add --clean --if-exists to drop and recreate
# objects that already exist in the target database.
docker exec -i dreamchat-postgres pg_restore -U postgres -d dreamchat < backup.dump
```