Files
DreamChat/doc/database-schema.md

26 KiB

DreamChat Database Schema

Overview

PostgreSQL with the pgvector extension for vector storage. All data is stored locally (offline-first).

Extensions Required

-- Enable required extensions
-- uuid-ossp provides uuid_generate_v4(), used as the primary-key default below.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
-- pgvector installs under the extension name "vector" (there is no extension
-- called "pgvector"); it provides the VECTOR column type and the <=> operator.
CREATE EXTENSION IF NOT EXISTS "vector";

Entity Relationship Diagram

┌─────────────────┐       ┌─────────────────┐       ┌─────────────────┐
│      users      │       │   characters    │       │  conversations  │
├─────────────────┤       ├─────────────────┤       ├─────────────────┤
│ id (PK)         │◄──────│ user_id (FK)    │       │ id (PK)         │
│ email           │       │ id (PK)         │◄──────│ character_id(FK)│
│ username        │       │ name            │       │ user_id (FK)    │
│ password_hash   │       │ avatar_url      │       │ title           │
│ keycloak_sub    │       │ personality     │       │ created_at      │
│ role            │       │ attributes      │       │ updated_at      │
│ created_at      │       │ created_at      │       └────────┬────────┘
│ updated_at      │       │ updated_at      │                │
└─────────────────┘       └────────┬────────┘                │
                                   │                         │
                          ┌────────┴────────┐                │
                          │character_knowledge│               │
                          ├─────────────────┤                │
                          │ id (PK)         │◄───────────────┤
                          │ character_id    │                │
                          │ name            │                │
                          │ source_type     │                │
                          │ raw_content     │                │
                          │ status          │                │
                          └────────┬────────┘                │
                                   │                         │
┌─────────────────┐       ┌────────┴────────┐       ┌────────┴────────┐
│import_documents │       │  vector_memories│       │    messages     │
├─────────────────┤       ├─────────────────┤       ├─────────────────┤
│ id (PK)         │       │ id (PK)         │       │ id (PK)         │
│ user_id (FK)    │       │ content         │       │ conversation_id │
│ source_type     │       │ embedding       │       │ role            │
│ source_name     │       │ memory_type     │       │ content         │
│ content         │       │ conversation_id │       │ tokens_used     │
│ status          │       │ character_id    │       │ model           │
└─────────────────┘       │ knowledge_id    │       │ metadata        │
                          │ created_at      │       │ created_at      │
                          └─────────────────┘       └─────────────────┘
                                   │
                          ┌────────┴────────┐
                          │  story_branches │  (Phase 2)
                          ├─────────────────┤
                          │ id (PK)         │
                          │ conversation_id │
                          │ parent_id (FK)  │
                          │ content         │
                          │ direction       │
                          │ metadata        │
                          │ created_at      │
                          └─────────────────┘

Table Definitions

1. users

Stores user account information. Supports both Keycloak (OIDC) and local password authentication.

-- One row per account. An account authenticates with a local password
-- (password_hash) and/or through Keycloak (keycloak_sub); auth_method_check
-- rejects rows that have neither.
CREATE TABLE users (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    email VARCHAR(255) UNIQUE NOT NULL,
    username VARCHAR(100) UNIQUE NOT NULL,
    password_hash VARCHAR(255),  -- NULL if using Keycloak
    keycloak_sub VARCHAR(255) UNIQUE,  -- NULL if using password auth
    -- NOT NULL is required here: a CHECK passes when its expression is NULL,
    -- so without it an explicit NULL role would be accepted.
    role VARCHAR(20) NOT NULL DEFAULT 'USER' CHECK (role IN ('USER', 'ADMIN')),
    is_active BOOLEAN NOT NULL DEFAULT true,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    -- Only the insert-time default is defined here; no trigger in this schema
    -- refreshes updated_at, so writers must set it on UPDATE.
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),

    -- At least one auth method must be set
    CONSTRAINT auth_method_check CHECK (
        (password_hash IS NOT NULL) OR (keycloak_sub IS NOT NULL)
    )
);

-- No extra indexes on email / keycloak_sub: their UNIQUE constraints already
-- create unique indexes, which serve equality lookups. A second index on the
-- same column would only add write overhead.

2. characters

Character definitions with a complex attribute system. Character knowledge is stored separately in character_knowledge, with its embeddings in vector_memories.

-- A character owned by one user. Deleting the user deletes their characters.
CREATE TABLE characters (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    name VARCHAR(255) NOT NULL,
    avatar_url TEXT,

    -- Core personality prompt sent to LLM
    personality_prompt TEXT NOT NULL,

    -- Complex attribute system (structured JSON)
    -- Example: {"traits": ["brave", "witty"], "age": 25, "species": "human"}
    -- NOT NULL: readers can rely on an object always being present.
    attributes JSONB NOT NULL DEFAULT '{}',

    -- Character configuration
    config JSONB NOT NULL DEFAULT '{
        "model": "openai/gpt-4o",
        "temperature": 0.7,
        "max_tokens": 2048,
        "memory_enabled": true
    }',

    is_public BOOLEAN NOT NULL DEFAULT false,
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_characters_user_id ON characters(user_id);
CREATE INDEX idx_characters_name ON characters(name);
-- GIN index supports containment queries (e.g. attributes @> '{"species": "human"}').
CREATE INDEX idx_characters_attributes ON characters USING GIN(attributes);

3. conversations

Chat sessions between user and character.

-- A chat session between one user and one character. Rows are removed when
-- either the owning user or the character is deleted.
CREATE TABLE conversations (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,
    title VARCHAR(255),  -- Auto-generated or user-defined

    -- Context window management
    -- NOT NULL so arithmetic on the counters (count + 1, SUM) never yields NULL.
    message_count INTEGER NOT NULL DEFAULT 0,
    total_tokens INTEGER NOT NULL DEFAULT 0,

    -- Conversation settings
    settings JSONB NOT NULL DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_conversations_user_id ON conversations(user_id);
CREATE INDEX idx_conversations_character_id ON conversations(character_id);
CREATE INDEX idx_conversations_created_at ON conversations(created_at);

4. messages

Individual chat messages.

-- Who authored a message.
CREATE TYPE message_role AS ENUM ('user', 'assistant', 'system');

-- One chat message. Messages are deleted together with their conversation.
CREATE TABLE messages (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    role message_role NOT NULL,
    content TEXT NOT NULL,

    -- LLM metadata
    tokens_used INTEGER,
    model VARCHAR(100),

    -- Additional metadata (temperature, latency, etc.)
    metadata JSONB DEFAULT '{}',

    -- NOT NULL: created_at is the ordering key within a conversation, and a
    -- NULL timestamp would sort unpredictably relative to real ones.
    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_messages_conversation_id ON messages(conversation_id);
CREATE INDEX idx_messages_created_at ON messages(created_at);
-- Serves "messages of conversation X in chronological order".
CREATE INDEX idx_messages_conversation_created ON messages(conversation_id, created_at);

5. character_knowledge

Multiple knowledge sources for characters. Each source is chunked and stored with embeddings in vector_memories.

-- Where an imported piece of content came from, and how far processing has got.
CREATE TYPE import_source_type AS ENUM ('file', 'url', 'manual');
CREATE TYPE import_status AS ENUM ('pending', 'processing', 'completed', 'failed');

-- A knowledge source attached to a character. Rows are deleted with the character.
CREATE TABLE character_knowledge (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,

    -- Knowledge source info
    name VARCHAR(255) NOT NULL,  -- Display name
    source_type import_source_type NOT NULL,
    source_name VARCHAR(255) NOT NULL,  -- Original filename or URL
    mime_type VARCHAR(100),
    file_size BIGINT,

    -- Raw content (before chunking)
    raw_content TEXT,

    -- Processing status
    -- NOT NULL: a row with NULL status would be invisible to every
    -- "WHERE status = ..." filter, including the one that finds pending work.
    status import_status NOT NULL DEFAULT 'pending',
    processing_info JSONB,  -- chunks count, errors, etc.

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_character_knowledge_character ON character_knowledge(character_id);
CREATE INDEX idx_character_knowledge_status ON character_knowledge(status);

6. vector_memories

Unified vector embeddings storage for:

  • Character knowledge - Background info, imported documents (linked to character_knowledge)
  • Conversation history - Chat context (linked to conversations)
-- Which kind of context an embedded chunk belongs to.
CREATE TYPE memory_type AS ENUM ('conversation', 'character');

-- One row per embedded text chunk. A chunk is owned by a conversation, a
-- character, a character_knowledge source, or a combination of these, and is
-- deleted when any owner it references is deleted.
CREATE TABLE vector_memories (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),

    -- The text chunk
    content TEXT NOT NULL,

    -- Vector embedding (configurable dimension based on model)
    -- Common sizes: 384 (all-MiniLM-L6-v2), 768 (all-mpnet-base-v2), 1024 (BGE)
    -- Must match the EMBEDDING_DIMENSION env var
    -- {{EMBEDDING_DIMENSION}} is a template placeholder; substitute it before running.
    embedding VECTOR({{EMBEDDING_DIMENSION}}),

    -- Memory type determines the context
    memory_type memory_type NOT NULL DEFAULT 'conversation',

    -- Metadata (chunk_index, source_info, etc.)
    metadata JSONB,

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),

    -- Polymorphic relations (at least one must be set; enforced by
    -- vector_memories_owner_check below)
    -- For conversation context
    conversation_id UUID REFERENCES conversations(id) ON DELETE CASCADE,

    -- For character knowledge
    character_id UUID REFERENCES characters(id) ON DELETE CASCADE,
    knowledge_id UUID REFERENCES character_knowledge(id) ON DELETE CASCADE,

    -- Reject orphan chunks that no owner-scoped query could ever return.
    CONSTRAINT vector_memories_owner_check CHECK (
        (conversation_id IS NOT NULL)
        OR (character_id IS NOT NULL)
        OR (knowledge_id IS NOT NULL)
    )
);

-- HNSW index for efficient similarity search
-- Note: Index is created after table creation based on actual dimension
-- CREATE INDEX idx_vector_memories_embedding ON vector_memories 
--     USING hnsw (embedding vector_cosine_ops);

-- Partial indexes: each owner column is NULL for rows owned through another
-- column, so indexing only the non-NULL rows keeps every index small.
CREATE INDEX idx_vector_memories_conversation ON vector_memories(conversation_id) WHERE conversation_id IS NOT NULL;
CREATE INDEX idx_vector_memories_character ON vector_memories(character_id) WHERE character_id IS NOT NULL;
CREATE INDEX idx_vector_memories_knowledge ON vector_memories(knowledge_id) WHERE knowledge_id IS NOT NULL;
CREATE INDEX idx_vector_memories_type ON vector_memories(memory_type);

7. import_documents

General-purpose imported documents (not linked to characters). For character knowledge, use character_knowledge.

-- A document imported by a user, independent of any character.
-- Rows are deleted together with the owning user.
CREATE TABLE import_documents (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,

    source_type import_source_type NOT NULL,  -- file, url, manual
    source_name VARCHAR(255) NOT NULL,  -- filename or URL

    -- Mime type for files
    mime_type VARCHAR(100),

    -- File size in bytes
    file_size BIGINT,

    -- Raw content (preprocessed)
    content TEXT,

    -- Processing status
    -- NOT NULL: a NULL status would never match a "WHERE status = ..." filter.
    status import_status NOT NULL DEFAULT 'pending',
    error_message TEXT,

    -- Metadata (source info, extraction method, etc.)
    metadata JSONB DEFAULT '{}',

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),
    updated_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_import_documents_user_id ON import_documents(user_id);
CREATE INDEX idx_import_documents_status ON import_documents(status);

8. story_branches (Phase 2)

Tree structure for branching narratives.

-- A node in a conversation's branching-story tree. A row with parent_id NULL
-- is a root; deleting a node deletes its whole subtree (self-referencing
-- ON DELETE CASCADE).
CREATE TABLE story_branches (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,

    -- Self-referential for tree structure
    parent_id UUID REFERENCES story_branches(id) ON DELETE CASCADE,

    -- Branch content
    title VARCHAR(255),
    content TEXT NOT NULL,  -- The generated story content

    -- User direction that led to this branch
    user_direction TEXT,

    -- Branch metadata
    generation_params JSONB DEFAULT '{}',

    -- Tree position
    -- NOT NULL so ordering and depth comparisons never have to handle NULL.
    depth INTEGER NOT NULL DEFAULT 0,
    branch_order INTEGER NOT NULL DEFAULT 0,

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW()
);

CREATE INDEX idx_story_branches_conversation ON story_branches(conversation_id);
CREATE INDEX idx_story_branches_parent ON story_branches(parent_id);

9. conversation_participants (Phase 3 - Multi-Character)

Supports multiple characters in a single conversation.

-- Junction table: which characters take part in which conversation.
-- A character can appear at most once per conversation.
CREATE TABLE conversation_participants (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    conversation_id UUID NOT NULL REFERENCES conversations(id) ON DELETE CASCADE,
    character_id UUID NOT NULL REFERENCES characters(id) ON DELETE CASCADE,

    -- Participant settings
    -- NOT NULL: a NULL flag is neither true nor false, so such a row would be
    -- skipped by both "WHERE is_active" and "WHERE NOT is_active".
    is_active BOOLEAN NOT NULL DEFAULT true,
    auto_respond BOOLEAN NOT NULL DEFAULT true,  -- Auto-generate responses

    created_at TIMESTAMP WITH TIME ZONE NOT NULL DEFAULT NOW(),

    UNIQUE(conversation_id, character_id)
);

CREATE INDEX idx_participants_conversation ON conversation_participants(conversation_id);

Enums

-- Consolidated list of the enum types used by the schema.
-- message_role, import_source_type, import_status and memory_type are also
-- declared next to their tables in the sections above; CREATE TYPE fails if the
-- type already exists, so run each declaration only once.

-- User roles
-- NOTE(review): users.role is declared as VARCHAR(20) with a CHECK constraint,
-- so this type is not referenced by the users table as written.
CREATE TYPE user_role AS ENUM (
    'USER',
    'ADMIN'
);

-- Message roles
CREATE TYPE message_role AS ENUM (
    'user',
    'assistant',
    'system'
);

-- Import/knowledge source types
CREATE TYPE import_source_type AS ENUM (
    'file',
    'url',
    'manual'
);

-- Processing status
CREATE TYPE import_status AS ENUM (
    'pending',
    'processing',
    'completed',
    'failed'
);

-- Vector memory types
CREATE TYPE memory_type AS ENUM (
    'conversation',
    'character'
);

Prisma Schema (Reference)

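The Prisma schema uses the multi-file schema feature. Models are organized in the prisma/models/ folder next to schema.prisma; Prisma combines every .prisma file in the schema directory into one schema (the schema language has no import statement).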

Schema Structure

prisma/
├── schema.prisma          # Main schema file (generator + datasource)
├── seed.ts                # Database seeding
└── models/
    ├── user.prisma        # User model + UserRole enum
    ├── character.prisma   # Character + CharacterKnowledge models
    ├── conversation.prisma # Conversation + ConversationParticipant
    ├── message.prisma     # Message model + MessageRole enum
    ├── vectorMemory.prisma # VectorMemory + MemoryType enum
    ├── importDocument.prisma # ImportDocument model
    └── storyBranch.prisma # StoryBranch model

Main Schema (schema.prisma)

// schema.prisma
// Holds the only generator and datasource blocks of the multi-file schema.

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

// No import statements: the Prisma schema language does not have any, and an
// `import { ... } from "..."` line is a schema validation error. With the
// multi-file schema feature Prisma combines the .prisma files it finds in the
// configured schema directory, so the models under ./models are picked up
// without being referenced here.
// Point Prisma at the directory rather than at this file, e.g. with
// `--schema ./prisma` or `"prisma": { "schema": "./prisma" }` in package.json.
// NOTE(review): on Prisma versions where multi-file schemas are still a preview
// feature, the generator also needs previewFeatures = ["prismaSchemaFolder"] -
// confirm against the Prisma version in use.

Full Schema Definition

// Combined reference listing: every enum and model in one place.
// (The original label here read "models/user.prisma", but the listing below
// contains all models, not only User.)
// NOTE(review): this generator/datasource pair repeats the one shown for
// schema.prisma. If the listing is split into per-model files, keep a single
// generator and a single datasource block in schema.prisma only.

generator client {
  provider = "prisma-client-js"
}

datasource db {
  provider = "postgresql"
  url      = env("DATABASE_URL")
}

// Enums
// Account role; new users default to USER (see User.role).
enum UserRole {
  USER
  ADMIN
}

// Author of a chat message.
enum MessageRole {
  user
  assistant
  system
}

// Origin of imported content (CharacterKnowledge and ImportDocument).
enum ImportSourceType {
  file
  url
  manual
}

// Processing state of imported content; records start as `pending`.
enum ImportStatus {
  pending
  processing
  completed
  failed
}

// Context a VectorMemory chunk belongs to.
enum MemoryType {
  conversation
  character
}

// Models
// Models
// Account record. passwordHash and keycloakSub are both optional at the Prisma
// level; the SQL schema's auth_method_check requires at least one of them.
model User {
  id            String    @id @default(uuid())
  email         String    @unique
  username      String    @unique
  passwordHash  String?
  keycloakSub   String?   @unique
  role          UserRole  @default(USER)
  isActive      Boolean   @default(true)
  createdAt     DateTime  @default(now())
  updatedAt     DateTime  @updatedAt

  characters    Character[]
  conversations Conversation[]
  importDocs    ImportDocument[]

  // No @@index on email / keycloakSub: @unique already creates a unique index
  // on each, so an additional plain index would duplicate it.
}

// A character owned by one user; deleted together with that user (onDelete: Cascade).
model Character {
  id                  String   @id @default(uuid())
  name                String
  avatarUrl           String?
  // Personality text for the character (the SQL definition describes it as the
  // core prompt sent to the LLM).
  personalityPrompt   String
  attributes          Json     @default("{}")
  // NOTE(review): defaults to an empty object here, while the SQL definition of
  // characters.config defaults to a populated object (model, temperature,
  // max_tokens, memory_enabled) - confirm which default is intended.
  config              Json     @default("{}")
  isPublic            Boolean  @default(false)
  createdAt           DateTime @default(now())
  updatedAt           DateTime @updatedAt

  userId              String
  user                User                @relation(fields: [userId], references: [id], onDelete: Cascade)
  conversations       Conversation[]
  knowledgeSources    CharacterKnowledge[]
  vectorMemories      VectorMemory[]

  // NOTE(review): the SQL schema also defines a GIN index on attributes; it has
  // no counterpart in this model.
  @@index([userId])
  @@index([name])
}

// A knowledge source attached to a character; deleted together with it.
// Its embedded chunks are reachable through vectorMemories.
model CharacterKnowledge {
  id              String           @id @default(uuid())
  name            String
  sourceType      ImportSourceType
  sourceName      String
  mimeType        String?
  fileSize        BigInt?
  rawContent      String?
  // New sources start as `pending`.
  status          ImportStatus     @default(pending)
  processingInfo  Json?
  createdAt       DateTime         @default(now())
  updatedAt       DateTime         @updatedAt

  characterId     String
  character       Character        @relation(fields: [characterId], references: [id], onDelete: Cascade)
  vectorMemories  VectorMemory[]

  @@index([characterId])
  @@index([status])
}

// A chat session between one user and one character. Deleting either the user
// or the character deletes the conversation (onDelete: Cascade on both).
model Conversation {
  id             String    @id @default(uuid())
  title          String?
  // Counters start at 0. Nothing in this schema maintains them, so
  // presumably the application updates them - verify against the writers.
  messageCount   Int       @default(0)
  totalTokens    Int       @default(0)
  settings       Json      @default("{}")
  createdAt      DateTime  @default(now())
  updatedAt      DateTime  @updatedAt

  userId         String
  user           User                 @relation(fields: [userId], references: [id], onDelete: Cascade)
  characterId    String
  character      Character            @relation(fields: [characterId], references: [id], onDelete: Cascade)
  messages       Message[]
  vectorMemories VectorMemory[]
  storyBranches  StoryBranch[]
  participants   ConversationParticipant[]

  @@index([userId])
  @@index([characterId])
  @@index([createdAt])
}

// A single chat message; deleted together with its conversation.
model Message {
  id              String      @id @default(uuid())
  role            MessageRole
  content         String
  tokensUsed      Int?
  model           String?
  // NOTE(review): optional with no default here, while the SQL column
  // messages.metadata defaults to '{}' - confirm which is intended.
  metadata        Json?
  createdAt       DateTime    @default(now())

  conversationId  String
  conversation    Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)

  @@index([conversationId])
  @@index([createdAt])
  // Composite index for reading one conversation's messages in time order.
  @@index([conversationId, createdAt])
}

// An embedded text chunk. All three owner relations are optional at the Prisma
// level; the SQL schema's comment says at least one of them must be set.
model VectorMemory {
  id              String    @id @default(uuid())
  content         String
  // pgvector column, declared Unsupported because Prisma has no native vector
  // type; the query examples in this document access it through $queryRaw.
  // NOTE(review): no dimension is given here, whereas the SQL definition uses
  // VECTOR({{EMBEDDING_DIMENSION}}) - confirm the migrated column is dimensioned.
  embedding       Unsupported("vector")?
  memoryType      MemoryType @default(conversation)
  metadata        Json?
  createdAt       DateTime  @default(now())

  conversationId  String?
  conversation    Conversation? @relation(fields: [conversationId], references: [id], onDelete: Cascade)
  
  characterId     String?
  character       Character?    @relation(fields: [characterId], references: [id], onDelete: Cascade)
  
  knowledgeId     String?
  knowledge       CharacterKnowledge? @relation(fields: [knowledgeId], references: [id], onDelete: Cascade)

  // NOTE(review): the SQL schema declares the three owner indexes as partial
  // (WHERE ... IS NOT NULL); these are plain indexes.
  @@index([conversationId])
  @@index([characterId])
  @@index([knowledgeId])
  @@index([memoryType])
}

// A document imported by a user, not tied to a character; deleted together
// with the owning user.
model ImportDocument {
  id              String           @id @default(uuid())
  sourceType      ImportSourceType
  sourceName      String
  mimeType        String?
  fileSize        BigInt?
  content         String?
  // New documents start as `pending`.
  status          ImportStatus     @default(pending)
  errorMessage    String?
  // NOTE(review): optional with no default here, while the SQL column
  // import_documents.metadata defaults to '{}' - confirm which is intended.
  metadata        Json?
  createdAt       DateTime         @default(now())
  updatedAt       DateTime         @updatedAt

  userId          String
  user            User             @relation(fields: [userId], references: [id], onDelete: Cascade)

  @@index([userId])
  @@index([status])
}

// A node in a conversation's branching-story tree. parentId is null for a root
// node; deleting a node deletes its children through the "BranchTree" relation
// (onDelete: Cascade).
model StoryBranch {
  id              String    @id @default(uuid())
  title           String?
  content         String
  // Optional, matching the nullable user_direction column in the SQL schema.
  userDirection   String?
  generationParams Json?
  depth           Int       @default(0)
  branchOrder     Int       @default(0)
  createdAt       DateTime  @default(now())

  conversationId  String
  conversation    Conversation  @relation(fields: [conversationId], references: [id], onDelete: Cascade)
  parentId        String?
  parent          StoryBranch?  @relation("BranchTree", fields: [parentId], references: [id], onDelete: Cascade)
  children        StoryBranch[] @relation("BranchTree")

  @@index([conversationId])
  @@index([parentId])
}

// Links a character to a conversation for multi-character chats.
// A character can appear at most once per conversation (@@unique below).
model ConversationParticipant {
  id              String    @id @default(uuid())
  isActive        Boolean   @default(true)
  autoRespond     Boolean   @default(true)
  createdAt       DateTime  @default(now())

  conversationId  String
  conversation    Conversation @relation(fields: [conversationId], references: [id], onDelete: Cascade)
  // NOTE(review): characterId is a plain scalar here - there is no @relation to
  // Character, so Prisma creates no foreign key for it. The SQL schema declares
  // character_id REFERENCES characters(id) ON DELETE CASCADE. Adding the
  // relation also needs a back-relation field on Character.
  characterId     String

  @@unique([conversationId, characterId])
  @@index([conversationId])
}

Prisma Client Usage Examples

// src/shared/prisma/prisma.service.ts
import { Injectable, OnModuleInit, OnModuleDestroy } from '@nestjs/common';
import { PrismaClient } from '@prisma/client';

/**
 * Injectable Prisma client whose connection follows the module lifecycle hooks:
 * it connects in onModuleInit and disconnects in onModuleDestroy.
 */
@Injectable()
export class PrismaService
  extends PrismaClient
  implements OnModuleInit, OnModuleDestroy
{
  /** Opens the database connection. */
  async onModuleInit(): Promise<void> {
    await this.$connect();
  }

  /** Closes the database connection. */
  async onModuleDestroy(): Promise<void> {
    await this.$disconnect();
  }
}
// Repository pattern with Prisma
/** Data-access wrapper around the Character model. */
@Injectable()
export class CharacterRepository {
  constructor(private prisma: PrismaService) {}

  /** Returns the characters owned by `userId`, most recently updated first. */
  async findByUser(userId: string) {
    const characters = await this.prisma.character.findMany({
      orderBy: { updatedAt: 'desc' },
      where: { userId: userId },
    });
    return characters;
  }

  /**
   * Creates a character owned by `userId`.
   * `userId` is spread last, so it overrides any `userId` carried in `data`.
   */
  async create(data: CreateCharacterDto, userId: string) {
    const payload = { ...data, userId };
    return this.prisma.character.create({ data: payload });
  }
}

Vector Memory Query with Prisma

/**
 * Similarity search using pgvector with Prisma.
 *
 * Returns up to `k` memories of the given `memoryType`, nearest first by the
 * `<=>` operator (cosine distance, matching the vector_cosine_ops index).
 *
 * @param targetId       Conversation id when `memoryType` is 'conversation',
 *                       otherwise character id.
 * @param queryEmbedding Embedding of the query text.
 * @param memoryType     Which owner column `targetId` refers to.
 * @param k              Maximum number of rows to return (default 5).
 * @returns Rows of `{ id, content, metadata, distance }`.
 */
async similaritySearch(
  targetId: string,
  queryEmbedding: number[],
  memoryType: MemoryType,
  k: number = 5
) {
  // $queryRaw sends every ${...} as a bound value, so a JS object cannot stand
  // in for a WHERE clause. The filter is written out as SQL in each branch and
  // only the values are interpolated.
  // NOTE(review): queryEmbedding is passed as a JS array and cast with ::vector;
  // pgvector's Node examples pass the '[1,2,3]' text form instead - confirm the
  // array form binds correctly with the Prisma version in use.
  if (memoryType === 'conversation') {
    return this.prisma.$queryRaw`
      SELECT
        id,
        content,
        metadata,
        embedding <=> ${queryEmbedding}::vector AS distance
      FROM "VectorMemory"
      WHERE "conversationId" = ${targetId}
        AND "memoryType" = 'conversation'
      ORDER BY embedding <=> ${queryEmbedding}::vector
      LIMIT ${k}
    `;
  }

  // MemoryType has only two members, so this branch is 'character'.
  return this.prisma.$queryRaw`
    SELECT
      id,
      content,
      metadata,
      embedding <=> ${queryEmbedding}::vector AS distance
    FROM "VectorMemory"
    WHERE "characterId" = ${targetId}
      AND "memoryType" = 'character'
    ORDER BY embedding <=> ${queryEmbedding}::vector
    LIMIT ${k}
  `;
}

/**
 * Search character knowledge.
 *
 * Returns up to `k` knowledge chunks of one character, most similar first,
 * each with the display name of the knowledge source it came from.
 *
 * @param characterId    Character whose knowledge is searched.
 * @param queryEmbedding Embedding of the query text.
 * @param k              Maximum number of rows to return (default 5).
 * @returns Rows of `{ id, content, metadata, source_name, similarity }`, where
 *          similarity = 1 - cosine distance.
 */
async searchCharacterKnowledge(
  characterId: string,
  queryEmbedding: number[],
  k: number = 5
) {
  // Order by the raw distance expression, ascending. This gives the same order
  // as "similarity DESC", but pgvector can only use its ANN index when ORDER BY
  // is the distance operator itself in ascending order.
  const results = await this.prisma.$queryRaw`
    SELECT
      vm.id,
      vm.content,
      vm.metadata,
      ck.name AS source_name,
      1 - (vm.embedding <=> ${queryEmbedding}::vector) AS similarity
    FROM "VectorMemory" vm
    INNER JOIN "CharacterKnowledge" ck ON vm."knowledgeId" = ck.id
    WHERE vm."characterId" = ${characterId}
      AND vm."memoryType" = 'character'
    ORDER BY vm.embedding <=> ${queryEmbedding}::vector
    LIMIT ${k}
  `;

  return results;
}

Embedding Configuration

// Configuration for embedding providers
// Describes which embedding backend to use and the size of the vectors it
// produces. Fields marked optional apply to one provider only.
interface EmbeddingConfig {
  // Which backend computes embeddings.
  provider: 'local' | 'huggingface-api';
  // Model identifier, e.g. 'Xenova/all-MiniLM-L6-v2'.
  model: string;
  // Length of the produced vectors, e.g. 384 or 768.
  // NOTE(review): presumably must equal the dimension of the VECTOR column
  // (EMBEDDING_DIMENSION) - confirm.
  dimension: number;
  // For local provider
  localModelPath?: string;
  // For HuggingFace API
  apiKey?: string;
  apiEndpoint?: string;
}

// Example configurations:
// Local: { provider: 'local', model: 'Xenova/all-MiniLM-L6-v2', dimension: 384 }
// HF API: { provider: 'huggingface-api', model: 'sentence-transformers/all-mpnet-base-v2', dimension: 768 }

Prisma Migration Strategy

Initial Migration

# One-time setup, run from the project root in the order shown.

# 1. Initialize Prisma
npx prisma init

# 2. Define schema in prisma/schema.prisma

# 3. Create first migration
#    "init" is the migration name; it becomes part of the migration folder name
#    (compare migrations/20240223120000_init in the example below).
npx prisma migrate dev --name init

# 4. Generate Prisma Client
npx prisma generate

Migration Workflow

# After schema changes
# Development only: creates a new named migration from the schema changes.
npx prisma migrate dev --name descriptive_name

# Production deployment
# Use this instead of "migrate dev" outside development.
npx prisma migrate deploy

# Generate client (in CI/CD)
npx prisma generate

Migration File Example

-- migrations/20240223120000_init/migration.sql
-- Abridged example: the "User" column list is cut short ("... etc") and only
-- two enums are shown, so this snippet is illustrative and not runnable as-is.

-- Enable extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "vector";

-- CreateEnum
CREATE TYPE "UserRole" AS ENUM ('USER', 'ADMIN');

-- CreateEnum
CREATE TYPE "MessageRole" AS ENUM ('user', 'assistant', 'system');

-- CreateTable
CREATE TABLE "User" (
    "id" UUID NOT NULL DEFAULT uuid_generate_v4(),
    "email" TEXT NOT NULL,
    "username" TEXT NOT NULL,
    -- ... etc
);

-- CreateIndex
CREATE UNIQUE INDEX "User_email_key" ON "User"("email");

-- Vector index (HNSW) - created manually after migration
-- NOTE(review): pgvector builds an HNSW index only on a column with a fixed
-- dimension. The Prisma model declares embedding as Unsupported("vector")
-- without one - confirm the column is created as vector(N) before this runs.
CREATE INDEX idx_vector_memories_embedding ON "VectorMemory" 
    USING hnsw (embedding vector_cosine_ops);

Seeding

// prisma/seed.ts
import { PrismaClient } from '@prisma/client';

const prisma = new PrismaClient();

/**
 * Seeds the default admin account.
 *
 * Safe to run repeatedly: the account is upserted by email, so a second run
 * leaves an existing admin untouched instead of failing on the unique email.
 *
 * @throws Error when SEED_ADMIN_PASSWORD_HASH is not set.
 */
async function main() {
  // The users table requires password_hash or keycloak_sub
  // (auth_method_check), so the admin needs an auth method. The hash is read
  // from the environment so that no credential is committed with the seed file.
  const passwordHash = process.env.SEED_ADMIN_PASSWORD_HASH;
  if (!passwordHash) {
    throw new Error(
      'SEED_ADMIN_PASSWORD_HASH must be set to a pre-computed password hash',
    );
  }

  // Seed default admin or test data
  await prisma.user.upsert({
    where: { email: 'admin@dreamchat.local' },
    update: {},
    create: {
      email: 'admin@dreamchat.local',
      username: 'admin',
      role: 'ADMIN',
      passwordHash,
    },
  });
}

main()
  .catch((error) => {
    console.error(error);
    // Report failure through the exit code so callers and CI notice a failed seed;
    // exitCode (rather than exit()) lets the disconnect below still run.
    process.exitCode = 1;
  })
  .finally(() => prisma.$disconnect());
# Run seed
# NOTE(review): "prisma db seed" runs the seed command configured for the
# project - confirm one is configured that points at prisma/seed.ts.
npx prisma db seed

Backup Strategy

# pg_dump with custom format
# Runs inside the dreamchat-postgres container; the dump is streamed to a file
# on the host.
docker exec dreamchat-postgres pg_dump -U postgres -Fc dreamchat > backup.dump

# Restore
# Goes through the same container as the dump, so no PostgreSQL client is needed
# on the host and the connection reaches the containerised server.
# -i keeps stdin open so the dump file can be streamed in.
# The target database must already exist. Add --clean --if-exists to replace
# existing objects (destructive).
docker exec -i dreamchat-postgres pg_restore -U postgres -d dreamchat < backup.dump