feat: Implement knowledge import feature for characters

- Added KnowledgeImport page for importing character knowledge from URLs.
- Integrated URL validation and error handling for unsupported websites.
- Created API endpoints for importing content from URLs and retrieving character knowledge.
- Enhanced VectorStoreService with logging and error handling for vector memory storage.
- Updated frontend to display knowledge sources and manage them effectively.
- Added support for fetching recent character knowledge as a fallback in similarity searches.
- Updated OpenAPI documentation to reflect new import functionality.
This commit is contained in:
GW_MC
2026-02-24 14:29:26 +00:00
parent 8714d6bd22
commit e033d67ec1
30 changed files with 2018 additions and 204 deletions

View File

@@ -0,0 +1,14 @@
module.exports = {
moduleFileExtensions: ['js', 'json', 'ts'],
rootDir: 'src',
testRegex: '.*\\.spec\\.ts$',
transform: {
'^.+\\.(t|j)s$': 'ts-jest',
},
collectCoverageFrom: ['**/*.(t|j)s'],
coverageDirectory: '../coverage',
testEnvironment: 'node',
moduleNameMapper: {
'^@dreamchat/shared$': '<rootDir>/../../packages/shared/dist',
},
};

View File

@@ -4,7 +4,9 @@
"scripts": {
"build": "nest build",
"dev": "nest start --watch",
"start": "node dist/main",
"start": "node --max-old-space-size=768 dist/main",
"start:low-memory": "node --max-old-space-size=384 dist/main",
"start:high-memory": "node --max-old-space-size=1536 dist/main",
"test": "jest",
"test:watch": "jest --watch",
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\"",
@@ -46,6 +48,7 @@
"@nestjs/cli": "^11.0.16",
"@nestjs/testing": "^11.1.14",
"@types/bcrypt": "^6.0.0",
"@types/jest": "^30.0.0",
"@types/jsonwebtoken": "^9.0.0",
"@types/multer": "^1.4.12",
"@types/node": "^24.10.13",
@@ -53,6 +56,7 @@
"@types/passport-local": "^1.0.0",
"jest": "^30.2.0",
"prisma": "^7.4.1",
"ts-jest": "^29.4.6",
"typescript": "^5.3.0"
}
}

View File

@@ -1,4 +1,4 @@
import { Module } from '@nestjs/common';
import { Module, NestModule, MiddlewareConsumer } from '@nestjs/common';
import { APP_GUARD } from '@nestjs/core';
import { PrismaModule } from './prisma/prisma.module';
import { AuthModule } from './auth/auth.module';
@@ -9,6 +9,7 @@ import { VectorModule } from './vector/vector.module';
import { ChatModule } from './chat/chat.module';
import { ImportModule } from './import/import.module';
import { JwtAuthGuard } from './auth/guards/jwt-auth.guard';
import { RequestLoggerMiddleware } from './common/middleware';
@Module({
imports: [
@@ -29,4 +30,10 @@ import { JwtAuthGuard } from './auth/guards/jwt-auth.guard';
},
],
})
export class AppModule {}
export class AppModule implements NestModule {
configure(consumer: MiddlewareConsumer) {
consumer
.apply(RequestLoggerMiddleware)
.forRoutes('*');
}
}

View File

@@ -8,7 +8,7 @@ import {
OnGatewayDisconnect,
} from '@nestjs/websockets';
import { Server, Socket } from 'socket.io';
import { UseGuards } from '@nestjs/common';
import { UseGuards, Logger } from '@nestjs/common';
import { ChatService } from './chat.service';
import { JwtService } from '@nestjs/jwt';
@@ -27,6 +27,8 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
@WebSocketServer()
server: Server;
private readonly logger = new Logger(ChatGateway.name);
constructor(
private chatService: ChatService,
private jwtService: JwtService,
@@ -83,6 +85,8 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
) {
if (!client.userId) return;
this.logger.debug(`[handleSendMessage] Received message from user ${client.userId} for conversation ${data.conversationId}: "${data.content}"`);
const room = `conversation:${data.conversationId}`;
try {
@@ -119,6 +123,7 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
conversationId: data.conversationId,
assistantMessage,
});
this.logger.debug(`[handleSendMessage] Message streaming completed for conversation ${data.conversationId}`);
} catch (error) {
client.emit('error', {
conversationId: data.conversationId,

View File

@@ -1,4 +1,4 @@
import { Injectable, NotFoundException, ForbiddenException } from '@nestjs/common';
import { Injectable, NotFoundException, ForbiddenException, Logger } from '@nestjs/common';
import { PrismaService } from '../prisma/prisma.service';
import { LLMService } from '../llm/llm.service';
import { MemoryService } from '../vector/memory.service';
@@ -8,6 +8,8 @@ import { Conversation, Message, MessageRole } from '@prisma/client';
@Injectable()
export class ChatService {
private readonly logger = new Logger(ChatService.name);
constructor(
private prisma: PrismaService,
private llmService: LLMService,
@@ -15,15 +17,9 @@ export class ChatService {
private characterService: CharacterService,
) {}
async createConversation(
userId: string,
createConversationDto: CreateConversationDto,
): Promise<Conversation> {
async createConversation(userId: string, createConversationDto: CreateConversationDto): Promise<Conversation> {
// Verify character exists and user has access
const character = await this.characterService.findById(
createConversationDto.characterId,
userId,
);
const character = await this.characterService.findById(createConversationDto.characterId, userId);
return this.prisma.conversation.create({
data: {
@@ -96,11 +92,7 @@ export class ChatService {
});
}
async sendMessage(
conversationId: string,
userId: string,
sendMessageDto: SendMessageDto,
): Promise<{ userMessage: Message; assistantMessage: Message }> {
async sendMessage(conversationId: string, userId: string, sendMessageDto: SendMessageDto): Promise<{ userMessage: Message; assistantMessage: Message }> {
const conversation = await this.findConversationById(conversationId, userId);
// Create user message
@@ -113,25 +105,20 @@ export class ChatService {
});
// Store user message in vector memory
await this.memoryService.storeConversationMessage(
`User: ${sendMessageDto.content}`,
conversationId,
{ messageId: userMessage.id },
);
await this.memoryService.storeConversationMessage(`User: ${sendMessageDto.content}`, conversationId, { messageId: userMessage.id });
// Generate context from memory
const memoryContext = await this.memoryService.buildContextForConversation(
conversationId,
sendMessageDto.content,
conversation.characterId,
);
const memoryContext = await this.memoryService.buildContextForConversation(conversationId, sendMessageDto.content, conversation.characterId);
// Build messages for LLM
const messages = this.buildLLMMessages(
conversation.character.personalityPrompt,
conversation.messages,
sendMessageDto.content,
memoryContext,
const messages = this.buildLLMMessages(conversation.character.personalityPrompt, conversation.messages, sendMessageDto.content, memoryContext);
// Grouped debug logging
this.logger.debug(
`[sendMessage] conversation=${conversationId}\n` +
`--- Knowledges/Context ---\n${memoryContext || '(none)'}\n` +
`--- Full Messages to LLM ---\n` +
messages.map((msg, idx) => `[${idx}] ${msg.role}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? '...' : ''}`).join('\n'),
);
// Generate response
@@ -161,20 +148,12 @@ export class ChatService {
});
// Store assistant response in vector memory
await this.memoryService.storeConversationMessage(
`${conversation.character.name}: ${response.content}`,
conversationId,
{ messageId: assistantMessage.id },
);
await this.memoryService.storeConversationMessage(`${conversation.character.name}: ${response.content}`, conversationId, { messageId: assistantMessage.id });
return { userMessage, assistantMessage };
}
async *streamMessage(
conversationId: string,
userId: string,
sendMessageDto: SendMessageDto,
): AsyncGenerator<{ type: 'chunk' | 'message'; data: any }> {
async *streamMessage(conversationId: string, userId: string, sendMessageDto: SendMessageDto): AsyncGenerator<{ type: 'chunk' | 'message'; data: any }> {
const conversation = await this.findConversationById(conversationId, userId);
// Create user message
@@ -189,25 +168,20 @@ export class ChatService {
yield { type: 'message', data: { userMessage } };
// Store user message in vector memory
await this.memoryService.storeConversationMessage(
`User: ${sendMessageDto.content}`,
conversationId,
{ messageId: userMessage.id },
);
await this.memoryService.storeConversationMessage(`User: ${sendMessageDto.content}`, conversationId, { messageId: userMessage.id });
// Generate context from memory
const memoryContext = await this.memoryService.buildContextForConversation(
conversationId,
sendMessageDto.content,
conversation.characterId,
);
const memoryContext = await this.memoryService.buildContextForConversation(conversationId, sendMessageDto.content, conversation.characterId);
// Build messages for LLM
const messages = this.buildLLMMessages(
conversation.character.personalityPrompt,
conversation.messages,
sendMessageDto.content,
memoryContext,
const messages = this.buildLLMMessages(conversation.character.personalityPrompt, conversation.messages, sendMessageDto.content, memoryContext);
// Grouped debug logging
this.logger.debug(
`[streamMessage] conversation=${conversationId}\n` +
`--- Knowledges/Context ---\n${memoryContext || '(none)'}\n` +
`--- Full Messages to LLM ---\n` +
messages.map((msg, idx) => `[${idx}] ${msg.role}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? '...' : ''}`).join('\n'),
);
// Generate streaming response
@@ -233,10 +207,7 @@ export class ChatService {
});
// Update conversation stats
const tokensUsed = this.llmService.countTokens([
...messages,
{ role: 'assistant', content: fullContent },
]);
const tokensUsed = this.llmService.countTokens([...messages, { role: 'assistant', content: fullContent }]);
await this.prisma.conversation.update({
where: { id: conversationId },
@@ -247,11 +218,7 @@ export class ChatService {
});
// Store assistant response in vector memory
await this.memoryService.storeConversationMessage(
`${conversation.character.name}: ${fullContent}`,
conversationId,
{ messageId: assistantMessage.id },
);
await this.memoryService.storeConversationMessage(`${conversation.character.name}: ${fullContent}`, conversationId, { messageId: assistantMessage.id });
yield { type: 'message', data: { assistantMessage } };
}
@@ -265,7 +232,10 @@ export class ChatService {
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
// Add system message with personality and context
let systemContent = personalityPrompt;
let systemContent = `You are now role playing as the character based on the following personality description. You should use this information to inform your responses and stay in character. Always try to stay in character and provide responses that align with the personality and history provided. You are now talking to a user. The user will say something, and you will respond as the character. Remember this is a conversation, keep it talking like and maintain your character\n\n`;
if (personalityPrompt) {
systemContent = personalityPrompt;
}
if (memoryContext) {
systemContent += `\n\nUse the following context to inform your responses:\n${memoryContext}`;
}

View File

@@ -0,0 +1 @@
export { RequestLoggerMiddleware } from './request-logger.middleware';

View File

@@ -0,0 +1,62 @@
import { Injectable, NestMiddleware, Logger } from '@nestjs/common';
import { Request, Response, NextFunction } from 'express';
@Injectable()
export class RequestLoggerMiddleware implements NestMiddleware {
private logger = new Logger('HTTP');
private readonly isEnabled: boolean;
private readonly logLevel: 'verbose' | 'standard' | 'minimal';
constructor() {
this.isEnabled = process.env.REQUEST_LOGGER !== 'false';
const level = process.env.REQUEST_LOGGER_LEVEL;
this.logLevel = level === 'verbose' || level === 'minimal' ? level : 'standard';
}
use(req: Request, res: Response, next: NextFunction) {
if (!this.isEnabled) {
return next();
}
const { method, originalUrl, ip, headers } = req;
const userAgent = headers['user-agent'] || 'unknown';
const startTime = Date.now();
// Log request start (verbose only)
if (this.logLevel === 'verbose') {
this.logger.log(`${method} ${originalUrl} - ${ip} - ${userAgent}`);
}
// Capture response finish
res.on('finish', () => {
const duration = Date.now() - startTime;
const statusCode = res.statusCode;
const contentLength = res.get('content-length') || 0;
// Build message based on log level
let message: string;
switch (this.logLevel) {
case 'verbose':
message = `${method} ${originalUrl} ${statusCode} - ${duration}ms - ${contentLength}b - ${ip}`;
break;
case 'minimal':
message = `${method} ${originalUrl} ${statusCode}`;
break;
default: // standard
message = `${method} ${originalUrl} ${statusCode} - ${duration}ms`;
break;
}
// Determine log level based on status code
if (statusCode >= 500) {
this.logger.error(message);
} else if (statusCode >= 400) {
this.logger.warn(message);
} else {
this.logger.log(message);
}
});
next();
}
}

View File

@@ -4,9 +4,11 @@ import {
Get,
Delete,
Param,
Body,
UploadedFile,
UseInterceptors,
BadRequestException,
Logger,
} from '@nestjs/common';
import { FileInterceptor } from '@nestjs/platform-express';
import { ApiTags, ApiOperation, ApiResponse, ApiBearerAuth, ApiParam, ApiConsumes, ApiBody, ApiProperty } from '@nestjs/swagger';
@@ -22,10 +24,17 @@ class UploadResponseDto {
message: string;
}
class ImportUrlDto {
@ApiProperty({ description: 'URL to import', example: 'https://sakurazaka46.com/s/s46/diary/detail/68008' })
url: string;
}
@ApiTags('import')
@ApiBearerAuth()
@Controller('import')
export class ImportController {
private readonly logger = new Logger(ImportController.name);
constructor(private importService: ImportService) {}
@Post('characters/:characterId/files')
@@ -98,4 +107,28 @@ export class ImportController {
await this.importService.deleteKnowledge(knowledgeId, userId);
return { message: 'Knowledge deleted successfully' };
}
@Post('characters/:characterId/url')
@ApiOperation({ summary: 'Import content from URL for character knowledge' })
@ApiParam({ name: 'characterId', description: 'Character ID' })
@ApiBody({ type: ImportUrlDto })
@ApiResponse({ status: 201, description: 'URL content is being imported and processed', type: UploadResponseDto })
@ApiResponse({ status: 400, description: 'Invalid URL or unsupported website' })
@ApiResponse({ status: 401, description: 'Unauthorized' })
async importFromUrl(
@Param('characterId') characterId: string,
@Body() importUrlDto: ImportUrlDto,
@CurrentUser('userId') userId: string,
): Promise<UploadResponseDto> {
this.logger.log(`Received URL import request for character: ${characterId}, url: ${importUrlDto.url}`);
if (!importUrlDto.url) {
this.logger.warn('URL import request rejected: URL is required');
throw new BadRequestException('URL is required');
}
const result = await this.importService.importFromUrl(importUrlDto.url, characterId, userId);
this.logger.log(`URL import started, knowledgeId: ${result.knowledgeId}`);
return result;
}
}

View File

@@ -1,19 +1,23 @@
import { Injectable, BadRequestException } from '@nestjs/common';
import { Injectable, BadRequestException, Logger } from '@nestjs/common';
import { PrismaService } from '../prisma/prisma.service';
import { MemoryService } from '../vector/memory.service';
import { TextFileAdapter } from './adapters/text-file.adapter';
import { IImportAdapter, ImportResult } from './interfaces/import-adapter.interface';
import { ImportStatus } from '@prisma/client';
import { IWebScraper, WebScraperResult } from './interfaces/web-scraper.interface';
import { SakurazakaScraper } from './scrapers/sakurazaka-scraper';
@Injectable()
export class ImportService {
private readonly logger = new Logger(ImportService.name);
private adapters: IImportAdapter[];
private scrapers: IWebScraper[];
constructor(
private prisma: PrismaService,
private memoryService: MemoryService,
) {
this.adapters = [new TextFileAdapter()];
this.scrapers = [new SakurazakaScraper()];
}
async uploadFile(
@@ -33,6 +37,14 @@ export class ImportService {
// Parse the file
const result = await adapter.parse(file);
// Reject if content is too large
const maxRawContentLength = 100000;
if (result.content.length > maxRawContentLength) {
throw new BadRequestException(
`File too large: ${result.content.length} characters (max: ${maxRawContentLength}). Please use a smaller file.`
);
}
// Create knowledge entry
const knowledge = await this.prisma.characterKnowledge.create({
data: {
@@ -49,18 +61,9 @@ export class ImportService {
});
// Process the content in the background
this.logger.log(`[${knowledge.id}] Starting background processing for file upload, character: ${characterId}`);
this.processContent(knowledge.id, characterId, result).catch((error) => {
console.error('Error processing import:', error);
this.prisma.characterKnowledge.update({
where: { id: knowledge.id },
data: {
status: 'failed',
processingInfo: {
...result.metadata,
error: error instanceof Error ? error.message : 'Unknown error',
},
},
});
this.logger.error(`[${knowledge.id}] Error processing file import:`, error);
});
return {
@@ -141,24 +144,86 @@ export class ImportService {
characterId: string,
result: ImportResult,
): Promise<void> {
try {
// Chunk the content into smaller pieces
const chunks = this.chunkContent(result.content, 1000, 200);
this.logger.log(`[${knowledgeId}] Starting processContent, content length: ${result.content.length}`);
// Reduce memory pressure by limiting content size more aggressively
const maxContentLength = 15000; // Reduced from 30000
let content = result.content;
if (content.length > maxContentLength) {
this.logger.warn(`[${knowledgeId}] Content truncated from ${content.length} to ${maxContentLength} characters`);
content = content.substring(0, maxContentLength) + '\n\n[Content truncated due to size limits]';
}
// Store each chunk in vector memory
for (let i = 0; i < chunks.length; i++) {
await this.memoryService.storeCharacterKnowledge(
chunks[i],
characterId,
knowledgeId,
{
...result.metadata,
chunkIndex: i,
totalChunks: chunks.length,
},
);
// Process chunks one at a time using generator to minimize memory usage
const chunkSize = 800; // Reduced from 1000
const overlap = 100; // Reduced from 200
const maxChunks = 20; // Reduced from 30
let processedChunks = 0;
let totalChunks = 0;
let start = 0;
try {
this.logger.log(`[${knowledgeId}] Processing chunks with streaming approach...`);
while (start < content.length && processedChunks < maxChunks) {
// Calculate chunk boundaries
const end = Math.min(start + chunkSize, content.length);
let chunkEnd = end;
// Try to break at a sentence boundary (only if not at end)
if (end < content.length) {
const chunk = content.slice(start, end);
const lastPeriod = chunk.lastIndexOf('.');
const lastNewline = chunk.lastIndexOf('\n');
const breakPoint = Math.max(lastPeriod, lastNewline);
if (breakPoint > chunkSize * 0.5) {
chunkEnd = start + breakPoint + 1;
}
}
// Extract the chunk
const chunk = content.slice(start, chunkEnd).trim();
if (chunk.length > 0) {
totalChunks++;
if (processedChunks < maxChunks) {
this.logger.debug(`[${knowledgeId}] Processing chunk ${processedChunks + 1} (${chunk.length} chars)...`);
try {
await this.memoryService.storeCharacterKnowledge(
chunk,
characterId,
knowledgeId,
{
...result.metadata,
chunkIndex: processedChunks,
},
);
processedChunks++;
this.logger.debug(`[${knowledgeId}] Chunk ${processedChunks} stored successfully`);
} catch (chunkError) {
this.logger.error(`[${knowledgeId}] Failed to store chunk ${processedChunks + 1}:`, chunkError);
throw chunkError;
}
// Force garbage collection opportunity between chunks
if (processedChunks % 2 === 0) {
await new Promise((resolve) => setTimeout(resolve, 150));
}
}
}
// Move to next chunk position
const nextStart = start + (chunkEnd - start) - overlap;
if (nextStart <= start) break; // Prevent infinite loop
start = nextStart;
}
this.logger.log(`[${knowledgeId}] Processed ${processedChunks}/${totalChunks} chunks, updating status to completed`);
// Update status to completed
await this.prisma.characterKnowledge.update({
where: { id: knowledgeId },
@@ -166,49 +231,219 @@ export class ImportService {
status: 'completed',
processingInfo: {
...result.metadata,
chunksProcessed: chunks.length,
chunksProcessed: processedChunks,
originalChunks: totalChunks,
wasTruncated: result.content.length > maxContentLength || totalChunks > maxChunks,
},
},
});
this.logger.log(`[${knowledgeId}] Processing completed successfully`);
} catch (error) {
this.logger.error(`[${knowledgeId}] Error in processContent:`, error);
// Update status to failed
await this.prisma.characterKnowledge.update({
where: { id: knowledgeId },
data: {
status: 'failed',
processingInfo: {
...result.metadata,
error: error instanceof Error ? error.message : 'Unknown error',
try {
await this.prisma.characterKnowledge.update({
where: { id: knowledgeId },
data: {
status: 'failed',
processingInfo: {
...result.metadata,
error: error instanceof Error ? error.message : 'Unknown error',
},
},
},
});
});
this.logger.log(`[${knowledgeId}] Status updated to failed`);
} catch (dbError) {
this.logger.error(`[${knowledgeId}] Failed to update status to failed:`, dbError);
}
throw error;
}
}
private chunkContent(content: string, chunkSize: number, overlap: number): string[] {
const chunks: string[] = [];
let start = 0;
while (start < content.length) {
const end = Math.min(start + chunkSize, content.length);
let chunk = content.slice(start, end);
// Try to break at a sentence boundary
if (end < content.length) {
const lastPeriod = chunk.lastIndexOf('.');
const lastNewline = chunk.lastIndexOf('\n');
const breakPoint = Math.max(lastPeriod, lastNewline);
if (breakPoint > chunkSize * 0.5) {
chunk = chunk.slice(0, breakPoint + 1);
}
}
chunks.push(chunk.trim());
start += chunk.length - overlap;
async importFromUrl(
url: string,
characterId: string,
userId: string,
): Promise<{ knowledgeId: string; message: string }> {
// Validate URL format
let urlObj: URL;
try {
urlObj = new URL(url);
} catch {
throw new BadRequestException('Invalid URL format');
}
return chunks;
// Find appropriate scraper
const scraper = this.scrapers.find((s) => s.canHandle(url));
if (!scraper) {
throw new BadRequestException(
`Unsupported URL: ${urlObj.hostname}. No scraper available for this website.`,
);
}
// Scrape the content
const result = await scraper.scrape(url);
// Reject if content is too large
const maxRawContentLength = 100000;
if (result.content.length > maxRawContentLength) {
throw new BadRequestException(
`Content too large: ${result.content.length} characters (max: ${maxRawContentLength}). Please use a shorter article.`
);
}
// Create knowledge entry
const knowledgeName = result.metadata.title || `Import from ${urlObj.hostname}`;
const knowledge = await this.prisma.characterKnowledge.create({
data: {
name: knowledgeName,
sourceType: 'url',
sourceName: url,
mimeType: 'text/html',
rawContent: result.content,
status: 'processing',
processingInfo: result.metadata,
characterId,
},
});
// Process the content in the background
this.logger.log(`[${knowledge.id}] Starting background processing for URL: ${url}, character: ${characterId}`);
this.processScrapedContent(knowledge.id, characterId, result).catch((error) => {
this.logger.error(`[${knowledge.id}] Error processing URL import:`, error);
});
return {
knowledgeId: knowledge.id,
message: 'URL content is being imported and processed',
};
}
private async processScrapedContent(
knowledgeId: string,
characterId: string,
result: WebScraperResult,
): Promise<void> {
this.logger.log(`[${knowledgeId}] Starting processScrapedContent, content length: ${result.content.length}`);
// Reduce memory pressure by limiting content size more aggressively
const maxContentLength = 15000; // Reduced from 30000
let content = result.content;
if (content.length > maxContentLength) {
this.logger.warn(`[${knowledgeId}] Content truncated from ${content.length} to ${maxContentLength} characters`);
content = content.substring(0, maxContentLength) + '\n\n[Content truncated due to size limits]';
}
// Process chunks one at a time using streaming approach to minimize memory usage
const chunkSize = 800; // Reduced from 1000
const overlap = 100; // Reduced from 200
const maxChunks = 20; // Reduced from 30
let processedChunks = 0;
let totalChunks = 0;
let start = 0;
try {
this.logger.log(`[${knowledgeId}] Processing chunks with streaming approach...`);
while (start < content.length && processedChunks < maxChunks) {
// Calculate chunk boundaries
const end = Math.min(start + chunkSize, content.length);
let chunkEnd = end;
// Try to break at a sentence boundary (only if not at end)
if (end < content.length) {
const chunk = content.slice(start, end);
const lastPeriod = chunk.lastIndexOf('.');
const lastNewline = chunk.lastIndexOf('\n');
const breakPoint = Math.max(lastPeriod, lastNewline);
if (breakPoint > chunkSize * 0.5) {
chunkEnd = start + breakPoint + 1;
}
}
// Extract the chunk
const chunk = content.slice(start, chunkEnd).trim();
if (chunk.length > 0) {
totalChunks++;
if (processedChunks < maxChunks) {
this.logger.debug(`[${knowledgeId}] Processing chunk ${processedChunks + 1} (${chunk.length} chars)...`);
try {
await this.memoryService.storeCharacterKnowledge(
chunk,
characterId,
knowledgeId,
{
...result.metadata,
chunkIndex: processedChunks,
},
);
processedChunks++;
this.logger.debug(`[${knowledgeId}] Chunk ${processedChunks} stored successfully`);
} catch (chunkError) {
this.logger.error(`[${knowledgeId}] Failed to store chunk ${processedChunks + 1}:`, chunkError);
throw chunkError;
}
// Force garbage collection opportunity between chunks
if (processedChunks % 2 === 0) {
await new Promise((resolve) => setTimeout(resolve, 150));
}
}
}
// Move to next chunk position
const nextStart = start + (chunkEnd - start) - overlap;
if (nextStart <= start) break; // Prevent infinite loop
start = nextStart;
}
this.logger.log(`[${knowledgeId}] Processed ${processedChunks}/${totalChunks} chunks, updating status to completed`);
// Update status to completed
await this.prisma.characterKnowledge.update({
where: { id: knowledgeId },
data: {
status: 'completed',
processingInfo: {
...result.metadata,
chunksProcessed: processedChunks,
originalChunks: totalChunks,
wasTruncated: result.content.length > maxContentLength || totalChunks > maxChunks,
},
},
});
this.logger.log(`[${knowledgeId}] Processing completed successfully`);
} catch (error) {
this.logger.error(`[${knowledgeId}] Error in processScrapedContent:`, error);
// Update status to failed
try {
await this.prisma.characterKnowledge.update({
where: { id: knowledgeId },
data: {
status: 'failed',
processingInfo: {
...result.metadata,
error: error instanceof Error ? error.message : 'Unknown error',
},
},
});
this.logger.log(`[${knowledgeId}] Status updated to failed`);
} catch (dbError) {
this.logger.error(`[${knowledgeId}] Failed to update status to failed:`, dbError);
}
throw error;
}
}
}

View File

@@ -0,0 +1,2 @@
export { IImportAdapter, ImportResult } from './import-adapter.interface';
export { IWebScraper, WebScraperResult } from './web-scraper.interface';

View File

@@ -0,0 +1,23 @@
export interface WebScraperResult {
content: string;
metadata: {
sourceName: string;
url: string;
title?: string;
author?: string;
publishedDate?: string;
[key: string]: any;
};
}
export interface IWebScraper {
/**
* Check if this scraper can handle the given URL
*/
canHandle(url: string): boolean;
/**
* Scrape content from the URL
*/
scrape(url: string): Promise<WebScraperResult>;
}

View File

@@ -0,0 +1 @@
export { SakurazakaScraper } from './sakurazaka-scraper';

View File

@@ -0,0 +1,337 @@
import { SakurazakaScraper } from './sakurazaka-scraper';
describe('SakurazakaScraper', () => {
let scraper: SakurazakaScraper;
beforeEach(() => {
scraper = new SakurazakaScraper();
});
describe('canHandle', () => {
it('should return true for sakurazaka46.com URLs', () => {
const urls = [
'https://sakurazaka46.com/s/s46/diary/detail/68008',
'https://sakurazaka46.com/s/s46/diary/detail/68008?ima=0000&cd=blog',
'https://www.sakurazaka46.com/s/s46/diary/detail/10000',
'https://sakurazaka46.com/s/s46/diary/blog/list',
];
urls.forEach((url) => {
expect(scraper.canHandle(url)).toBe(true);
});
});
it('should return false for non-sakurazaka46.com URLs', () => {
const urls = [
'https://example.com/blog/123',
'https://nogizaka46.com/s/n46/diary/detail/12345',
'https://hinatazaka46.com/s/h46/diary/detail/12345',
'https://sakurazaka46.net/fake/blog/123',
'not-a-url',
'',
];
urls.forEach((url) => {
expect(scraper.canHandle(url)).toBe(false);
});
});
it('should return false for invalid URLs', () => {
expect(scraper.canHandle('invalid-url')).toBe(false);
expect(scraper.canHandle('')).toBe(false);
});
});
describe('scrape', () => {
const mockBlogHtml = `
<article class="post wovn-ignore">
<div class="col-l widfix-sp">
<div class="com-calendindinav">
<div class="wrap-bg">
<div class="inner">
<div class="year-month">
<div class="ym-inner wf-a">
<div class="ym-txt">
<span class="ym-year">2026</span>
<span class="ym-month">2</span>
</div>
<p class="date wf-a">18</p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="col-r widfix-sp">
<div class="inner title-wrap"><h1 class="title">The growing up train</h1></div>
</div>
<div class="col-l eigo-wrap pc">
<div class="eigo-inner">
<p class="eigo wf-a">YU MURAI</p>
</div>
</div>
<div class="col-r">
<div class="box-article">
<p><br/><br/>こんばんは<br/><br/>14thシングル<br/><br/>嬉しいです</p>
<p>四期生は肝が据わっています!<br/></p>
<img src="/files/14/diary/s46/blog/moblog/202602/mobpqiCQR.jpg"/>
<p>またね〜<br/>村井優</p>
</div>
<div class="blog-foot-nav">
<div class="com-btn-lcr">
<p class="btn-type1s"><a href="/s/s46/diary/detail/67798">前へ</a></p>
<p class="btn-type3"><a href="/s/s46/diary/blog/list?ct=67">村井 優のブログ一覧</a></p>
</div>
</div>
<div class="app-button">
<div class="box-a">
<p class="app-title">櫻坂46メッセージ</p>
</div>
</div>
</div>
</article>
`;
beforeEach(() => {
global.fetch = jest.fn();
});
afterEach(() => {
jest.resetAllMocks();
});
it('should successfully scrape blog content', async () => {
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(mockBlogHtml),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
expect(result.content).toContain('Title: The growing up train');
expect(result.content).toContain('Author: YU MURAI');
expect(result.content).toContain('Date: 2026-02-18');
expect(result.content).toContain('こんばんは');
expect(result.content).toContain('[Image: mobpqiCQR.jpg]');
expect(result.content).toContain('またね〜');
expect(result.content).toContain('村井優');
});
it('should extract correct metadata', async () => {
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(mockBlogHtml),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
expect(result.metadata).toEqual({
sourceName: 'Sakurazaka46 Blog',
url: 'https://sakurazaka46.com/s/s46/diary/detail/68008',
title: 'The growing up train',
author: 'YU MURAI',
publishedDate: '2026-02-18',
blogId: '68008',
});
});
it('should handle blog posts without title gracefully', async () => {
const htmlWithoutTitle = mockBlogHtml.replace(
/<h1 class="title">[^<]*<\/h1>/,
''
);
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithoutTitle),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
expect(result.metadata.title).toBeFalsy();
});
it('should handle blog posts without author gracefully', async () => {
const htmlWithoutAuthor = mockBlogHtml.replace(
/<p class="eigo wf-a">[^<]*<\/p>/,
''
);
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithoutAuthor),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
expect(result.metadata.author).toBeFalsy();
});
it('should handle HTTP errors', async () => {
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: false,
status: 404,
statusText: 'Not Found',
});
await expect(
scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/99999')
).rejects.toThrow('Failed to fetch page: 404 Not Found');
});
it('should handle network errors', async () => {
(global.fetch as jest.Mock).mockRejectedValueOnce(new Error('Network error'));
await expect(
scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008')
).rejects.toThrow('Failed to scrape Sakurazaka46 blog: Network error');
});
it('should handle missing article element', async () => {
const htmlWithoutArticle = '<div>No article here</div>';
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithoutArticle),
});
await expect(
scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008')
).rejects.toThrow('Failed to scrape Sakurazaka46 blog: Could not find article content in the page');
});
it('should handle blog ID extraction from various URL formats', async () => {
(global.fetch as jest.Mock).mockResolvedValue({
ok: true,
text: () => Promise.resolve(mockBlogHtml),
});
const testCases = [
{ url: 'https://sakurazaka46.com/s/s46/diary/detail/68008', expectedId: '68008' },
{ url: 'https://sakurazaka46.com/s/s46/diary/detail/68008?ima=0000&cd=blog', expectedId: '68008' },
{ url: 'https://sakurazaka46.com/s/s46/diary/detail/12345#anchor', expectedId: '12345' },
];
for (const { url, expectedId } of testCases) {
const result = await scraper.scrape(url);
expect(result.metadata.blogId).toBe(expectedId);
}
});
it('should clean up HTML entities and tags properly', async () => {
const htmlWithEntities = `
<article>
<h1 class="title">Test &amp; Example</h1>
<div class="box-article">
<p>Hello&nbsp;world &lt;script&gt;alert(1)&lt;/script&gt;</p>
<p>Line 1<br/>Line 2</p>
</div>
</article>
`;
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithEntities),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');
expect(result.content).toContain('Test & Example');
expect(result.content).toContain('Hello world');
expect(result.content).not.toContain('&nbsp;');
expect(result.content).not.toContain('&lt;script&gt;');
expect(result.content).not.toContain('<br/>');
});
it('should remove navigation and footer sections', async () => {
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(mockBlogHtml),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
// Navigation elements should not be in the content
expect(result.content).not.toContain('前へ');
expect(result.content).not.toContain('村井 優のブログ一覧');
expect(result.content).not.toContain('櫻坂46メッセージ');
expect(result.content).not.toContain('blog-foot-nav');
expect(result.content).not.toContain('app-button');
});
it('should handle multiple images in blog post', async () => {
const htmlWithMultipleImages = `
<article>
<h1 class="title">Multi Image Post</h1>
<div class="box-article">
<img src="/files/image1.jpg"/>
<p>Text between images</p>
<img src="/files/image2.png"/>
<img src="/files/image3.gif"/>
<p>After images text</p>
</div>
</article>
`;
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithMultipleImages),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');
expect(result.content).toContain('[Image: image1.jpg]');
expect(result.content).toContain('[Image: image2.png]');
expect(result.content).toContain('[Image: image3.gif]');
expect(result.content).toContain('Text between images');
expect(result.content).toContain('After images text');
});
it('should extract all images from complex blog structure', async () => {
const htmlWithComplexImages = `
<article class="post">
<div class="col-r">
<div class="box-article">
<p><img src="/files/14/diary/blog1.jpg"/></p>
<p>First paragraph</p>
<p><img src="/files/14/diary/blog2.jpg"/><br/><img src="/files/14/diary/blog3.jpg"/></p>
<p>Second paragraph with <img src="/files/14/diary/blog4.jpg"/> inline image</p>
</div>
</div>
</article>
`;
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(htmlWithComplexImages),
});
const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');
expect(result.content).toContain('[Image: blog1.jpg]');
expect(result.content).toContain('[Image: blog2.jpg]');
expect(result.content).toContain('[Image: blog3.jpg]');
expect(result.content).toContain('[Image: blog4.jpg]');
expect(result.content).toContain('First paragraph');
expect(result.content).toContain('Second paragraph');
});
it('should set correct User-Agent header', async () => {
(global.fetch as jest.Mock).mockResolvedValueOnce({
ok: true,
text: () => Promise.resolve(mockBlogHtml),
});
await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');
expect(global.fetch).toHaveBeenCalledWith(
'https://sakurazaka46.com/s/s46/diary/detail/68008',
expect.objectContaining({
headers: expect.objectContaining({
'User-Agent': expect.stringContaining('Mozilla/5.0'),
}),
})
);
});
});
});

View File

@@ -0,0 +1,238 @@
import { Logger } from '@nestjs/common';
import { IWebScraper, WebScraperResult } from '../interfaces/web-scraper.interface';
/**
* Web scraper for Sakurazaka46 blog posts
* Handles URLs like: https://sakurazaka46.com/s/s46/diary/detail/{id}
*/
export class SakurazakaScraper implements IWebScraper {
private readonly logger = new Logger(SakurazakaScraper.name);
private readonly baseUrl = 'sakurazaka46.com';
canHandle(url: string): boolean {
try {
const urlObj = new URL(url);
return (
urlObj.hostname === this.baseUrl ||
urlObj.hostname === `www.${this.baseUrl}`
);
} catch {
return false;
}
}
async scrape(url: string): Promise<WebScraperResult> {
this.logger.log(`Starting to scrape URL: ${url}`);
try {
// Fetch the page content
this.logger.debug(`Fetching page content from: ${url}`);
const response = await fetch(url, {
headers: {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
},
});
if (!response.ok) {
this.logger.error(`Failed to fetch page: ${response.status} ${response.statusText}`);
throw new Error(`Failed to fetch page: ${response.status} ${response.statusText}`);
}
this.logger.debug(`Page fetched successfully, reading content...`);
const html = await response.text();
this.logger.debug(`HTML content received: ${html.length} bytes`);
// Extract content
this.logger.debug('Extracting content from HTML...');
const content = this.extractContent(html);
this.logger.log(`Content extracted: ${content.length} characters`);
const metadata = this.extractMetadata(html, url);
this.logger.log(`Metadata extracted:`, metadata);
return {
content,
metadata,
};
} catch (error) {
this.logger.error(`Failed to scrape Sakurazaka46 blog:`, error);
throw new Error(
`Failed to scrape Sakurazaka46 blog: ${error instanceof Error ? error.message : 'Unknown error'}`
);
}
}
private extractContent(html: string): string {
// Find the article element
const articleMatch = html.match(/<article[^>]*>([\s\S]*?)<\/article>/i);
if (!articleMatch) {
throw new Error('Could not find article content in the page');
}
const article = articleMatch[0];
const parts: string[] = [];
// Extract title
const titleMatch = article.match(/<h1[^>]*class="[^"]*title[^"]*"[^>]*>([\s\S]*?)<\/h1>/i);
if (titleMatch) {
const title = this.stripHtml(titleMatch[1]).trim();
if (title) parts.push(`Title: ${title}`);
}
// Extract author
const authorMatch = article.match(/<p[^>]*class="[^"]*eigo[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
if (authorMatch) {
const author = this.stripHtml(authorMatch[1]).trim();
if (author) parts.push(`Author: ${author}`);
}
// Extract date from the calendar structure
const yearMatch = article.match(/<span[^>]*class="[^"]*ym-year[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
const monthMatch = article.match(/<span[^>]*class="[^"]*ym-month[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
const dayMatch = article.match(/<p[^>]*class="[^"]*date[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
if (yearMatch && monthMatch && dayMatch) {
const year = this.stripHtml(yearMatch[1]).trim();
const month = this.stripHtml(monthMatch[1]).trim();
const day = this.stripHtml(dayMatch[1]).trim();
parts.push(`Date: ${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`);
}
// Extract main content from box-article
// Find box-article and capture until we hit navigation sections
const boxMatch = article.match(/<div[^>]*class="[^"]*box-article[^"]*"[^>]*>([\s\S]*)$/i);
if (boxMatch) {
let content = boxMatch[1];
// Find where navigation starts and cut there
const navIndex = content.search(/<div[^>]*class="[^"]*col-l[^"]*"[^>]*>/i);
if (navIndex !== -1) {
content = content.substring(0, navIndex);
}
// Also cut at app-button section
const appIndex = content.search(/<div[^>]*class="[^"]*app-button[^"]*"/i);
if (appIndex !== -1) {
content = content.substring(0, appIndex);
}
const mainContent = this.cleanBlogContent(content);
if (mainContent) {
parts.push('');
parts.push(mainContent);
}
}
return parts.join('\n');
}
private extractMetadata(html: string, url: string): WebScraperResult['metadata'] {
const metadata: WebScraperResult['metadata'] = {
sourceName: 'Sakurazaka46 Blog',
url,
};
// Extract title
const titleMatch = html.match(/<h1[^>]*class="[^"]*title[^"]*"[^>]*>([\s\S]*?)<\/h1>/i);
if (titleMatch) {
metadata.title = this.stripHtml(titleMatch[1]).trim();
}
// Extract author
const authorMatch = html.match(/<p[^>]*class="[^"]*eigo[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
if (authorMatch) {
metadata.author = this.stripHtml(authorMatch[1]).trim();
}
// Extract date
const yearMatch = html.match(/<span[^>]*class="[^"]*ym-year[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
const monthMatch = html.match(/<span[^>]*class="[^"]*ym-month[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
const dayMatch = html.match(/<p[^>]*class="[^"]*date[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
if (yearMatch && monthMatch && dayMatch) {
const year = this.stripHtml(yearMatch[1]).trim();
const month = this.stripHtml(monthMatch[1]).trim().padStart(2, '0');
const day = this.stripHtml(dayMatch[1]).trim().padStart(2, '0');
metadata.publishedDate = `${year}-${month}-${day}`;
}
// Extract blog ID from URL
const idMatch = url.match(/detail\/(\d+)/);
if (idMatch) {
metadata.blogId = idMatch[1];
}
return metadata;
}
private stripHtml(html: string): string {
return html
.replace(/<br\s*\/?>/gi, '\n')
.replace(/<\/p>/gi, '\n')
.replace(/<[^>]+>/g, '')
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'")
.trim();
}
private cleanBlogContent(html: string): string {
// Remove HTML comments
html = html.replace(/<!--[\s\S]*?-->/g, '');
// Remove blog navigation/footer section
html = html.replace(/<div[^>]*class="[^"]*blog-foot-nav[^"]*"[\s\S]*$/i, '');
// Remove app button section
html = html.replace(/<div[^>]*class="[^"]*app-button[^"]*"[\s\S]*$/i, '');
// Remove script and style tags
let cleaned = html
.replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
.replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '');
// Convert <br> tags to newlines
cleaned = cleaned.replace(/<br\s*\/?>/gi, '\n');
// Handle image tags - convert to descriptive text
cleaned = cleaned.replace(/<img[^>]*src="([^"]*)"[^>]*>/gi, (match, src) => {
// Extract filename from src
const filename = src.split('/').pop();
return `\n[Image: ${filename}]\n`;
});
// Convert paragraph closings to double newlines
cleaned = cleaned.replace(/<\/p>/gi, '\n\n');
// Convert div closings to single newlines
cleaned = cleaned.replace(/<\/div>/gi, '\n');
// Remove remaining HTML tags
cleaned = cleaned.replace(/<[^>]+>/g, '');
// Decode HTML entities
cleaned = cleaned
.replace(/&nbsp;/g, ' ')
.replace(/&lt;/g, '<')
.replace(/&gt;/g, '>')
.replace(/&amp;/g, '&')
.replace(/&quot;/g, '"')
.replace(/&#39;/g, "'");
// Clean up excessive whitespace
cleaned = cleaned
.replace(/\n{4,}/g, '\n\n\n')
.trim();
// Remove trailing empty image references
cleaned = cleaned.replace(/\n*\[Image:[^\]]*\]\s*$/g, '');
// Remove trailing author/date section if present
cleaned = cleaned.replace(/\s+村井\s*優\s*\d{4}\/\d{2}\/\d{2}\s*\d{2}:\d{2}\s*$/g, '');
// Final trim
cleaned = cleaned.trim();
return cleaned;
}
}

View File

@@ -1,13 +1,15 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { IEmbeddingProvider } from './interfaces/embedding-provider.interface';
import { LocalEmbeddingProvider } from './providers/local-embedding.provider';
@Injectable()
export class EmbeddingService {
private readonly logger = new Logger(EmbeddingService.name);
private provider: IEmbeddingProvider;
constructor() {
const providerType = process.env.EMBEDDING_PROVIDER || 'local';
this.logger.log(`Initializing EmbeddingService with provider: ${providerType}`);
switch (providerType) {
case 'local':
@@ -19,7 +21,11 @@ export class EmbeddingService {
}
async embed(text: string): Promise<number[]> {
return this.provider.embed(text);
this.logger.debug(`Generating embedding for text (${text.length} chars)...`);
const startTime = Date.now();
const result = await this.provider.embed(text);
this.logger.debug(`Embedding generated in ${Date.now() - startTime}ms`);
return result;
}
async embedBatch(texts: string[]): Promise<number[][]> {

View File

@@ -1,4 +1,4 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { EmbeddingService } from './embedding.service';
import { VectorStoreService, SearchResult } from './vector-store.service';
import { MemoryType } from '@prisma/client';
@@ -11,6 +11,8 @@ export interface MemoryContext {
@Injectable()
export class MemoryService {
private readonly logger = new Logger(MemoryService.name);
constructor(
private embeddingService: EmbeddingService,
private vectorStore: VectorStoreService,
@@ -26,8 +28,15 @@ export class MemoryService {
metadata?: any;
},
): Promise<void> {
const startTime = Date.now();
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Generating embedding for content (${content.length} chars)...`);
const embedding = await this.embeddingService.embed(content);
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Embedding generated in ${Date.now() - startTime}ms, dimension: ${embedding.length}`);
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Storing in vector store...`);
await this.vectorStore.store(content, embedding, memoryType, options);
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Stored in vector store in ${Date.now() - startTime}ms`);
}
async retrieveRelevantMemories(
@@ -40,9 +49,20 @@ export class MemoryService {
memoryType?: MemoryType;
},
): Promise<MemoryContext[]> {
const { limit = 5, threshold = 0.6, conversationId, characterId, memoryType } = options;
this.logger.debug(
`[retrieveRelevantMemories] Query: "${query.substring(0, 100)}...", type: ${memoryType}, characterId: ${characterId}, conversationId: ${conversationId}, threshold: ${threshold}`,
);
const embedding = await this.embeddingService.embed(query);
const results = await this.vectorStore.searchSimilar(embedding, options);
this.logger.debug(`[retrieveRelevantMemories] Found ${results.length} results for type ${memoryType}:`);
results.forEach((r, i) => {
this.logger.debug(` [${i}] similarity: ${r.similarity.toFixed(4)}, content: "${r.content.substring(0, 80)}..."`);
});
return results.map((result) => ({
content: result.content,
metadata: result.metadata,
@@ -50,37 +70,44 @@ export class MemoryService {
}));
}
async buildContextForConversation(
conversationId: string,
currentMessage: string,
characterId: string,
): Promise<string> {
// Retrieve recent conversation memories
const conversationMemories = await this.retrieveRelevantMemories(
currentMessage,
{
limit: 3,
threshold: 0.6,
conversationId,
memoryType: 'conversation',
},
async buildContextForConversation(conversationId: string, currentMessage: string, characterId: string): Promise<string> {
this.logger.debug(
`[buildContextForConversation] Building context for conversation ${conversationId}, character ${characterId}, message: "${currentMessage.substring(0, 100)}"`,
);
// Retrieve character knowledge
const characterMemories = await this.retrieveRelevantMemories(
currentMessage,
{
limit: 3,
threshold: 0.7,
characterId,
memoryType: 'character',
},
);
// Retrieve recent conversation memories
const conversationMemories = await this.retrieveRelevantMemories(currentMessage, {
limit: 3,
threshold: 0.6,
conversationId,
memoryType: 'conversation',
});
// Retrieve character knowledge - using multilingual embedding model for cross-lingual support
// Lower threshold (0.3) for cross-lingual matching (English query -> Japanese content)
let characterMemories = await this.retrieveRelevantMemories(currentMessage, {
limit: 3,
threshold: 0.3,
characterId,
memoryType: 'character',
});
// Fallback: If vector search returns no results, retrieve the most recent knowledge
// This ensures the character always has some context for roleplaying
if (characterMemories.length === 0) {
this.logger.debug(`[buildContextForConversation] No similar memories found, falling back to most recent knowledge`);
const recentResults = await this.vectorStore.getRecentByCharacterId(characterId, 2);
characterMemories = recentResults.map((r) => ({
content: r.content,
metadata: r.metadata,
similarity: r.similarity,
}));
}
const contextParts: string[] = [];
if (characterMemories.length > 0) {
contextParts.push('Relevant character knowledge:');
contextParts.push('Here are some descriptions of yourself. These knowledge are your history, what you have done, talked, or imagined:');
characterMemories.forEach((memory) => {
contextParts.push(`- ${memory.content}`);
});
@@ -93,30 +120,26 @@ export class MemoryService {
});
}
return contextParts.join('\n');
const result = contextParts.join('\n');
this.logger.debug(`[buildContextForConversation] Final context (${result.length} chars):\n${result || '(empty)'}`);
return result;
}
async storeConversationMessage(
content: string,
conversationId: string,
metadata?: any,
): Promise<void> {
async storeConversationMessage(content: string, conversationId: string, metadata?: any): Promise<void> {
await this.addMemory(content, 'conversation', {
conversationId,
metadata,
});
}
async storeCharacterKnowledge(
content: string,
characterId: string,
knowledgeId: string,
metadata?: any,
): Promise<void> {
async storeCharacterKnowledge(content: string, characterId: string, knowledgeId: string, metadata?: any): Promise<void> {
this.logger.debug(`[${knowledgeId}] Storing character knowledge chunk for character: ${characterId}`);
await this.addMemory(content, 'character', {
characterId,
knowledgeId,
metadata,
});
this.logger.debug(`[${knowledgeId}] Character knowledge chunk stored successfully`);
}
}

View File

@@ -1,4 +1,4 @@
import { Injectable, OnModuleInit } from '@nestjs/common';
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
import { IEmbeddingProvider } from '../interfaces/embedding-provider.interface';
import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
@@ -7,6 +7,8 @@ export class LocalEmbeddingProvider implements IEmbeddingProvider, OnModuleInit
private extractor: FeatureExtractionPipeline | null = null;
private readonly modelName: string;
private readonly dimension: number;
private readonly logger = new Logger(LocalEmbeddingProvider.name);
private isLoading = false;
constructor() {
this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
@@ -15,31 +17,72 @@ export class LocalEmbeddingProvider implements IEmbeddingProvider, OnModuleInit
async onModuleInit() {
// Lazy initialization - model will be loaded on first use
this.logger.log(`LocalEmbeddingProvider initialized with model: ${this.modelName}`);
}
private async getExtractor(): Promise<FeatureExtractionPipeline> {
if (!this.extractor) {
this.extractor = await pipeline('feature-extraction', this.modelName, {
quantized: false, // Use full precision for better quality
});
if (this.isLoading) {
// Wait for existing load to complete
while (this.isLoading) {
await new Promise((resolve) => setTimeout(resolve, 100));
}
if (this.extractor) {
return this.extractor;
}
}
this.isLoading = true;
this.logger.log(`Loading embedding model: ${this.modelName}...`);
try {
// Use quantized model to reduce memory usage
this.extractor = await pipeline('feature-extraction', this.modelName, {
quantized: true,
revision: 'main',
});
this.logger.log('Embedding model loaded successfully');
} catch (error) {
this.logger.error('Failed to load embedding model:', error);
throw error;
} finally {
this.isLoading = false;
}
}
return this.extractor;
}
async embed(text: string): Promise<number[]> {
// Truncate text to prevent excessive memory usage
const maxLength = 512; // Maximum tokens the model can handle efficiently
const truncatedText = text.length > maxLength * 4 ? text.substring(0, maxLength * 4) : text;
const extractor = await this.getExtractor();
const output = await extractor(text, { pooling: 'mean', normalize: true });
return Array.from(output.data as Float32Array);
const output = await extractor(truncatedText, { pooling: 'mean', normalize: true });
// Convert to array - this creates a copy, allowing original to be GC'd
const result = Array.from(output.data as Float32Array);
// Add delay to allow GC between embeddings
await new Promise((resolve) => setTimeout(resolve, 10));
return result;
}
async embedBatch(texts: string[]): Promise<number[][]> {
const extractor = await this.getExtractor();
const outputs = await Promise.all(
texts.map((text) =>
extractor(text, { pooling: 'mean', normalize: true }),
),
);
return outputs.map((output) => Array.from(output.data as Float32Array));
const results: number[][] = [];
// Process one at a time to minimize memory spikes
for (let i = 0; i < texts.length; i++) {
results.push(await this.embed(texts[i]));
// Add delay every few items to allow GC
if (i > 0 && i % 2 === 0) {
await new Promise((resolve) => setTimeout(resolve, 50));
}
}
return results;
}
getDimension(): number {

View File

@@ -1,4 +1,4 @@
import { Injectable } from '@nestjs/common';
import { Injectable, Logger } from '@nestjs/common';
import { PrismaService } from '../prisma/prisma.service';
import { MemoryType, VectorMemory } from '@prisma/client';
@@ -12,6 +12,8 @@ export interface SearchResult {
@Injectable()
export class VectorStoreService {
private readonly logger = new Logger(VectorStoreService.name);
constructor(private prisma: PrismaService) {}
async store(
@@ -25,23 +27,35 @@ export class VectorStoreService {
metadata?: any;
},
): Promise<VectorMemory> {
const startTime = Date.now();
const vectorString = `[${embedding.join(',')}]`;
const metadataJson = options.metadata ? JSON.stringify(options.metadata) : null;
this.logger.debug(`[${options.knowledgeId}] Storing vector memory, type: ${memoryType}, vector dim: ${embedding.length}`);
return this.prisma.$queryRaw<VectorMemory[]>`
INSERT INTO "VectorMemory" (id, content, embedding, "memoryType", metadata, "conversationId", "characterId", "knowledgeId", "createdAt")
VALUES (
gen_random_uuid(),
${content},
${vectorString}::vector,
${memoryType},
${options.metadata ? JSON.stringify(options.metadata) : null}::jsonb,
${options.conversationId || null},
${options.characterId || null},
${options.knowledgeId || null},
NOW()
)
RETURNING *
`.then((results) => results[0]);
try {
const result = await this.prisma.$queryRaw<VectorMemory[]>`
INSERT INTO "VectorMemory" (id, content, embedding, "memoryType", metadata, "conversationId", "characterId", "knowledgeId", "createdAt")
VALUES (
gen_random_uuid(),
${content},
${vectorString}::vector,
${memoryType},
${metadataJson}::jsonb,
${options.conversationId || null},
${options.characterId || null},
${options.knowledgeId || null},
NOW()
)
RETURNING *
`.then((results) => results[0]);
this.logger.debug(`[${options.knowledgeId}] Vector memory stored in ${Date.now() - startTime}ms, id: ${result.id}`);
return result;
} catch (error) {
this.logger.error(`[${options.knowledgeId}] Failed to store vector memory:`, error);
throw error;
}
}
async searchSimilar(
@@ -93,7 +107,34 @@ export class VectorStoreService {
LIMIT $3
`;
return this.prisma.$queryRawUnsafe<SearchResult[]>(query, ...params);
this.logger.debug(`[searchSimilar] Query params: threshold=${threshold}, limit=${limit}, characterId=${options.characterId}, memoryType=${options.memoryType}`);
const results = await this.prisma.$queryRawUnsafe<SearchResult[]>(query, ...params);
this.logger.debug(`[searchSimilar] Found ${results.length} results matching criteria`);
// Debug: Show all similarities for character knowledge
if (options.characterId && options.memoryType === 'character') {
const allQuery = `
SELECT
id,
content,
"memoryType",
metadata,
1 - (embedding <=> $1::vector) as similarity
FROM "VectorMemory"
WHERE "characterId" = $2 AND "memoryType" = $3
ORDER BY embedding <=> $1::vector
LIMIT 10
`;
const allResults = await this.prisma.$queryRawUnsafe<SearchResult[]>(allQuery, vectorString, options.characterId, options.memoryType);
this.logger.debug(`[searchSimilar] All ${allResults.length} similarities for character ${options.characterId}:`);
allResults.forEach((r, i) => {
this.logger.debug(` [${i}] similarity=${r.similarity.toFixed(4)}, content="${r.content.substring(0, 50)}..."`);
});
}
return results;
}
async deleteByConversation(conversationId: string): Promise<void> {
@@ -113,4 +154,28 @@ export class VectorStoreService {
where: { knowledgeId },
});
}
/**
* Retrieve most recent character knowledge (fallback when similarity search returns nothing)
*/
async getRecentByCharacterId(characterId: string, limit: number = 2): Promise<SearchResult[]> {
this.logger.debug(`[getRecentByCharacterId] Retrieving recent knowledge for character ${characterId}`);
const results = await this.prisma.$queryRaw<SearchResult[]>`
SELECT
id,
content,
"memoryType",
metadata,
1.0 as similarity
FROM "VectorMemory"
WHERE "characterId" = ${characterId} AND "memoryType" = 'character'
ORDER BY "createdAt" DESC
LIMIT ${limit}
`;
this.logger.debug(`[getRecentByCharacterId] Found ${results.length} recent memories`);
return results;
}
}

View File

@@ -6,6 +6,7 @@ import { CharacterList } from './pages/CharacterList';
import { CharacterForm } from './pages/CharacterForm';
import { ConversationList } from './pages/ConversationList';
import { Chat } from './pages/Chat';
import { KnowledgeImport } from './pages/KnowledgeImport';
// OAuth Callback Handler - processes tokens from URL before routing
function OAuthCallbackHandler({ children }: { children: React.ReactNode }) {
@@ -124,6 +125,15 @@ function App() {
</PrivateRoute>
}
/>
<Route
path="/characters/:characterId/knowledge"
element={
<PrivateRoute>
<KnowledgeImport />
</PrivateRoute>
}
/>
</Routes>
</OAuthCallbackHandler>
</BrowserRouter>

View File

@@ -6,7 +6,9 @@
* OpenAPI spec version: 1.0.0
*/
import type {
ImportControllerUploadFileBody
ImportControllerUploadFileBody,
ImportUrlDto,
UploadResponseDto,
} from '.././model';
import { customFetch } from '../../mutator/custom-fetch';
@@ -109,3 +111,29 @@ export const importControllerDeleteKnowledge = async (knowledgeId: string, optio
);}
/**
* @summary Import content from URL for character knowledge
*/
export const getImportControllerImportFromUrlUrl = (characterId: string,) => {
return `http://localhost:3000/api/import/characters/${characterId}/url`
}
export const importControllerImportFromUrl = async (characterId: string,
importUrlDto: ImportUrlDto, options?: RequestInit): Promise<UploadResponseDto> => {
return customFetch<UploadResponseDto>(getImportControllerImportFromUrlUrl(characterId),
{
...options,
method: 'POST',
headers: {
...options?.headers,
'Content-Type': 'application/json',
},
body: JSON.stringify(importUrlDto),
}
);}

View File

@@ -0,0 +1,12 @@
/**
* Generated by orval v8.4.2 🍺
* Do not edit manually.
* DreamChat API
* The DreamChat API documentation
* OpenAPI spec version: 1.0.0
*/
export interface ImportUrlDto {
/** URL to import */
url: string;
}

View File

@@ -20,7 +20,9 @@ export * from './createCharacterDtoAttributes';
export * from './createCharacterDtoConfig';
export * from './createConversationDto';
export * from './importControllerUploadFileBody';
export * from './importUrlDto';
export * from './keycloakConfigDto';
export * from './uploadResponseDto';
export * from './keycloakLoginUrlDto';
export * from './loginDto';
export * from './messageResponseDto';

View File

@@ -0,0 +1,14 @@
/**
* Generated by orval v8.4.2 🍺
* Do not edit manually.
* DreamChat API
* The DreamChat API documentation
* OpenAPI spec version: 1.0.0
*/
export interface UploadResponseDto {
/** Knowledge ID */
knowledgeId: string;
/** Status message */
message: string;
}

View File

@@ -1,6 +1,7 @@
import { useEffect, useState } from 'react';
import { useNavigate, useParams, Link } from 'react-router-dom';
import { useCharacterStore } from '../stores/characterStore';
import { importControllerGetCharacterKnowledge } from '../api/generated/import/import';
export function CharacterForm() {
const { id } = useParams<{ id: string }>();
@@ -23,10 +24,17 @@ export function CharacterForm() {
const [avatarUrl, setAvatarUrl] = useState('');
const [isPublic, setIsPublic] = useState(false);
const [attributes, setAttributes] = useState('{}');
const [knowledgeCount, setKnowledgeCount] = useState(0);
useEffect(() => {
if (isEditing && id) {
getCharacter(id);
// Fetch knowledge count
importControllerGetCharacterKnowledge(id).then((knowledge: any) => {
setKnowledgeCount(knowledge?.length || 0);
}).catch(() => {
setKnowledgeCount(0);
});
}
return () => {
setCurrentCharacter(null);
@@ -174,6 +182,28 @@ export function CharacterForm() {
</label>
</div>
{isEditing && id && (
<div className="bg-gray-50 rounded-lg p-4">
<div className="flex items-center justify-between">
<div>
<h3 className="text-sm font-medium text-gray-900">Knowledge Sources</h3>
<p className="text-sm text-gray-500">
{knowledgeCount === 0
? 'No knowledge imported yet'
: `${knowledgeCount} knowledge source${knowledgeCount === 1 ? '' : 's'} imported`
}
</p>
</div>
<Link
to={`/characters/${id}/knowledge`}
className="inline-flex items-center px-3 py-1.5 border border-gray-300 rounded text-sm font-medium text-gray-700 bg-white hover:bg-gray-50"
>
{knowledgeCount === 0 ? 'Import Knowledge' : 'Manage Knowledge'}
</Link>
</div>
</div>
)}
<div className="flex space-x-4">
<button
type="submit"

View File

@@ -5,6 +5,7 @@ import { useCharacterStore } from '../stores/characterStore';
import { useAuthStore } from '../stores/authStore';
import type { Message } from '../types';
import { io, Socket } from 'socket.io-client';
import { chatControllerSendMessage } from '../api/generated/conversations/conversations';
const WS_URL = (import.meta.env as unknown as ImportMetaEnv).VITE_WS_URL || 'http://localhost:3000';
@@ -26,6 +27,7 @@ export function Chat() {
const [message, setMessage] = useState('');
const [streamingContent, setStreamingContent] = useState('');
const [socketConnected, setSocketConnected] = useState(false);
const messagesEndRef = useRef<HTMLDivElement>(null);
const socketRef = useRef<Socket | null>(null);
@@ -42,6 +44,17 @@ export function Chat() {
socket.on('connect', () => {
console.log('Connected to chat server');
setSocketConnected(true);
});
socket.on('disconnect', () => {
console.log('Disconnected from chat server');
setSocketConnected(false);
});
socket.on('connect_error', (err) => {
console.error('Socket connection error:', err.message);
setSocketConnected(false);
});
socket.on('message_chunk', (data: { conversationId: string; chunk: { content: string } }) => {
@@ -111,11 +124,30 @@ export function Chat() {
setStreamingContent('');
setStreaming(true);
// Send via socket for streaming
socketRef.current?.emit('send_message', {
conversationId,
content,
});
// Check if socket is connected, otherwise fallback to HTTP
if (socketRef.current?.connected) {
socketRef.current.emit('send_message', {
conversationId,
content,
});
} else {
// Fallback to HTTP API when socket is not connected
try {
const result = await chatControllerSendMessage(conversationId, { content });
// Add messages to the conversation
if (result.userMessage) {
addMessage(result.userMessage as Message);
}
if (result.assistantMessage) {
addMessage(result.assistantMessage as Message);
}
setStreaming(false);
} catch (error) {
console.error('Failed to send message:', error);
setStreaming(false);
}
}
};
const handleLogout = () => {

View File

@@ -0,0 +1,383 @@
import { useEffect, useState, useCallback } from 'react';
import { useParams, Link } from 'react-router-dom';
import { useCharacterStore } from '../stores/characterStore';
import { importControllerImportFromUrl, importControllerDeleteKnowledge } from '../api/generated/import/import';
interface KnowledgeItem {
id: string;
name: string;
sourceType: 'file' | 'url' | 'manual';
sourceName: string;
status: 'pending' | 'processing' | 'completed' | 'failed';
createdAt: string;
processingInfo?: {
error?: string;
chunksProcessed?: number;
[key: string]: any;
};
}
export function KnowledgeImport() {
const { characterId } = useParams<{ characterId: string }>();
const { currentCharacter, getCharacter } = useCharacterStore();
const [url, setUrl] = useState('');
const [knowledgeList, setKnowledgeList] = useState<KnowledgeItem[]>([]);
const [isLoading, setIsLoading] = useState(false);
const [isImporting, setIsImporting] = useState(false);
const [error, setError] = useState<string | null>(null);
const [successMessage, setSuccessMessage] = useState<string | null>(null);
// Supported URL patterns for user reference
const supportedPatterns = [
{ label: 'Sakurazaka46 Blog', pattern: 'sakurazaka46.com/s/s46/diary/detail/' },
];
const fetchKnowledge = useCallback(async () => {
if (!characterId) return;
setIsLoading(true);
try {
const response = await fetch(`http://localhost:3000/api/import/characters/${characterId}/knowledge`, {
headers: {
'Authorization': `Bearer ${localStorage.getItem('accessToken')}`,
},
});
if (response.ok) {
const data = await response.json();
setKnowledgeList(data);
}
} catch {
// Silent fail - we'll show empty state
} finally {
setIsLoading(false);
}
}, [characterId]);
useEffect(() => {
if (characterId) {
getCharacter(characterId);
fetchKnowledge();
}
}, [characterId, getCharacter, fetchKnowledge]);
const validateUrl = (inputUrl: string): { valid: boolean; message?: string } => {
try {
const urlObj = new URL(inputUrl);
// Check if it's a supported domain
const supportedDomains = ['sakurazaka46.com', 'www.sakurazaka46.com'];
if (!supportedDomains.includes(urlObj.hostname)) {
return {
valid: false,
message: `Unsupported website: ${urlObj.hostname}. Currently only Sakurazaka46 blogs are supported.`
};
}
// Check if it's a blog detail page
if (!urlObj.pathname.includes('/diary/detail/')) {
return {
valid: false,
message: 'URL does not appear to be a valid blog post. Please use a blog detail URL.'
};
}
return { valid: true };
} catch {
return { valid: false, message: 'Invalid URL format' };
}
};
const handleSubmit = async (e: React.FormEvent) => {
e.preventDefault();
setError(null);
setSuccessMessage(null);
if (!characterId) {
setError('Character ID is missing');
return;
}
if (!url.trim()) {
setError('Please enter a URL');
return;
}
// Validate URL before attempting import
const validation = validateUrl(url.trim());
if (!validation.valid) {
setError(validation.message || 'Invalid URL');
return;
}
setIsImporting(true);
try {
const result = await importControllerImportFromUrl(characterId, { url: url.trim() });
setSuccessMessage(`Import started! Knowledge ID: ${result.knowledgeId}`);
setUrl('');
// Refresh the knowledge list
await fetchKnowledge();
// Clear success message after 5 seconds
setTimeout(() => setSuccessMessage(null), 5000);
} catch (err: any) {
// Handle specific error messages from the backend
const errorMessage = err.message || 'Failed to import URL';
if (errorMessage.includes('Unsupported URL')) {
setError('This website is not supported for import. Currently only Sakurazaka46 blogs are supported.');
} else if (errorMessage.includes('Failed to fetch') || errorMessage.includes('Failed to scrape')) {
setError('Could not fetch content from the URL. Please check the URL is correct and accessible.');
} else if (errorMessage.includes('Could not find article')) {
setError('Could not find blog content on the page. Please check this is a valid blog post URL.');
} else {
setError(errorMessage);
}
} finally {
setIsImporting(false);
}
};
const handleDelete = async (knowledgeId: string) => {
if (!confirm('Are you sure you want to delete this knowledge?')) return;
try {
await importControllerDeleteKnowledge(knowledgeId);
await fetchKnowledge();
} catch (err: any) {
setError(err.message || 'Failed to delete knowledge');
}
};
const getStatusColor = (status: string) => {
switch (status) {
case 'completed': return 'bg-green-100 text-green-800';
case 'processing': return 'bg-yellow-100 text-yellow-800';
case 'failed': return 'bg-red-100 text-red-800';
default: return 'bg-gray-100 text-gray-800';
}
};
const getStatusIcon = (status: string) => {
switch (status) {
case 'completed': return '✓';
case 'processing': return '⟳';
case 'failed': return '✗';
default: return '○';
}
};
const formatDate = (dateString: string) => {
return new Date(dateString).toLocaleDateString('en-US', {
year: 'numeric',
month: 'short',
day: 'numeric',
hour: '2-digit',
minute: '2-digit',
});
};
const truncateUrl = (url: string, maxLength: number = 50) => {
if (url.length <= maxLength) return url;
return url.substring(0, maxLength) + '...';
};
return (
<div className="min-h-screen bg-gray-50">
<header className="bg-white shadow">
<div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-4">
<div className="flex items-center justify-between">
<div className="flex items-center space-x-4">
<Link to={`/characters/${characterId}`} className="text-gray-600 hover:text-gray-900">
Back to Character
</Link>
<h1 className="text-xl font-bold text-gray-900">
Knowledge Import
{currentCharacter && (
<span className="text-gray-500 font-normal"> - {currentCharacter.name}</span>
)}
</h1>
</div>
</div>
</div>
</header>
<main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
<div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
{/* Left column - Import form */}
<div className="space-y-6">
{/* URL Import Card */}
<div className="bg-white rounded-lg shadow p-6">
<h2 className="text-lg font-semibold text-gray-900 mb-4">
Import from URL
</h2>
{error && (
<div className="mb-4 bg-red-50 border border-red-200 text-red-700 p-3 rounded">
<div className="flex items-start">
<span className="mr-2"></span>
<span>{error}</span>
</div>
</div>
)}
{successMessage && (
<div className="mb-4 bg-green-50 border border-green-200 text-green-700 p-3 rounded">
<div className="flex items-start">
<span className="mr-2"></span>
<span>{successMessage}</span>
</div>
</div>
)}
<form onSubmit={handleSubmit} className="space-y-4">
<div>
<label htmlFor="url" className="block text-sm font-medium text-gray-700">
Blog URL
</label>
<input
type="url"
id="url"
value={url}
onChange={(e) => setUrl(e.target.value)}
placeholder="https://sakurazaka46.com/s/s46/diary/detail/68008"
className="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500"
/>
<p className="mt-1 text-sm text-gray-500">
Enter a Sakurazaka46 blog URL to import as character knowledge.
</p>
</div>
<button
type="submit"
disabled={isImporting || !url.trim()}
className="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
{isImporting ? (
<span className="flex items-center">
<svg className="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
<circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
<path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
</svg>
Importing...
</span>
) : (
'Import from URL'
)}
</button>
</form>
</div>
{/* Supported Sites Card */}
<div className="bg-white rounded-lg shadow p-6">
<h3 className="text-md font-medium text-gray-900 mb-3">
Supported Websites
</h3>
<ul className="space-y-2">
{supportedPatterns.map((pattern, index) => (
<li key={index} className="flex items-center text-sm text-gray-600">
<span className="w-2 h-2 bg-green-400 rounded-full mr-2"></span>
<span className="font-medium">{pattern.label}</span>
<code className="ml-2 px-2 py-0.5 bg-gray-100 rounded text-xs">
{pattern.pattern}
</code>
</li>
))}
</ul>
<p className="mt-3 text-xs text-gray-500">
More websites will be supported in future updates.
</p>
</div>
</div>
{/* Right column - Knowledge list */}
<div className="bg-white rounded-lg shadow p-6">
<h2 className="text-lg font-semibold text-gray-900 mb-4">
Knowledge Sources
</h2>
{isLoading ? (
<div className="text-center py-8">
<div className="animate-spin rounded-full h-8 w-8 border-b-2 border-indigo-600 mx-auto"></div>
<p className="mt-2 text-sm text-gray-500">Loading...</p>
</div>
) : knowledgeList.length === 0 ? (
<div className="text-center py-12 bg-gray-50 rounded-lg">
<p className="text-gray-500">No knowledge sources yet.</p>
<p className="text-sm text-gray-400 mt-1">
Import a blog URL to add knowledge to this character.
</p>
</div>
) : (
<div className="space-y-4 max-h-[600px] overflow-y-auto">
{knowledgeList.map((knowledge) => (
<div
key={knowledge.id}
className="border border-gray-200 rounded-lg p-4 hover:shadow-md transition-shadow"
>
<div className="flex items-start justify-between">
<div className="flex-1 min-w-0">
<div className="flex items-center space-x-2">
<span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${getStatusColor(knowledge.status)}`}>
<span className="mr-1">{getStatusIcon(knowledge.status)}</span>
{knowledge.status.charAt(0).toUpperCase() + knowledge.status.slice(1)}
</span>
<span className="text-xs text-gray-500">
{knowledge.sourceType === 'url' ? '🔗' : '📄'} {knowledge.sourceType}
</span>
</div>
<h3 className="mt-2 text-sm font-medium text-gray-900 truncate">
{knowledge.name}
</h3>
{knowledge.sourceType === 'url' && (
<p className="mt-1 text-xs text-gray-500 truncate" title={knowledge.sourceName}>
{truncateUrl(knowledge.sourceName)}
</p>
)}
<p className="mt-1 text-xs text-gray-400">
{formatDate(knowledge.createdAt)}
</p>
{knowledge.status === 'processing' && (
<p className="mt-2 text-xs text-yellow-600">
Processing content... This may take a moment.
</p>
)}
{knowledge.status === 'failed' && knowledge.processingInfo?.error && (
<p className="mt-2 text-xs text-red-600">
Error: {knowledge.processingInfo.error}
</p>
)}
{knowledge.status === 'completed' && knowledge.processingInfo?.chunksProcessed && (
<p className="mt-2 text-xs text-green-600">
Processed {knowledge.processingInfo.chunksProcessed} chunks
</p>
)}
</div>
<button
onClick={() => handleDelete(knowledge.id)}
className="ml-4 text-red-600 hover:text-red-800 text-sm"
title="Delete knowledge"
>
🗑
</button>
</div>
</div>
))}
</div>
)}
</div>
</div>
</main>
</div>
);
}