feat: Implement knowledge import feature for characters
- Added KnowledgeImport page for importing character knowledge from URLs. - Integrated URL validation and error handling for unsupported websites. - Created API endpoints for importing content from URLs and retrieving character knowledge. - Enhanced VectorStoreService with logging and error handling for vector memory storage. - Updated frontend to display knowledge sources and manage them effectively. - Added support for fetching recent character knowledge as a fallback in similarity searches. - Updated OpenAPI documentation to reflect new import functionality.
This commit is contained in:
18
.env.example
18
.env.example
@@ -20,6 +20,24 @@ EMBEDDING_MODEL=Xenova/all-MiniLM-L6-v2
|
|||||||
EMBEDDING_DIMENSION=384
|
EMBEDDING_DIMENSION=384
|
||||||
EMBEDDING_DEVICE=cpu
|
EMBEDDING_DEVICE=cpu
|
||||||
|
|
||||||
|
# Use quantized model for lower memory usage (~4x smaller, slightly less accurate)
|
||||||
|
# Set to 'true' for low-memory systems (512MB-1GB RAM)
|
||||||
|
EMBEDDING_QUANTIZED=false
|
||||||
|
|
||||||
|
# Node.js Memory Configuration (increase if embedding causes OOM)
|
||||||
|
# For 512MB RAM VPS: NODE_OPTIONS=--max-old-space-size=384
|
||||||
|
# For 1GB RAM VPS: NODE_OPTIONS=--max-old-space-size=768
|
||||||
|
# For 2GB RAM VPS: NODE_OPTIONS=--max-old-space-size=1536
|
||||||
|
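# Default (no env var): Node picks its own heap limit based on the Node version and available system memory
# Note: the `start`, `start:low-memory` and `start:high-memory` scripts pass --max-old-space-size on the command line, which takes precedence over NODE_OPTIONS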
|
||||||
|
#NODE_OPTIONS=--max-old-space-size=768
|
||||||
|
|
||||||
|
# Request Logging Configuration
|
||||||
|
# Enable/disable request logging (default: true)
|
||||||
|
#REQUEST_LOGGER=true
|
||||||
|
|
||||||
|
# Log level: verbose (detailed), standard (default), minimal (status only)
|
||||||
|
#REQUEST_LOGGER_LEVEL=standard
|
||||||
|
|
||||||
# HuggingFace API (optional - if not using local embeddings)
|
# HuggingFace API (optional - if not using local embeddings)
|
||||||
# HUGGINGFACE_API_KEY=hf_...
|
# HUGGINGFACE_API_KEY=hf_...
|
||||||
|
|
||||||
|
|||||||
39
README.md
39
README.md
@@ -186,6 +186,45 @@ docker-compose exec backend pnpm db:migrate
|
|||||||
|
|
||||||
**Note:** An external reverse proxy (nginx, Traefik, etc.) is expected for SSL termination and routing. See [deployment.md](doc/deployment.md) for configuration examples.
|
**Note:** An external reverse proxy (nginx, Traefik, etc.) is expected for SSL termination and routing. See [deployment.md](doc/deployment.md) for configuration examples.
|
||||||
|
|
||||||
|
## Troubleshooting
|
||||||
|
|
||||||
|
### Memory Issues (JavaScript heap out of memory)
|
||||||
|
|
||||||
|
The local embedding model (`Xenova/all-MiniLM-L6-v2`) runs in-process via Transformers.js (ONNX Runtime), which can consume significant memory. If you encounter OOM errors:
|
||||||
|
|
||||||
|
1. **Increase Node.js memory limit:**
|
||||||
|
```bash
|
||||||
|
# In apps/backend/.env
|
||||||
|
NODE_OPTIONS=--max-old-space-size=768
|
||||||
|
|
||||||
|
# Or when starting manually
|
||||||
|
node --max-old-space-size=768 dist/main
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Use low-memory mode (for 512MB RAM VPS):**
|
||||||
|
```bash
|
||||||
|
cd apps/backend
|
||||||
|
pnpm run start:low-memory
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Use quantized model (4x smaller memory):**
|
||||||
|
```bash
|
||||||
|
# In apps/backend/.env
|
||||||
|
EMBEDDING_QUANTIZED=true
|
||||||
|
```
|
||||||
|
|
||||||
|
4. **Limit import size:** The system automatically:
|
||||||
|
- Truncates content to 15,000 characters per import (uploaded files over 100,000 characters are rejected)
|
||||||
|
- Limits to 20 chunks maximum
|
||||||
|
- Processes chunks sequentially with delays
|
||||||
|
- Pauses for 150 ms after every second chunk to give the garbage collector a chance to run
|
||||||
|
|
||||||
|
### System Requirements
|
||||||
|
|
||||||
|
- **Minimum:** 512MB RAM (with low-memory configuration)
|
||||||
|
- **Recommended:** 1GB+ RAM for comfortable operation
|
||||||
|
- **Import processing:** Large imports may take longer on low-memory systems
|
||||||
|
|
||||||
## License
|
## License
|
||||||
|
|
||||||
MIT
|
MIT
|
||||||
|
|||||||
14
apps/backend/jest.config.js
Normal file
14
apps/backend/jest.config.js
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
module.exports = {
|
||||||
|
moduleFileExtensions: ['js', 'json', 'ts'],
|
||||||
|
rootDir: 'src',
|
||||||
|
testRegex: '.*\\.spec\\.ts$',
|
||||||
|
transform: {
|
||||||
|
'^.+\\.(t|j)s$': 'ts-jest',
|
||||||
|
},
|
||||||
|
collectCoverageFrom: ['**/*.(t|j)s'],
|
||||||
|
coverageDirectory: '../coverage',
|
||||||
|
testEnvironment: 'node',
|
||||||
|
moduleNameMapper: {
|
||||||
|
'^@dreamchat/shared$': '<rootDir>/../../packages/shared/dist',
|
||||||
|
},
|
||||||
|
};
|
||||||
@@ -4,7 +4,9 @@
|
|||||||
"scripts": {
|
"scripts": {
|
||||||
"build": "nest build",
|
"build": "nest build",
|
||||||
"dev": "nest start --watch",
|
"dev": "nest start --watch",
|
||||||
"start": "node dist/main",
|
"start": "node --max-old-space-size=768 dist/main",
|
||||||
|
"start:low-memory": "node --max-old-space-size=384 dist/main",
|
||||||
|
"start:high-memory": "node --max-old-space-size=1536 dist/main",
|
||||||
"test": "jest",
|
"test": "jest",
|
||||||
"test:watch": "jest --watch",
|
"test:watch": "jest --watch",
|
||||||
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\"",
|
"lint": "eslint \"{src,apps,libs,test}/**/*.ts\"",
|
||||||
@@ -46,6 +48,7 @@
|
|||||||
"@nestjs/cli": "^11.0.16",
|
"@nestjs/cli": "^11.0.16",
|
||||||
"@nestjs/testing": "^11.1.14",
|
"@nestjs/testing": "^11.1.14",
|
||||||
"@types/bcrypt": "^6.0.0",
|
"@types/bcrypt": "^6.0.0",
|
||||||
|
"@types/jest": "^30.0.0",
|
||||||
"@types/jsonwebtoken": "^9.0.0",
|
"@types/jsonwebtoken": "^9.0.0",
|
||||||
"@types/multer": "^1.4.12",
|
"@types/multer": "^1.4.12",
|
||||||
"@types/node": "^24.10.13",
|
"@types/node": "^24.10.13",
|
||||||
@@ -53,6 +56,7 @@
|
|||||||
"@types/passport-local": "^1.0.0",
|
"@types/passport-local": "^1.0.0",
|
||||||
"jest": "^30.2.0",
|
"jest": "^30.2.0",
|
||||||
"prisma": "^7.4.1",
|
"prisma": "^7.4.1",
|
||||||
|
"ts-jest": "^29.4.6",
|
||||||
"typescript": "^5.3.0"
|
"typescript": "^5.3.0"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Module } from '@nestjs/common';
|
import { Module, NestModule, MiddlewareConsumer } from '@nestjs/common';
|
||||||
import { APP_GUARD } from '@nestjs/core';
|
import { APP_GUARD } from '@nestjs/core';
|
||||||
import { PrismaModule } from './prisma/prisma.module';
|
import { PrismaModule } from './prisma/prisma.module';
|
||||||
import { AuthModule } from './auth/auth.module';
|
import { AuthModule } from './auth/auth.module';
|
||||||
@@ -9,6 +9,7 @@ import { VectorModule } from './vector/vector.module';
|
|||||||
import { ChatModule } from './chat/chat.module';
|
import { ChatModule } from './chat/chat.module';
|
||||||
import { ImportModule } from './import/import.module';
|
import { ImportModule } from './import/import.module';
|
||||||
import { JwtAuthGuard } from './auth/guards/jwt-auth.guard';
|
import { JwtAuthGuard } from './auth/guards/jwt-auth.guard';
|
||||||
|
import { RequestLoggerMiddleware } from './common/middleware';
|
||||||
|
|
||||||
@Module({
|
@Module({
|
||||||
imports: [
|
imports: [
|
||||||
@@ -29,4 +30,10 @@ import { JwtAuthGuard } from './auth/guards/jwt-auth.guard';
|
|||||||
},
|
},
|
||||||
],
|
],
|
||||||
})
|
})
|
||||||
export class AppModule {}
|
export class AppModule implements NestModule {
|
||||||
|
configure(consumer: MiddlewareConsumer) {
|
||||||
|
consumer
|
||||||
|
.apply(RequestLoggerMiddleware)
|
||||||
|
.forRoutes('*');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import {
|
|||||||
OnGatewayDisconnect,
|
OnGatewayDisconnect,
|
||||||
} from '@nestjs/websockets';
|
} from '@nestjs/websockets';
|
||||||
import { Server, Socket } from 'socket.io';
|
import { Server, Socket } from 'socket.io';
|
||||||
import { UseGuards } from '@nestjs/common';
|
import { UseGuards, Logger } from '@nestjs/common';
|
||||||
import { ChatService } from './chat.service';
|
import { ChatService } from './chat.service';
|
||||||
import { JwtService } from '@nestjs/jwt';
|
import { JwtService } from '@nestjs/jwt';
|
||||||
|
|
||||||
@@ -27,6 +27,8 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
|
|||||||
@WebSocketServer()
|
@WebSocketServer()
|
||||||
server: Server;
|
server: Server;
|
||||||
|
|
||||||
|
private readonly logger = new Logger(ChatGateway.name);
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private chatService: ChatService,
|
private chatService: ChatService,
|
||||||
private jwtService: JwtService,
|
private jwtService: JwtService,
|
||||||
@@ -83,6 +85,8 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
|
|||||||
) {
|
) {
|
||||||
if (!client.userId) return;
|
if (!client.userId) return;
|
||||||
|
|
||||||
|
this.logger.debug(`[handleSendMessage] Received message from user ${client.userId} for conversation ${data.conversationId}: "${data.content}"`);
|
||||||
|
|
||||||
const room = `conversation:${data.conversationId}`;
|
const room = `conversation:${data.conversationId}`;
|
||||||
|
|
||||||
try {
|
try {
|
||||||
@@ -119,6 +123,7 @@ export class ChatGateway implements OnGatewayConnection, OnGatewayDisconnect {
|
|||||||
conversationId: data.conversationId,
|
conversationId: data.conversationId,
|
||||||
assistantMessage,
|
assistantMessage,
|
||||||
});
|
});
|
||||||
|
this.logger.debug(`[handleSendMessage] Message streaming completed for conversation ${data.conversationId}`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
client.emit('error', {
|
client.emit('error', {
|
||||||
conversationId: data.conversationId,
|
conversationId: data.conversationId,
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Injectable, NotFoundException, ForbiddenException } from '@nestjs/common';
|
import { Injectable, NotFoundException, ForbiddenException, Logger } from '@nestjs/common';
|
||||||
import { PrismaService } from '../prisma/prisma.service';
|
import { PrismaService } from '../prisma/prisma.service';
|
||||||
import { LLMService } from '../llm/llm.service';
|
import { LLMService } from '../llm/llm.service';
|
||||||
import { MemoryService } from '../vector/memory.service';
|
import { MemoryService } from '../vector/memory.service';
|
||||||
@@ -8,6 +8,8 @@ import { Conversation, Message, MessageRole } from '@prisma/client';
|
|||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ChatService {
|
export class ChatService {
|
||||||
|
private readonly logger = new Logger(ChatService.name);
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private prisma: PrismaService,
|
private prisma: PrismaService,
|
||||||
private llmService: LLMService,
|
private llmService: LLMService,
|
||||||
@@ -15,15 +17,9 @@ export class ChatService {
|
|||||||
private characterService: CharacterService,
|
private characterService: CharacterService,
|
||||||
) {}
|
) {}
|
||||||
|
|
||||||
async createConversation(
|
async createConversation(userId: string, createConversationDto: CreateConversationDto): Promise<Conversation> {
|
||||||
userId: string,
|
|
||||||
createConversationDto: CreateConversationDto,
|
|
||||||
): Promise<Conversation> {
|
|
||||||
// Verify character exists and user has access
|
// Verify character exists and user has access
|
||||||
const character = await this.characterService.findById(
|
const character = await this.characterService.findById(createConversationDto.characterId, userId);
|
||||||
createConversationDto.characterId,
|
|
||||||
userId,
|
|
||||||
);
|
|
||||||
|
|
||||||
return this.prisma.conversation.create({
|
return this.prisma.conversation.create({
|
||||||
data: {
|
data: {
|
||||||
@@ -96,11 +92,7 @@ export class ChatService {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async sendMessage(
|
async sendMessage(conversationId: string, userId: string, sendMessageDto: SendMessageDto): Promise<{ userMessage: Message; assistantMessage: Message }> {
|
||||||
conversationId: string,
|
|
||||||
userId: string,
|
|
||||||
sendMessageDto: SendMessageDto,
|
|
||||||
): Promise<{ userMessage: Message; assistantMessage: Message }> {
|
|
||||||
const conversation = await this.findConversationById(conversationId, userId);
|
const conversation = await this.findConversationById(conversationId, userId);
|
||||||
|
|
||||||
// Create user message
|
// Create user message
|
||||||
@@ -113,25 +105,20 @@ export class ChatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Store user message in vector memory
|
// Store user message in vector memory
|
||||||
await this.memoryService.storeConversationMessage(
|
await this.memoryService.storeConversationMessage(`User: ${sendMessageDto.content}`, conversationId, { messageId: userMessage.id });
|
||||||
`User: ${sendMessageDto.content}`,
|
|
||||||
conversationId,
|
|
||||||
{ messageId: userMessage.id },
|
|
||||||
);
|
|
||||||
|
|
||||||
// Generate context from memory
|
// Generate context from memory
|
||||||
const memoryContext = await this.memoryService.buildContextForConversation(
|
const memoryContext = await this.memoryService.buildContextForConversation(conversationId, sendMessageDto.content, conversation.characterId);
|
||||||
conversationId,
|
|
||||||
sendMessageDto.content,
|
|
||||||
conversation.characterId,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Build messages for LLM
|
// Build messages for LLM
|
||||||
const messages = this.buildLLMMessages(
|
const messages = this.buildLLMMessages(conversation.character.personalityPrompt, conversation.messages, sendMessageDto.content, memoryContext);
|
||||||
conversation.character.personalityPrompt,
|
|
||||||
conversation.messages,
|
// Grouped debug logging
|
||||||
sendMessageDto.content,
|
this.logger.debug(
|
||||||
memoryContext,
|
`[sendMessage] conversation=${conversationId}\n` +
|
||||||
|
`--- Knowledges/Context ---\n${memoryContext || '(none)'}\n` +
|
||||||
|
`--- Full Messages to LLM ---\n` +
|
||||||
|
messages.map((msg, idx) => `[${idx}] ${msg.role}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? '...' : ''}`).join('\n'),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Generate response
|
// Generate response
|
||||||
@@ -161,20 +148,12 @@ export class ChatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Store assistant response in vector memory
|
// Store assistant response in vector memory
|
||||||
await this.memoryService.storeConversationMessage(
|
await this.memoryService.storeConversationMessage(`${conversation.character.name}: ${response.content}`, conversationId, { messageId: assistantMessage.id });
|
||||||
`${conversation.character.name}: ${response.content}`,
|
|
||||||
conversationId,
|
|
||||||
{ messageId: assistantMessage.id },
|
|
||||||
);
|
|
||||||
|
|
||||||
return { userMessage, assistantMessage };
|
return { userMessage, assistantMessage };
|
||||||
}
|
}
|
||||||
|
|
||||||
async *streamMessage(
|
async *streamMessage(conversationId: string, userId: string, sendMessageDto: SendMessageDto): AsyncGenerator<{ type: 'chunk' | 'message'; data: any }> {
|
||||||
conversationId: string,
|
|
||||||
userId: string,
|
|
||||||
sendMessageDto: SendMessageDto,
|
|
||||||
): AsyncGenerator<{ type: 'chunk' | 'message'; data: any }> {
|
|
||||||
const conversation = await this.findConversationById(conversationId, userId);
|
const conversation = await this.findConversationById(conversationId, userId);
|
||||||
|
|
||||||
// Create user message
|
// Create user message
|
||||||
@@ -189,25 +168,20 @@ export class ChatService {
|
|||||||
yield { type: 'message', data: { userMessage } };
|
yield { type: 'message', data: { userMessage } };
|
||||||
|
|
||||||
// Store user message in vector memory
|
// Store user message in vector memory
|
||||||
await this.memoryService.storeConversationMessage(
|
await this.memoryService.storeConversationMessage(`User: ${sendMessageDto.content}`, conversationId, { messageId: userMessage.id });
|
||||||
`User: ${sendMessageDto.content}`,
|
|
||||||
conversationId,
|
|
||||||
{ messageId: userMessage.id },
|
|
||||||
);
|
|
||||||
|
|
||||||
// Generate context from memory
|
// Generate context from memory
|
||||||
const memoryContext = await this.memoryService.buildContextForConversation(
|
const memoryContext = await this.memoryService.buildContextForConversation(conversationId, sendMessageDto.content, conversation.characterId);
|
||||||
conversationId,
|
|
||||||
sendMessageDto.content,
|
|
||||||
conversation.characterId,
|
|
||||||
);
|
|
||||||
|
|
||||||
// Build messages for LLM
|
// Build messages for LLM
|
||||||
const messages = this.buildLLMMessages(
|
const messages = this.buildLLMMessages(conversation.character.personalityPrompt, conversation.messages, sendMessageDto.content, memoryContext);
|
||||||
conversation.character.personalityPrompt,
|
|
||||||
conversation.messages,
|
// Grouped debug logging
|
||||||
sendMessageDto.content,
|
this.logger.debug(
|
||||||
memoryContext,
|
`[streamMessage] conversation=${conversationId}\n` +
|
||||||
|
`--- Knowledges/Context ---\n${memoryContext || '(none)'}\n` +
|
||||||
|
`--- Full Messages to LLM ---\n` +
|
||||||
|
messages.map((msg, idx) => `[${idx}] ${msg.role}: ${msg.content.substring(0, 200)}${msg.content.length > 200 ? '...' : ''}`).join('\n'),
|
||||||
);
|
);
|
||||||
|
|
||||||
// Generate streaming response
|
// Generate streaming response
|
||||||
@@ -233,10 +207,7 @@ export class ChatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Update conversation stats
|
// Update conversation stats
|
||||||
const tokensUsed = this.llmService.countTokens([
|
const tokensUsed = this.llmService.countTokens([...messages, { role: 'assistant', content: fullContent }]);
|
||||||
...messages,
|
|
||||||
{ role: 'assistant', content: fullContent },
|
|
||||||
]);
|
|
||||||
|
|
||||||
await this.prisma.conversation.update({
|
await this.prisma.conversation.update({
|
||||||
where: { id: conversationId },
|
where: { id: conversationId },
|
||||||
@@ -247,11 +218,7 @@ export class ChatService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Store assistant response in vector memory
|
// Store assistant response in vector memory
|
||||||
await this.memoryService.storeConversationMessage(
|
await this.memoryService.storeConversationMessage(`${conversation.character.name}: ${fullContent}`, conversationId, { messageId: assistantMessage.id });
|
||||||
`${conversation.character.name}: ${fullContent}`,
|
|
||||||
conversationId,
|
|
||||||
{ messageId: assistantMessage.id },
|
|
||||||
);
|
|
||||||
|
|
||||||
yield { type: 'message', data: { assistantMessage } };
|
yield { type: 'message', data: { assistantMessage } };
|
||||||
}
|
}
|
||||||
@@ -265,7 +232,10 @@ export class ChatService {
|
|||||||
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
const messages: Array<{ role: 'system' | 'user' | 'assistant'; content: string }> = [];
|
||||||
|
|
||||||
// Add system message with personality and context
|
// Add system message with personality and context
|
||||||
let systemContent = personalityPrompt;
|
let systemContent = `You are now role playing as the character based on the following personality description. You should use this information to inform your responses and stay in character. Always try to stay in character and provide responses that align with the personality and history provided. You are now talking to a user. The user will say something, and you will respond as the character. Remember this is a conversation, keep it talking like and maintain your character\n\n`;
|
||||||
|
if (personalityPrompt) {
|
||||||
|
systemContent = personalityPrompt;
|
||||||
|
}
|
||||||
if (memoryContext) {
|
if (memoryContext) {
|
||||||
systemContent += `\n\nUse the following context to inform your responses:\n${memoryContext}`;
|
systemContent += `\n\nUse the following context to inform your responses:\n${memoryContext}`;
|
||||||
}
|
}
|
||||||
|
|||||||
1
apps/backend/src/common/middleware/index.ts
Normal file
1
apps/backend/src/common/middleware/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export { RequestLoggerMiddleware } from './request-logger.middleware';
|
||||||
@@ -0,0 +1,62 @@
|
|||||||
|
import { Injectable, NestMiddleware, Logger } from '@nestjs/common';
|
||||||
|
import { Request, Response, NextFunction } from 'express';
|
||||||
|
|
||||||
|
/**
 * Nest middleware that writes one log line per finished HTTP response under
 * the `HTTP` logger context.
 *
 * Two environment variables are read once, in the constructor:
 * - `REQUEST_LOGGER`: logging is on unless the value is exactly `'false'`.
 * - `REQUEST_LOGGER_LEVEL`: `'verbose'` or `'minimal'`; any other value
 *   (including unset) selects `'standard'`.
 */
@Injectable()
export class RequestLoggerMiddleware implements NestMiddleware {
  private logger = new Logger('HTTP');
  private readonly isEnabled: boolean;
  private readonly logLevel: 'verbose' | 'standard' | 'minimal';

  constructor() {
    this.isEnabled = process.env.REQUEST_LOGGER !== 'false';

    const requestedLevel = process.env.REQUEST_LOGGER_LEVEL;
    if (requestedLevel === 'verbose' || requestedLevel === 'minimal') {
      this.logLevel = requestedLevel;
    } else {
      this.logLevel = 'standard';
    }
  }

  /**
   * Registers a `finish` listener on the response that logs the outcome, then
   * hands control to the next handler. When logging is disabled the request is
   * passed straight through.
   */
  use(req: Request, res: Response, next: NextFunction) {
    if (!this.isEnabled) {
      return next();
    }

    const { method, originalUrl, ip } = req;
    const userAgent = req.headers['user-agent'] || 'unknown';
    const startedAt = Date.now();

    // In verbose mode the incoming request is logged as well, before it is handled.
    if (this.logLevel === 'verbose') {
      this.logger.log(`${method} ${originalUrl} - ${ip} - ${userAgent}`);
    }

    res.on('finish', () => {
      const duration = Date.now() - startedAt;
      const { statusCode } = res;
      const contentLength = res.get('content-length') || 0;

      // All three formats share the same "<method> <url> <status>" prefix.
      const summary = `${method} ${originalUrl} ${statusCode}`;
      let message = `${summary} - ${duration}ms`; // standard
      if (this.logLevel === 'verbose') {
        message = `${summary} - ${duration}ms - ${contentLength}b - ${ip}`;
      } else if (this.logLevel === 'minimal') {
        message = summary;
      }

      // Severity follows the status class: 4xx -> warn, 5xx and above -> error, otherwise log.
      if (statusCode >= 400 && statusCode < 500) {
        this.logger.warn(message);
      } else if (statusCode >= 500) {
        this.logger.error(message);
      } else {
        this.logger.log(message);
      }
    });

    next();
  }
}
|
||||||
@@ -4,9 +4,11 @@ import {
|
|||||||
Get,
|
Get,
|
||||||
Delete,
|
Delete,
|
||||||
Param,
|
Param,
|
||||||
|
Body,
|
||||||
UploadedFile,
|
UploadedFile,
|
||||||
UseInterceptors,
|
UseInterceptors,
|
||||||
BadRequestException,
|
BadRequestException,
|
||||||
|
Logger,
|
||||||
} from '@nestjs/common';
|
} from '@nestjs/common';
|
||||||
import { FileInterceptor } from '@nestjs/platform-express';
|
import { FileInterceptor } from '@nestjs/platform-express';
|
||||||
import { ApiTags, ApiOperation, ApiResponse, ApiBearerAuth, ApiParam, ApiConsumes, ApiBody, ApiProperty } from '@nestjs/swagger';
|
import { ApiTags, ApiOperation, ApiResponse, ApiBearerAuth, ApiParam, ApiConsumes, ApiBody, ApiProperty } from '@nestjs/swagger';
|
||||||
@@ -22,10 +24,17 @@ class UploadResponseDto {
|
|||||||
message: string;
|
message: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Request body of the URL import endpoint; its only field is the address of the page to import. */
class ImportUrlDto {
|
||||||
|
@ApiProperty({ description: 'URL to import', example: 'https://sakurazaka46.com/s/s46/diary/detail/68008' })
|
||||||
|
url: string;
|
||||||
|
}
|
||||||
|
|
||||||
@ApiTags('import')
|
@ApiTags('import')
|
||||||
@ApiBearerAuth()
|
@ApiBearerAuth()
|
||||||
@Controller('import')
|
@Controller('import')
|
||||||
export class ImportController {
|
export class ImportController {
|
||||||
|
private readonly logger = new Logger(ImportController.name);
|
||||||
|
|
||||||
constructor(private importService: ImportService) {}
|
constructor(private importService: ImportService) {}
|
||||||
|
|
||||||
@Post('characters/:characterId/files')
|
@Post('characters/:characterId/files')
|
||||||
@@ -98,4 +107,28 @@ export class ImportController {
|
|||||||
await this.importService.deleteKnowledge(knowledgeId, userId);
|
await this.importService.deleteKnowledge(knowledgeId, userId);
|
||||||
return { message: 'Knowledge deleted successfully' };
|
return { message: 'Knowledge deleted successfully' };
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Start importing the content of a web page as knowledge for a character.
 *
 * Checks that the request carries a non-empty string `url`, then delegates to
 * `ImportService.importFromUrl` and returns its result unchanged.
 *
 * @param characterId - Character ID taken from the route.
 * @param importUrlDto - Request body; may be missing or malformed.
 * @param userId - Value injected by `@CurrentUser('userId')`.
 * @returns The result produced by the import service.
 * @throws BadRequestException when no usable `url` is supplied.
 */
@Post('characters/:characterId/url')
@ApiOperation({ summary: 'Import content from URL for character knowledge' })
@ApiParam({ name: 'characterId', description: 'Character ID' })
@ApiBody({ type: ImportUrlDto })
@ApiResponse({ status: 201, description: 'URL content is being imported and processed', type: UploadResponseDto })
@ApiResponse({ status: 400, description: 'Invalid URL or unsupported website' })
@ApiResponse({ status: 401, description: 'Unauthorized' })
async importFromUrl(
  @Param('characterId') characterId: string,
  @Body() importUrlDto: ImportUrlDto,
  @CurrentUser('userId') userId: string,
): Promise<UploadResponseDto> {
  // The body can be absent or carry a non-string `url`. Read it defensively so a
  // malformed request is answered with 400 instead of failing with a TypeError.
  const rawUrl: unknown = importUrlDto?.url;
  const url = typeof rawUrl === 'string' ? rawUrl.trim() : '';

  this.logger.log(`Received URL import request for character: ${characterId}, url: ${url || '(missing)'}`);

  if (!url) {
    this.logger.warn('URL import request rejected: URL is required');
    throw new BadRequestException('URL is required');
  }

  const result = await this.importService.importFromUrl(url, characterId, userId);
  this.logger.log(`URL import started, knowledgeId: ${result.knowledgeId}`);
  return result;
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,19 +1,23 @@
|
|||||||
import { Injectable, BadRequestException } from '@nestjs/common';
|
import { Injectable, BadRequestException, Logger } from '@nestjs/common';
|
||||||
import { PrismaService } from '../prisma/prisma.service';
|
import { PrismaService } from '../prisma/prisma.service';
|
||||||
import { MemoryService } from '../vector/memory.service';
|
import { MemoryService } from '../vector/memory.service';
|
||||||
import { TextFileAdapter } from './adapters/text-file.adapter';
|
import { TextFileAdapter } from './adapters/text-file.adapter';
|
||||||
import { IImportAdapter, ImportResult } from './interfaces/import-adapter.interface';
|
import { IImportAdapter, ImportResult } from './interfaces/import-adapter.interface';
|
||||||
import { ImportStatus } from '@prisma/client';
|
import { IWebScraper, WebScraperResult } from './interfaces/web-scraper.interface';
|
||||||
|
import { SakurazakaScraper } from './scrapers/sakurazaka-scraper';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class ImportService {
|
export class ImportService {
|
||||||
|
private readonly logger = new Logger(ImportService.name);
|
||||||
private adapters: IImportAdapter[];
|
private adapters: IImportAdapter[];
|
||||||
|
private scrapers: IWebScraper[];
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private prisma: PrismaService,
|
private prisma: PrismaService,
|
||||||
private memoryService: MemoryService,
|
private memoryService: MemoryService,
|
||||||
) {
|
) {
|
||||||
this.adapters = [new TextFileAdapter()];
|
this.adapters = [new TextFileAdapter()];
|
||||||
|
this.scrapers = [new SakurazakaScraper()];
|
||||||
}
|
}
|
||||||
|
|
||||||
async uploadFile(
|
async uploadFile(
|
||||||
@@ -33,6 +37,14 @@ export class ImportService {
|
|||||||
// Parse the file
|
// Parse the file
|
||||||
const result = await adapter.parse(file);
|
const result = await adapter.parse(file);
|
||||||
|
|
||||||
|
// Reject if content is too large
|
||||||
|
const maxRawContentLength = 100000;
|
||||||
|
if (result.content.length > maxRawContentLength) {
|
||||||
|
throw new BadRequestException(
|
||||||
|
`File too large: ${result.content.length} characters (max: ${maxRawContentLength}). Please use a smaller file.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
// Create knowledge entry
|
// Create knowledge entry
|
||||||
const knowledge = await this.prisma.characterKnowledge.create({
|
const knowledge = await this.prisma.characterKnowledge.create({
|
||||||
data: {
|
data: {
|
||||||
@@ -49,18 +61,9 @@ export class ImportService {
|
|||||||
});
|
});
|
||||||
|
|
||||||
// Process the content in the background
|
// Process the content in the background
|
||||||
|
this.logger.log(`[${knowledge.id}] Starting background processing for file upload, character: ${characterId}`);
|
||||||
this.processContent(knowledge.id, characterId, result).catch((error) => {
|
this.processContent(knowledge.id, characterId, result).catch((error) => {
|
||||||
console.error('Error processing import:', error);
|
this.logger.error(`[${knowledge.id}] Error processing file import:`, error);
|
||||||
this.prisma.characterKnowledge.update({
|
|
||||||
where: { id: knowledge.id },
|
|
||||||
data: {
|
|
||||||
status: 'failed',
|
|
||||||
processingInfo: {
|
|
||||||
...result.metadata,
|
|
||||||
error: error instanceof Error ? error.message : 'Unknown error',
|
|
||||||
},
|
|
||||||
},
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
|
||||||
return {
|
return {
|
||||||
@@ -141,24 +144,86 @@ export class ImportService {
|
|||||||
characterId: string,
|
characterId: string,
|
||||||
result: ImportResult,
|
result: ImportResult,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
try {
|
this.logger.log(`[${knowledgeId}] Starting processContent, content length: ${result.content.length}`);
|
||||||
// Chunk the content into smaller pieces
|
|
||||||
const chunks = this.chunkContent(result.content, 1000, 200);
|
// Reduce memory pressure by limiting content size more aggressively
|
||||||
|
const maxContentLength = 15000; // Reduced from 30000
|
||||||
|
let content = result.content;
|
||||||
|
if (content.length > maxContentLength) {
|
||||||
|
this.logger.warn(`[${knowledgeId}] Content truncated from ${content.length} to ${maxContentLength} characters`);
|
||||||
|
content = content.substring(0, maxContentLength) + '\n\n[Content truncated due to size limits]';
|
||||||
|
}
|
||||||
|
|
||||||
// Store each chunk in vector memory
|
// Process chunks one at a time using generator to minimize memory usage
|
||||||
for (let i = 0; i < chunks.length; i++) {
|
const chunkSize = 800; // Reduced from 1000
|
||||||
await this.memoryService.storeCharacterKnowledge(
|
const overlap = 100; // Reduced from 200
|
||||||
chunks[i],
|
const maxChunks = 20; // Reduced from 30
|
||||||
characterId,
|
|
||||||
knowledgeId,
|
let processedChunks = 0;
|
||||||
{
|
let totalChunks = 0;
|
||||||
...result.metadata,
|
let start = 0;
|
||||||
chunkIndex: i,
|
|
||||||
totalChunks: chunks.length,
|
try {
|
||||||
},
|
this.logger.log(`[${knowledgeId}] Processing chunks with streaming approach...`);
|
||||||
);
|
|
||||||
|
while (start < content.length && processedChunks < maxChunks) {
|
||||||
|
// Calculate chunk boundaries
|
||||||
|
const end = Math.min(start + chunkSize, content.length);
|
||||||
|
let chunkEnd = end;
|
||||||
|
|
||||||
|
// Try to break at a sentence boundary (only if not at end)
|
||||||
|
if (end < content.length) {
|
||||||
|
const chunk = content.slice(start, end);
|
||||||
|
const lastPeriod = chunk.lastIndexOf('.');
|
||||||
|
const lastNewline = chunk.lastIndexOf('\n');
|
||||||
|
const breakPoint = Math.max(lastPeriod, lastNewline);
|
||||||
|
|
||||||
|
if (breakPoint > chunkSize * 0.5) {
|
||||||
|
chunkEnd = start + breakPoint + 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the chunk
|
||||||
|
const chunk = content.slice(start, chunkEnd).trim();
|
||||||
|
|
||||||
|
if (chunk.length > 0) {
|
||||||
|
totalChunks++;
|
||||||
|
|
||||||
|
if (processedChunks < maxChunks) {
|
||||||
|
this.logger.debug(`[${knowledgeId}] Processing chunk ${processedChunks + 1} (${chunk.length} chars)...`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await this.memoryService.storeCharacterKnowledge(
|
||||||
|
chunk,
|
||||||
|
characterId,
|
||||||
|
knowledgeId,
|
||||||
|
{
|
||||||
|
...result.metadata,
|
||||||
|
chunkIndex: processedChunks,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
processedChunks++;
|
||||||
|
this.logger.debug(`[${knowledgeId}] Chunk ${processedChunks} stored successfully`);
|
||||||
|
} catch (chunkError) {
|
||||||
|
this.logger.error(`[${knowledgeId}] Failed to store chunk ${processedChunks + 1}:`, chunkError);
|
||||||
|
throw chunkError;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Force garbage collection opportunity between chunks
|
||||||
|
if (processedChunks % 2 === 0) {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 150));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Move to next chunk position
|
||||||
|
const nextStart = start + (chunkEnd - start) - overlap;
|
||||||
|
if (nextStart <= start) break; // Prevent infinite loop
|
||||||
|
start = nextStart;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.logger.log(`[${knowledgeId}] Processed ${processedChunks}/${totalChunks} chunks, updating status to completed`);
|
||||||
|
|
||||||
// Update status to completed
|
// Update status to completed
|
||||||
await this.prisma.characterKnowledge.update({
|
await this.prisma.characterKnowledge.update({
|
||||||
where: { id: knowledgeId },
|
where: { id: knowledgeId },
|
||||||
@@ -166,49 +231,219 @@ export class ImportService {
|
|||||||
status: 'completed',
|
status: 'completed',
|
||||||
processingInfo: {
|
processingInfo: {
|
||||||
...result.metadata,
|
...result.metadata,
|
||||||
chunksProcessed: chunks.length,
|
chunksProcessed: processedChunks,
|
||||||
|
originalChunks: totalChunks,
|
||||||
|
wasTruncated: result.content.length > maxContentLength || totalChunks > maxChunks,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
});
|
});
|
||||||
|
|
||||||
|
this.logger.log(`[${knowledgeId}] Processing completed successfully`);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
|
this.logger.error(`[${knowledgeId}] Error in processContent:`, error);
|
||||||
|
|
||||||
// Update status to failed
|
// Update status to failed
|
||||||
await this.prisma.characterKnowledge.update({
|
try {
|
||||||
where: { id: knowledgeId },
|
await this.prisma.characterKnowledge.update({
|
||||||
data: {
|
where: { id: knowledgeId },
|
||||||
status: 'failed',
|
data: {
|
||||||
processingInfo: {
|
status: 'failed',
|
||||||
...result.metadata,
|
processingInfo: {
|
||||||
error: error instanceof Error ? error.message : 'Unknown error',
|
...result.metadata,
|
||||||
|
error: error instanceof Error ? error.message : 'Unknown error',
|
||||||
|
},
|
||||||
},
|
},
|
||||||
},
|
});
|
||||||
});
|
this.logger.log(`[${knowledgeId}] Status updated to failed`);
|
||||||
|
} catch (dbError) {
|
||||||
|
this.logger.error(`[${knowledgeId}] Failed to update status to failed:`, dbError);
|
||||||
|
}
|
||||||
|
|
||||||
throw error;
|
throw error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private chunkContent(content: string, chunkSize: number, overlap: number): string[] {
|
async importFromUrl(
|
||||||
const chunks: string[] = [];
|
url: string,
|
||||||
let start = 0;
|
characterId: string,
|
||||||
|
userId: string,
|
||||||
while (start < content.length) {
|
): Promise<{ knowledgeId: string; message: string }> {
|
||||||
const end = Math.min(start + chunkSize, content.length);
|
// Validate URL format
|
||||||
let chunk = content.slice(start, end);
|
let urlObj: URL;
|
||||||
|
try {
|
||||||
// Try to break at a sentence boundary
|
urlObj = new URL(url);
|
||||||
if (end < content.length) {
|
} catch {
|
||||||
const lastPeriod = chunk.lastIndexOf('.');
|
throw new BadRequestException('Invalid URL format');
|
||||||
const lastNewline = chunk.lastIndexOf('\n');
|
|
||||||
const breakPoint = Math.max(lastPeriod, lastNewline);
|
|
||||||
|
|
||||||
if (breakPoint > chunkSize * 0.5) {
|
|
||||||
chunk = chunk.slice(0, breakPoint + 1);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
chunks.push(chunk.trim());
|
|
||||||
start += chunk.length - overlap;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return chunks;
|
// Find appropriate scraper
|
||||||
|
const scraper = this.scrapers.find((s) => s.canHandle(url));
|
||||||
|
|
||||||
|
if (!scraper) {
|
||||||
|
throw new BadRequestException(
|
||||||
|
`Unsupported URL: ${urlObj.hostname}. No scraper available for this website.`,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scrape the content
|
||||||
|
const result = await scraper.scrape(url);
|
||||||
|
|
||||||
|
// Reject if content is too large
|
||||||
|
const maxRawContentLength = 100000;
|
||||||
|
if (result.content.length > maxRawContentLength) {
|
||||||
|
throw new BadRequestException(
|
||||||
|
`Content too large: ${result.content.length} characters (max: ${maxRawContentLength}). Please use a shorter article.`
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create knowledge entry
|
||||||
|
const knowledgeName = result.metadata.title || `Import from ${urlObj.hostname}`;
|
||||||
|
const knowledge = await this.prisma.characterKnowledge.create({
|
||||||
|
data: {
|
||||||
|
name: knowledgeName,
|
||||||
|
sourceType: 'url',
|
||||||
|
sourceName: url,
|
||||||
|
mimeType: 'text/html',
|
||||||
|
rawContent: result.content,
|
||||||
|
status: 'processing',
|
||||||
|
processingInfo: result.metadata,
|
||||||
|
characterId,
|
||||||
|
},
|
||||||
|
});
|
||||||
|
|
||||||
|
// Process the content in the background
|
||||||
|
this.logger.log(`[${knowledge.id}] Starting background processing for URL: ${url}, character: ${characterId}`);
|
||||||
|
this.processScrapedContent(knowledge.id, characterId, result).catch((error) => {
|
||||||
|
this.logger.error(`[${knowledge.id}] Error processing URL import:`, error);
|
||||||
|
});
|
||||||
|
|
||||||
|
return {
|
||||||
|
knowledgeId: knowledge.id,
|
||||||
|
message: 'URL content is being imported and processed',
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Splits scraped content into overlapping chunks, stores each chunk through
 * `memoryService.storeCharacterKnowledge`, and records the outcome on the
 * `characterKnowledge` row identified by `knowledgeId`.
 *
 * Content longer than `maxContentLength` is cut (and a truncation marker is
 * appended) before chunking, and at most `maxChunks` chunks are stored. Chunks
 * beyond the cap are still counted, so `processingInfo.originalChunks` and
 * `processingInfo.wasTruncated` reflect what was left out.
 *
 * @param knowledgeId id of the knowledge row to update; also used as a log prefix
 * @param characterId id of the character the chunks are stored for
 * @param result scraped content and metadata; the metadata is copied into every
 *   stored chunk and into `processingInfo`
 * @throws rethrows any storage or database error after attempting to mark the
 *   row as `failed`
 */
private async processScrapedContent(
  knowledgeId: string,
  characterId: string,
  result: WebScraperResult,
): Promise<void> {
  this.logger.log(`[${knowledgeId}] Starting processScrapedContent, content length: ${result.content.length}`);

  // Bound the amount of text that is chunked and embedded.
  const maxContentLength = 15000;
  let content = result.content;
  if (content.length > maxContentLength) {
    this.logger.warn(`[${knowledgeId}] Content truncated from ${content.length} to ${maxContentLength} characters`);
    content = content.substring(0, maxContentLength) + '\n\n[Content truncated due to size limits]';
  }

  const chunkSize = 800;
  const overlap = 100;
  const maxChunks = 20;

  let processedChunks = 0; // chunks actually stored
  let totalChunks = 0; // non-empty chunks the content produced, stored or not
  let start = 0;

  try {
    this.logger.log(`[${knowledgeId}] Processing chunks with streaming approach...`);

    // Walk the whole content so totalChunks is accurate; only the first
    // maxChunks chunks are stored.
    while (start < content.length) {
      const end = Math.min(start + chunkSize, content.length);
      let chunkEnd = end;

      // Prefer ending on a sentence or line boundary, but only when that
      // boundary lies in the second half of the window.
      if (end < content.length) {
        const window = content.slice(start, end);
        const breakPoint = Math.max(window.lastIndexOf('.'), window.lastIndexOf('\n'));
        if (breakPoint > chunkSize * 0.5) {
          chunkEnd = start + breakPoint + 1;
        }
      }

      const chunk = content.slice(start, chunkEnd).trim();

      if (chunk.length > 0) {
        totalChunks++;

        if (processedChunks < maxChunks) {
          this.logger.debug(`[${knowledgeId}] Processing chunk ${processedChunks + 1} (${chunk.length} chars)...`);

          try {
            await this.memoryService.storeCharacterKnowledge(
              chunk,
              characterId,
              knowledgeId,
              {
                ...result.metadata,
                chunkIndex: processedChunks,
              },
            );
            processedChunks++;
            this.logger.debug(`[${knowledgeId}] Chunk ${processedChunks} stored successfully`);
          } catch (chunkError) {
            this.logger.error(`[${knowledgeId}] Failed to store chunk ${processedChunks + 1}:`, chunkError);
            throw chunkError;
          }

          // Pause briefly after every second stored chunk so the event loop
          // can run other work between embedding calls.
          if (processedChunks % 2 === 0) {
            await new Promise((resolve) => setTimeout(resolve, 150));
          }
        }
      }

      // The chunk reached the end of the content: stop here. Stepping back by
      // `overlap` would only re-emit the final characters as an extra chunk.
      if (chunkEnd >= content.length) break;

      const nextStart = chunkEnd - overlap;
      if (nextStart <= start) break; // Prevent infinite loop
      start = nextStart;
    }

    this.logger.log(`[${knowledgeId}] Processed ${processedChunks}/${totalChunks} chunks, updating status to completed`);

    await this.prisma.characterKnowledge.update({
      where: { id: knowledgeId },
      data: {
        status: 'completed',
        processingInfo: {
          ...result.metadata,
          chunksProcessed: processedChunks,
          originalChunks: totalChunks,
          wasTruncated: result.content.length > maxContentLength || totalChunks > maxChunks,
        },
      },
    });

    this.logger.log(`[${knowledgeId}] Processing completed successfully`);
  } catch (error) {
    this.logger.error(`[${knowledgeId}] Error in processScrapedContent:`, error);

    // Best effort: a failure to record the failed status is logged but must
    // not mask the original error, which is rethrown below.
    try {
      await this.prisma.characterKnowledge.update({
        where: { id: knowledgeId },
        data: {
          status: 'failed',
          processingInfo: {
            ...result.metadata,
            error: error instanceof Error ? error.message : 'Unknown error',
          },
        },
      });
      this.logger.log(`[${knowledgeId}] Status updated to failed`);
    } catch (dbError) {
      this.logger.error(`[${knowledgeId}] Failed to update status to failed:`, dbError);
    }

    throw error;
  }
}
|
||||||
}
|
}
|
||||||
|
|||||||
2
apps/backend/src/import/interfaces/index.ts
Normal file
2
apps/backend/src/import/interfaces/index.ts
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
export { IImportAdapter, ImportResult } from './import-adapter.interface';
|
||||||
|
export { IWebScraper, WebScraperResult } from './web-scraper.interface';
|
||||||
23
apps/backend/src/import/interfaces/web-scraper.interface.ts
Normal file
23
apps/backend/src/import/interfaces/web-scraper.interface.ts
Normal file
@@ -0,0 +1,23 @@
|
|||||||
|
/**
 * Result of scraping a single web page.
 */
export interface WebScraperResult {
  /** Text extracted from the page. */
  content: string;

  /** Descriptive fields about the page the content came from. */
  metadata: {
    /** Name of the source site, e.g. 'Sakurazaka46 Blog'. */
    sourceName: string;
    /** URL that was scraped. */
    url: string;
    /** Page or post title, when the scraper found one. */
    title?: string;
    /** Author name, when the scraper found one. */
    author?: string;
    /** Publication date as a string, when the scraper found one. */
    publishedDate?: string;
    /** Additional scraper-specific fields (for example a blog post id). */
    [key: string]: any;
  };
}
|
||||||
|
|
||||||
|
/**
 * Contract implemented by every site-specific scraper.
 */
export interface IWebScraper {
  /**
   * Reports whether this scraper supports the given URL.
   *
   * @param url URL to test
   * @returns true when this scraper can handle the URL
   */
  canHandle(url: string): boolean;

  /**
   * Fetches the URL and extracts its content and metadata.
   *
   * @param url URL to scrape
   * @returns the extracted content together with its metadata
   */
  scrape(url: string): Promise<WebScraperResult>;
}
|
||||||
1
apps/backend/src/import/scrapers/index.ts
Normal file
1
apps/backend/src/import/scrapers/index.ts
Normal file
@@ -0,0 +1 @@
|
|||||||
|
export { SakurazakaScraper } from './sakurazaka-scraper';
|
||||||
337
apps/backend/src/import/scrapers/sakurazaka-scraper.spec.ts
Normal file
337
apps/backend/src/import/scrapers/sakurazaka-scraper.spec.ts
Normal file
@@ -0,0 +1,337 @@
|
|||||||
|
import { SakurazakaScraper } from './sakurazaka-scraper';
|
||||||
|
|
||||||
|
// Unit tests for SakurazakaScraper. The global fetch is replaced by a jest mock
// for every scrape test (and restored afterwards), so all tests run against the
// inline HTML fixtures below.
describe('SakurazakaScraper', () => {
  let scraper: SakurazakaScraper;

  beforeEach(() => {
    scraper = new SakurazakaScraper();
  });

  describe('canHandle', () => {
    it('should return true for sakurazaka46.com URLs', () => {
      const urls = [
        'https://sakurazaka46.com/s/s46/diary/detail/68008',
        'https://sakurazaka46.com/s/s46/diary/detail/68008?ima=0000&cd=blog',
        'https://www.sakurazaka46.com/s/s46/diary/detail/10000',
        'https://sakurazaka46.com/s/s46/diary/blog/list',
      ];

      urls.forEach((url) => {
        expect(scraper.canHandle(url)).toBe(true);
      });
    });

    it('should return false for non-sakurazaka46.com URLs', () => {
      const urls = [
        'https://example.com/blog/123',
        'https://nogizaka46.com/s/n46/diary/detail/12345',
        'https://hinatazaka46.com/s/h46/diary/detail/12345',
        'https://sakurazaka46.net/fake/blog/123',
        'not-a-url',
        '',
      ];

      urls.forEach((url) => {
        expect(scraper.canHandle(url)).toBe(false);
      });
    });

    it('should return false for invalid URLs', () => {
      expect(scraper.canHandle('invalid-url')).toBe(false);
      expect(scraper.canHandle('')).toBe(false);
    });
  });

  describe('scrape', () => {
    // Fixture lines are kept flush-left so the template string holds exactly
    // the markup shown, with no added indentation.
    const mockBlogHtml = `
<article class="post wovn-ignore">
<div class="col-l widfix-sp">
<div class="com-calendindinav">
<div class="wrap-bg">
<div class="inner">
<div class="year-month">
<div class="ym-inner wf-a">
<div class="ym-txt">
<span class="ym-year">2026</span>
<span class="ym-month">2</span>
</div>
<p class="date wf-a">18</p>
</div>
</div>
</div>
</div>
</div>
</div>
<div class="col-r widfix-sp">
<div class="inner title-wrap"><h1 class="title">The growing up train</h1></div>
</div>
<div class="col-l eigo-wrap pc">
<div class="eigo-inner">
<p class="eigo wf-a">YU MURAI</p>
</div>
</div>
<div class="col-r">
<div class="box-article">
<p><br/><br/>こんばんは<br/><br/>14thシングル<br/><br/>嬉しいです</p>
<p>四期生は肝が据わっています!<br/></p>
<img src="/files/14/diary/s46/blog/moblog/202602/mobpqiCQR.jpg"/>
<p>またね〜<br/>村井優</p>
</div>
<div class="blog-foot-nav">
<div class="com-btn-lcr">
<p class="btn-type1s"><a href="/s/s46/diary/detail/67798">前へ</a></p>
<p class="btn-type3"><a href="/s/s46/diary/blog/list?ct=67">村井 優のブログ一覧</a></p>
</div>
</div>
<div class="app-button">
<div class="box-a">
<p class="app-title">櫻坂46メッセージ</p>
</div>
</div>
</div>
</article>
`;

    // Captured once so the real implementation can be put back after each test.
    const originalFetch = global.fetch;

    // Queues one successful fetch response whose body is the given HTML.
    const respondWithHtml = (html: string): void => {
      (global.fetch as jest.Mock).mockResolvedValueOnce({
        ok: true,
        text: () => Promise.resolve(html),
      });
    };

    beforeEach(() => {
      global.fetch = jest.fn();
    });

    afterEach(() => {
      jest.resetAllMocks();
      global.fetch = originalFetch;
    });

    it('should successfully scrape blog content', async () => {
      respondWithHtml(mockBlogHtml);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      expect(result.content).toContain('Title: The growing up train');
      expect(result.content).toContain('Author: YU MURAI');
      expect(result.content).toContain('Date: 2026-02-18');
      expect(result.content).toContain('こんばんは');
      expect(result.content).toContain('[Image: mobpqiCQR.jpg]');
      expect(result.content).toContain('またね〜');
      expect(result.content).toContain('村井優');
    });

    it('should extract correct metadata', async () => {
      respondWithHtml(mockBlogHtml);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      expect(result.metadata).toEqual({
        sourceName: 'Sakurazaka46 Blog',
        url: 'https://sakurazaka46.com/s/s46/diary/detail/68008',
        title: 'The growing up train',
        author: 'YU MURAI',
        publishedDate: '2026-02-18',
        blogId: '68008',
      });
    });

    it('should handle blog posts without title gracefully', async () => {
      const htmlWithoutTitle = mockBlogHtml.replace(
        /<h1 class="title">[^<]*<\/h1>/,
        ''
      );
      respondWithHtml(htmlWithoutTitle);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      expect(result.metadata.title).toBeFalsy();
    });

    it('should handle blog posts without author gracefully', async () => {
      const htmlWithoutAuthor = mockBlogHtml.replace(
        /<p class="eigo wf-a">[^<]*<\/p>/,
        ''
      );
      respondWithHtml(htmlWithoutAuthor);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      expect(result.metadata.author).toBeFalsy();
    });

    it('should handle HTTP errors', async () => {
      (global.fetch as jest.Mock).mockResolvedValueOnce({
        ok: false,
        status: 404,
        statusText: 'Not Found',
      });

      await expect(
        scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/99999')
      ).rejects.toThrow('Failed to fetch page: 404 Not Found');
    });

    it('should handle network errors', async () => {
      (global.fetch as jest.Mock).mockRejectedValueOnce(new Error('Network error'));

      await expect(
        scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008')
      ).rejects.toThrow('Failed to scrape Sakurazaka46 blog: Network error');
    });

    it('should handle missing article element', async () => {
      respondWithHtml('<div>No article here</div>');

      await expect(
        scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008')
      ).rejects.toThrow('Failed to scrape Sakurazaka46 blog: Could not find article content in the page');
    });

    it('should handle blog ID extraction from various URL formats', async () => {
      // Not "Once": the same response has to serve every URL in the loop below.
      (global.fetch as jest.Mock).mockResolvedValue({
        ok: true,
        text: () => Promise.resolve(mockBlogHtml),
      });

      const testCases = [
        { url: 'https://sakurazaka46.com/s/s46/diary/detail/68008', expectedId: '68008' },
        { url: 'https://sakurazaka46.com/s/s46/diary/detail/68008?ima=0000&cd=blog', expectedId: '68008' },
        { url: 'https://sakurazaka46.com/s/s46/diary/detail/12345#anchor', expectedId: '12345' },
      ];

      for (const { url, expectedId } of testCases) {
        const result = await scraper.scrape(url);
        expect(result.metadata.blogId).toBe(expectedId);
      }
    });

    it('should clean up HTML entities and tags properly', async () => {
      // The fixture must contain real entities (&amp;, &nbsp;); otherwise the
      // assertions below cannot tell a decoding scraper from a non-decoding one.
      const htmlWithEntities = `
<article>
<h1 class="title">Test &amp; Example</h1>
<div class="box-article">
<p>Hello&nbsp;world <script>alert(1)</script></p>
<p>Line 1<br/>Line 2</p>
</div>
</article>
`;
      respondWithHtml(htmlWithEntities);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');

      expect(result.content).toContain('Test & Example');
      expect(result.content).toContain('Hello world');
      expect(result.content).not.toContain('&nbsp;');
      expect(result.content).not.toContain('&amp;');
      expect(result.content).not.toContain('<script>');
      expect(result.content).not.toContain('<br/>');
    });

    it('should remove navigation and footer sections', async () => {
      respondWithHtml(mockBlogHtml);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      // Navigation elements should not be in the content
      expect(result.content).not.toContain('前へ');
      expect(result.content).not.toContain('村井 優のブログ一覧');
      expect(result.content).not.toContain('櫻坂46メッセージ');
      expect(result.content).not.toContain('blog-foot-nav');
      expect(result.content).not.toContain('app-button');
    });

    it('should handle multiple images in blog post', async () => {
      const htmlWithMultipleImages = `
<article>
<h1 class="title">Multi Image Post</h1>
<div class="box-article">
<img src="/files/image1.jpg"/>
<p>Text between images</p>
<img src="/files/image2.png"/>
<img src="/files/image3.gif"/>
<p>After images text</p>
</div>
</article>
`;
      respondWithHtml(htmlWithMultipleImages);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');

      expect(result.content).toContain('[Image: image1.jpg]');
      expect(result.content).toContain('[Image: image2.png]');
      expect(result.content).toContain('[Image: image3.gif]');
      expect(result.content).toContain('Text between images');
      expect(result.content).toContain('After images text');
    });

    it('should extract all images from complex blog structure', async () => {
      const htmlWithComplexImages = `
<article class="post">
<div class="col-r">
<div class="box-article">
<p><img src="/files/14/diary/blog1.jpg"/></p>
<p>First paragraph</p>
<p><img src="/files/14/diary/blog2.jpg"/><br/><img src="/files/14/diary/blog3.jpg"/></p>
<p>Second paragraph with <img src="/files/14/diary/blog4.jpg"/> inline image</p>
</div>
</div>
</article>
`;
      respondWithHtml(htmlWithComplexImages);

      const result = await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/1');

      expect(result.content).toContain('[Image: blog1.jpg]');
      expect(result.content).toContain('[Image: blog2.jpg]');
      expect(result.content).toContain('[Image: blog3.jpg]');
      expect(result.content).toContain('[Image: blog4.jpg]');
      expect(result.content).toContain('First paragraph');
      expect(result.content).toContain('Second paragraph');
    });

    it('should set correct User-Agent header', async () => {
      respondWithHtml(mockBlogHtml);

      await scraper.scrape('https://sakurazaka46.com/s/s46/diary/detail/68008');

      expect(global.fetch).toHaveBeenCalledWith(
        'https://sakurazaka46.com/s/s46/diary/detail/68008',
        expect.objectContaining({
          headers: expect.objectContaining({
            'User-Agent': expect.stringContaining('Mozilla/5.0'),
          }),
        })
      );
    });
  });
});
|
||||||
238
apps/backend/src/import/scrapers/sakurazaka-scraper.ts
Normal file
238
apps/backend/src/import/scrapers/sakurazaka-scraper.ts
Normal file
@@ -0,0 +1,238 @@
|
|||||||
|
import { Logger } from '@nestjs/common';
|
||||||
|
import { IWebScraper, WebScraperResult } from '../interfaces/web-scraper.interface';
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Web scraper for Sakurazaka46 blog posts
|
||||||
|
* Handles URLs like: https://sakurazaka46.com/s/s46/diary/detail/{id}
|
||||||
|
*/
|
||||||
|
export class SakurazakaScraper implements IWebScraper {
|
||||||
|
private readonly logger = new Logger(SakurazakaScraper.name);
|
||||||
|
private readonly baseUrl = 'sakurazaka46.com';
|
||||||
|
|
||||||
|
/**
 * Reports whether the URL points at the Sakurazaka46 site.
 *
 * @param url candidate URL
 * @returns true when the URL parses and its hostname is `baseUrl` or
 *   `www.` + `baseUrl`; false otherwise, including for unparseable input
 */
canHandle(url: string): boolean {
  let hostname: string;
  try {
    hostname = new URL(url).hostname;
  } catch {
    // Not a valid absolute URL.
    return false;
  }

  if (hostname === this.baseUrl) {
    return true;
  }
  return hostname === `www.${this.baseUrl}`;
}
|
||||||
|
|
||||||
|
/**
 * Downloads a blog page and extracts its text content and metadata.
 *
 * The request is aborted after a fixed timeout so a stalled server cannot
 * keep the caller waiting indefinitely; the timeout also covers reading the
 * response body.
 *
 * @param url page to fetch
 * @returns the extracted content and metadata
 * @throws Error prefixed with 'Failed to scrape Sakurazaka46 blog: ' for any
 *   failure (non-2xx status, network error, timeout, or missing article markup)
 */
async scrape(url: string): Promise<WebScraperResult> {
  this.logger.log(`Starting to scrape URL: ${url}`);

  const requestTimeoutMs = 15000;
  const controller = new AbortController();
  const timeoutHandle = setTimeout(() => controller.abort(), requestTimeoutMs);

  try {
    // Fetch the page content
    this.logger.debug(`Fetching page content from: ${url}`);
    const response = await fetch(url, {
      headers: {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36',
      },
      signal: controller.signal,
    });

    if (!response.ok) {
      this.logger.error(`Failed to fetch page: ${response.status} ${response.statusText}`);
      throw new Error(`Failed to fetch page: ${response.status} ${response.statusText}`);
    }

    this.logger.debug(`Page fetched successfully, reading content...`);
    const html = await response.text();
    this.logger.debug(`HTML content received: ${html.length} bytes`);

    // Extract content
    this.logger.debug('Extracting content from HTML...');
    const content = this.extractContent(html);
    this.logger.log(`Content extracted: ${content.length} characters`);

    const metadata = this.extractMetadata(html, url);
    this.logger.log(`Metadata extracted:`, metadata);

    return {
      content,
      metadata,
    };
  } catch (error) {
    this.logger.error(`Failed to scrape Sakurazaka46 blog:`, error);
    throw new Error(
      `Failed to scrape Sakurazaka46 blog: ${error instanceof Error ? error.message : 'Unknown error'}`
    );
  } finally {
    // Always release the timer, whether the request succeeded or failed.
    clearTimeout(timeoutHandle);
  }
}
|
||||||
|
|
||||||
|
/**
 * Builds the plain-text representation of a blog post from the page HTML.
 *
 * The output is a newline-joined list of optional `Title:`, `Author:` and
 * `Date:` header lines, followed by a blank line and the cleaned body of the
 * `box-article` element when that body is non-empty.
 *
 * @param html full page HTML
 * @returns the assembled text
 * @throws Error when the page contains no `<article>` element
 */
private extractContent(html: string): string {
  const articleMatch = /<article[^>]*>([\s\S]*?)<\/article>/i.exec(html);
  if (articleMatch === null) {
    throw new Error('Could not find article content in the page');
  }
  const articleHtml = articleMatch[0];

  // Text of the first capture group of `pattern` inside the article, or null
  // when the pattern does not occur at all.
  const textOf = (pattern: RegExp): string | null => {
    const found = pattern.exec(articleHtml);
    return found === null ? null : this.stripHtml(found[1]).trim();
  };

  // Everything in `text` before the first occurrence of `boundary`.
  const cutBefore = (text: string, boundary: RegExp): string => {
    const at = text.search(boundary);
    return at === -1 ? text : text.substring(0, at);
  };

  const lines: string[] = [];

  const title = textOf(/<h1[^>]*class="[^"]*title[^"]*"[^>]*>([\s\S]*?)<\/h1>/i);
  if (title) {
    lines.push(`Title: ${title}`);
  }

  const author = textOf(/<p[^>]*class="[^"]*eigo[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
  if (author) {
    lines.push(`Author: ${author}`);
  }

  // The date is spread over three elements of the calendar widget; it is only
  // emitted when all three are present.
  const year = textOf(/<span[^>]*class="[^"]*ym-year[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
  const month = textOf(/<span[^>]*class="[^"]*ym-month[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
  const day = textOf(/<p[^>]*class="[^"]*date[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
  if (year !== null && month !== null && day !== null) {
    lines.push(`Date: ${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`);
  }

  // Take everything after the opening box-article tag, then drop whatever
  // follows the first col-l column and the app-button section.
  const boxMatch = /<div[^>]*class="[^"]*box-article[^"]*"[^>]*>([\s\S]*)$/i.exec(articleHtml);
  if (boxMatch !== null) {
    const untilNav = cutBefore(boxMatch[1], /<div[^>]*class="[^"]*col-l[^"]*"[^>]*>/i);
    const untilAppButton = cutBefore(untilNav, /<div[^>]*class="[^"]*app-button[^"]*"/i);
    const mainContent = this.cleanBlogContent(untilAppButton);
    if (mainContent) {
      lines.push('', mainContent);
    }
  }

  return lines.join('\n');
}
|
||||||
|
|
||||||
|
/**
 * Collects metadata about a blog post from the page HTML and its URL.
 *
 * `sourceName` and `url` are always set. `title`, `author`, `publishedDate`
 * (formatted `YYYY-MM-DD`) and `blogId` are added only when the corresponding
 * markup or URL segment is found.
 *
 * @param html full page HTML
 * @param url URL the page was fetched from
 * @returns the metadata object
 */
private extractMetadata(html: string, url: string): WebScraperResult['metadata'] {
  // Text of the first capture group of `pattern` in the page, or undefined
  // when the pattern does not occur.
  const textOf = (pattern: RegExp): string | undefined => {
    const found = pattern.exec(html);
    return found === null ? undefined : this.stripHtml(found[1]).trim();
  };

  const info: WebScraperResult['metadata'] = {
    sourceName: 'Sakurazaka46 Blog',
    url,
  };

  const title = textOf(/<h1[^>]*class="[^"]*title[^"]*"[^>]*>([\s\S]*?)<\/h1>/i);
  if (title !== undefined) {
    info.title = title;
  }

  const author = textOf(/<p[^>]*class="[^"]*eigo[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
  if (author !== undefined) {
    info.author = author;
  }

  // The date needs all three calendar parts; partial dates are not recorded.
  const year = textOf(/<span[^>]*class="[^"]*ym-year[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
  const month = textOf(/<span[^>]*class="[^"]*ym-month[^"]*"[^>]*>([\s\S]*?)<\/span>/i);
  const day = textOf(/<p[^>]*class="[^"]*date[^"]*"[^>]*>([\s\S]*?)<\/p>/i);
  if (year !== undefined && month !== undefined && day !== undefined) {
    info.publishedDate = `${year}-${month.padStart(2, '0')}-${day.padStart(2, '0')}`;
  }

  // The numeric post id is the path segment following "detail/".
  const idMatch = /detail\/(\d+)/.exec(url);
  if (idMatch !== null) {
    info.blogId = idMatch[1];
  }

  return info;
}
|
||||||
|
|
||||||
|
/**
 * Reduces an HTML fragment to plain text.
 *
 * `<br>` and `</p>` become newlines, every other tag is dropped, a small
 * set of character references is decoded, and the result is trimmed.
 *
 * @param html HTML fragment to convert.
 * @returns The plain-text content.
 */
private stripHtml(html: string): string {
  // Character references this scraper decodes, keyed by the text between
  // the ampersand and the semicolon.
  const entityText: Record<string, string> = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'",
  };

  return html
    .replace(/<br\s*\/?>/gi, '\n')
    .replace(/<\/p>/gi, '\n')
    .replace(/<[^>]+>/g, '')
    // Decode in ONE pass so that text produced by decoding an escaped
    // ampersand is never decoded a second time. Decoding happens after tag
    // removal so decoded angle brackets are kept as literal text.
    .replace(/&(nbsp|lt|gt|amp|quot|#39);/g, (_entity, name: string) => entityText[name])
    .trim();
}
|
||||||
|
|
||||||
|
/**
 * Converts the HTML of a blog article body into readable plain text.
 *
 * Steps, in order: drop HTML comments; cut everything from the
 * `blog-foot-nav` or `app-button` container onwards; drop `<script>` and
 * `<style>` elements; turn `<br>` into newlines and `<img>` into
 * `[Image: <filename>]` placeholders; turn `</p>` / `</div>` into line
 * breaks; strip remaining tags; decode a small set of character
 * references; collapse runs of four or more newlines to three; and remove
 * a trailing image placeholder and trailing author/date signature.
 *
 * @param html HTML of the article body.
 * @returns The cleaned plain-text content (may be an empty string).
 */
private cleanBlogContent(html: string): string {
  // Character references this scraper decodes, keyed by the text between
  // the ampersand and the semicolon.
  const entityText: Record<string, string> = {
    nbsp: ' ',
    lt: '<',
    gt: '>',
    amp: '&',
    quot: '"',
    '#39': "'",
  };

  let cleaned = html
    // Remove HTML comments
    .replace(/<!--[\s\S]*?-->/g, '')
    // Remove blog navigation/footer section (everything from it to the end)
    .replace(/<div[^>]*class="[^"]*blog-foot-nav[^"]*"[\s\S]*$/i, '')
    // Remove app button section (everything from it to the end)
    .replace(/<div[^>]*class="[^"]*app-button[^"]*"[\s\S]*$/i, '')
    // Remove script and style elements together with their contents
    .replace(/<script[^>]*>[\s\S]*?<\/script>/gi, '')
    .replace(/<style[^>]*>[\s\S]*?<\/style>/gi, '')
    // Convert <br> tags to newlines
    .replace(/<br\s*\/?>/gi, '\n')
    // Replace images with a placeholder naming the last path segment of src
    .replace(/<img[^>]*src="([^"]*)"[^>]*>/gi, (_tag, src: string) => {
      const filename = src.split('/').pop();
      return `\n[Image: ${filename}]\n`;
    })
    // Paragraph closings become blank lines, div closings single newlines
    .replace(/<\/p>/gi, '\n\n')
    .replace(/<\/div>/gi, '\n')
    // Remove remaining HTML tags
    .replace(/<[^>]+>/g, '')
    // Decode character references in ONE pass so that text produced by
    // decoding an escaped ampersand is never decoded a second time.
    .replace(/&(nbsp|lt|gt|amp|quot|#39);/g, (_entity, name: string) => entityText[name])
    // Clean up excessive whitespace
    .replace(/\n{4,}/g, '\n\n\n')
    .trim();

  // Remove an image placeholder sitting at the very end of the text
  cleaned = cleaned.replace(/\n*\[Image:[^\]]*\]\s*$/g, '');

  // Remove trailing author/date section if present.
  // NOTE(review): this pattern is hard-coded to a single author name, so a
  // trailing signature from any other author is left in place — confirm
  // whether it should be generalized.
  cleaned = cleaned.replace(/\s+村井\s*優\s*\d{4}\/\d{2}\/\d{2}\s*\d{2}:\d{2}\s*$/g, '');

  return cleaned.trim();
}
|
||||||
|
}
|
||||||
@@ -1,13 +1,15 @@
|
|||||||
import { Injectable } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { IEmbeddingProvider } from './interfaces/embedding-provider.interface';
|
import { IEmbeddingProvider } from './interfaces/embedding-provider.interface';
|
||||||
import { LocalEmbeddingProvider } from './providers/local-embedding.provider';
|
import { LocalEmbeddingProvider } from './providers/local-embedding.provider';
|
||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class EmbeddingService {
|
export class EmbeddingService {
|
||||||
|
private readonly logger = new Logger(EmbeddingService.name);
|
||||||
private provider: IEmbeddingProvider;
|
private provider: IEmbeddingProvider;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
const providerType = process.env.EMBEDDING_PROVIDER || 'local';
|
const providerType = process.env.EMBEDDING_PROVIDER || 'local';
|
||||||
|
this.logger.log(`Initializing EmbeddingService with provider: ${providerType}`);
|
||||||
|
|
||||||
switch (providerType) {
|
switch (providerType) {
|
||||||
case 'local':
|
case 'local':
|
||||||
@@ -19,7 +21,11 @@ export class EmbeddingService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
async embed(text: string): Promise<number[]> {
|
async embed(text: string): Promise<number[]> {
|
||||||
return this.provider.embed(text);
|
this.logger.debug(`Generating embedding for text (${text.length} chars)...`);
|
||||||
|
const startTime = Date.now();
|
||||||
|
const result = await this.provider.embed(text);
|
||||||
|
this.logger.debug(`Embedding generated in ${Date.now() - startTime}ms`);
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Injectable } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { EmbeddingService } from './embedding.service';
|
import { EmbeddingService } from './embedding.service';
|
||||||
import { VectorStoreService, SearchResult } from './vector-store.service';
|
import { VectorStoreService, SearchResult } from './vector-store.service';
|
||||||
import { MemoryType } from '@prisma/client';
|
import { MemoryType } from '@prisma/client';
|
||||||
@@ -11,6 +11,8 @@ export interface MemoryContext {
|
|||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class MemoryService {
|
export class MemoryService {
|
||||||
|
private readonly logger = new Logger(MemoryService.name);
|
||||||
|
|
||||||
constructor(
|
constructor(
|
||||||
private embeddingService: EmbeddingService,
|
private embeddingService: EmbeddingService,
|
||||||
private vectorStore: VectorStoreService,
|
private vectorStore: VectorStoreService,
|
||||||
@@ -26,8 +28,15 @@ export class MemoryService {
|
|||||||
metadata?: any;
|
metadata?: any;
|
||||||
},
|
},
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
const startTime = Date.now();
|
||||||
|
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Generating embedding for content (${content.length} chars)...`);
|
||||||
|
|
||||||
const embedding = await this.embeddingService.embed(content);
|
const embedding = await this.embeddingService.embed(content);
|
||||||
|
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Embedding generated in ${Date.now() - startTime}ms, dimension: ${embedding.length}`);
|
||||||
|
|
||||||
|
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Storing in vector store...`);
|
||||||
await this.vectorStore.store(content, embedding, memoryType, options);
|
await this.vectorStore.store(content, embedding, memoryType, options);
|
||||||
|
this.logger.debug(`[${options.knowledgeId || options.conversationId}] Stored in vector store in ${Date.now() - startTime}ms`);
|
||||||
}
|
}
|
||||||
|
|
||||||
async retrieveRelevantMemories(
|
async retrieveRelevantMemories(
|
||||||
@@ -40,9 +49,20 @@ export class MemoryService {
|
|||||||
memoryType?: MemoryType;
|
memoryType?: MemoryType;
|
||||||
},
|
},
|
||||||
): Promise<MemoryContext[]> {
|
): Promise<MemoryContext[]> {
|
||||||
|
const { limit = 5, threshold = 0.6, conversationId, characterId, memoryType } = options;
|
||||||
|
|
||||||
|
this.logger.debug(
|
||||||
|
`[retrieveRelevantMemories] Query: "${query.substring(0, 100)}...", type: ${memoryType}, characterId: ${characterId}, conversationId: ${conversationId}, threshold: ${threshold}`,
|
||||||
|
);
|
||||||
|
|
||||||
const embedding = await this.embeddingService.embed(query);
|
const embedding = await this.embeddingService.embed(query);
|
||||||
const results = await this.vectorStore.searchSimilar(embedding, options);
|
const results = await this.vectorStore.searchSimilar(embedding, options);
|
||||||
|
|
||||||
|
this.logger.debug(`[retrieveRelevantMemories] Found ${results.length} results for type ${memoryType}:`);
|
||||||
|
results.forEach((r, i) => {
|
||||||
|
this.logger.debug(` [${i}] similarity: ${r.similarity.toFixed(4)}, content: "${r.content.substring(0, 80)}..."`);
|
||||||
|
});
|
||||||
|
|
||||||
return results.map((result) => ({
|
return results.map((result) => ({
|
||||||
content: result.content,
|
content: result.content,
|
||||||
metadata: result.metadata,
|
metadata: result.metadata,
|
||||||
@@ -50,37 +70,44 @@ export class MemoryService {
|
|||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
|
|
||||||
async buildContextForConversation(
|
async buildContextForConversation(conversationId: string, currentMessage: string, characterId: string): Promise<string> {
|
||||||
conversationId: string,
|
this.logger.debug(
|
||||||
currentMessage: string,
|
`[buildContextForConversation] Building context for conversation ${conversationId}, character ${characterId}, message: "${currentMessage.substring(0, 100)}"`,
|
||||||
characterId: string,
|
|
||||||
): Promise<string> {
|
|
||||||
// Retrieve recent conversation memories
|
|
||||||
const conversationMemories = await this.retrieveRelevantMemories(
|
|
||||||
currentMessage,
|
|
||||||
{
|
|
||||||
limit: 3,
|
|
||||||
threshold: 0.6,
|
|
||||||
conversationId,
|
|
||||||
memoryType: 'conversation',
|
|
||||||
},
|
|
||||||
);
|
);
|
||||||
|
|
||||||
// Retrieve character knowledge
|
// Retrieve recent conversation memories
|
||||||
const characterMemories = await this.retrieveRelevantMemories(
|
const conversationMemories = await this.retrieveRelevantMemories(currentMessage, {
|
||||||
currentMessage,
|
limit: 3,
|
||||||
{
|
threshold: 0.6,
|
||||||
limit: 3,
|
conversationId,
|
||||||
threshold: 0.7,
|
memoryType: 'conversation',
|
||||||
characterId,
|
});
|
||||||
memoryType: 'character',
|
|
||||||
},
|
// Retrieve character knowledge - using multilingual embedding model for cross-lingual support
|
||||||
);
|
// Lower threshold (0.3) for cross-lingual matching (English query -> Japanese content)
|
||||||
|
let characterMemories = await this.retrieveRelevantMemories(currentMessage, {
|
||||||
|
limit: 3,
|
||||||
|
threshold: 0.3,
|
||||||
|
characterId,
|
||||||
|
memoryType: 'character',
|
||||||
|
});
|
||||||
|
|
||||||
|
// Fallback: If vector search returns no results, retrieve the most recent knowledge
|
||||||
|
// This ensures the character always has some context for roleplaying
|
||||||
|
if (characterMemories.length === 0) {
|
||||||
|
this.logger.debug(`[buildContextForConversation] No similar memories found, falling back to most recent knowledge`);
|
||||||
|
const recentResults = await this.vectorStore.getRecentByCharacterId(characterId, 2);
|
||||||
|
characterMemories = recentResults.map((r) => ({
|
||||||
|
content: r.content,
|
||||||
|
metadata: r.metadata,
|
||||||
|
similarity: r.similarity,
|
||||||
|
}));
|
||||||
|
}
|
||||||
|
|
||||||
const contextParts: string[] = [];
|
const contextParts: string[] = [];
|
||||||
|
|
||||||
if (characterMemories.length > 0) {
|
if (characterMemories.length > 0) {
|
||||||
contextParts.push('Relevant character knowledge:');
|
contextParts.push('Here are some descriptions of yourself. These knowledge are your history, what you have done, talked, or imagined:');
|
||||||
characterMemories.forEach((memory) => {
|
characterMemories.forEach((memory) => {
|
||||||
contextParts.push(`- ${memory.content}`);
|
contextParts.push(`- ${memory.content}`);
|
||||||
});
|
});
|
||||||
@@ -93,30 +120,26 @@ export class MemoryService {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return contextParts.join('\n');
|
const result = contextParts.join('\n');
|
||||||
|
this.logger.debug(`[buildContextForConversation] Final context (${result.length} chars):\n${result || '(empty)'}`);
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async storeConversationMessage(
|
async storeConversationMessage(content: string, conversationId: string, metadata?: any): Promise<void> {
|
||||||
content: string,
|
|
||||||
conversationId: string,
|
|
||||||
metadata?: any,
|
|
||||||
): Promise<void> {
|
|
||||||
await this.addMemory(content, 'conversation', {
|
await this.addMemory(content, 'conversation', {
|
||||||
conversationId,
|
conversationId,
|
||||||
metadata,
|
metadata,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
async storeCharacterKnowledge(
|
async storeCharacterKnowledge(content: string, characterId: string, knowledgeId: string, metadata?: any): Promise<void> {
|
||||||
content: string,
|
this.logger.debug(`[${knowledgeId}] Storing character knowledge chunk for character: ${characterId}`);
|
||||||
characterId: string,
|
|
||||||
knowledgeId: string,
|
|
||||||
metadata?: any,
|
|
||||||
): Promise<void> {
|
|
||||||
await this.addMemory(content, 'character', {
|
await this.addMemory(content, 'character', {
|
||||||
characterId,
|
characterId,
|
||||||
knowledgeId,
|
knowledgeId,
|
||||||
metadata,
|
metadata,
|
||||||
});
|
});
|
||||||
|
this.logger.debug(`[${knowledgeId}] Character knowledge chunk stored successfully`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Injectable, OnModuleInit } from '@nestjs/common';
|
import { Injectable, OnModuleInit, Logger } from '@nestjs/common';
|
||||||
import { IEmbeddingProvider } from '../interfaces/embedding-provider.interface';
|
import { IEmbeddingProvider } from '../interfaces/embedding-provider.interface';
|
||||||
import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
|
import { pipeline, FeatureExtractionPipeline } from '@xenova/transformers';
|
||||||
|
|
||||||
@@ -7,6 +7,8 @@ export class LocalEmbeddingProvider implements IEmbeddingProvider, OnModuleInit
|
|||||||
private extractor: FeatureExtractionPipeline | null = null;
|
private extractor: FeatureExtractionPipeline | null = null;
|
||||||
private readonly modelName: string;
|
private readonly modelName: string;
|
||||||
private readonly dimension: number;
|
private readonly dimension: number;
|
||||||
|
private readonly logger = new Logger(LocalEmbeddingProvider.name);
|
||||||
|
private isLoading = false;
|
||||||
|
|
||||||
constructor() {
|
constructor() {
|
||||||
this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
|
this.modelName = process.env.EMBEDDING_MODEL || 'Xenova/all-MiniLM-L6-v2';
|
||||||
@@ -15,31 +17,72 @@ export class LocalEmbeddingProvider implements IEmbeddingProvider, OnModuleInit
|
|||||||
|
|
||||||
async onModuleInit() {
|
async onModuleInit() {
|
||||||
// Lazy initialization - model will be loaded on first use
|
// Lazy initialization - model will be loaded on first use
|
||||||
|
this.logger.log(`LocalEmbeddingProvider initialized with model: ${this.modelName}`);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async getExtractor(): Promise<FeatureExtractionPipeline> {
|
private async getExtractor(): Promise<FeatureExtractionPipeline> {
|
||||||
if (!this.extractor) {
|
if (!this.extractor) {
|
||||||
this.extractor = await pipeline('feature-extraction', this.modelName, {
|
if (this.isLoading) {
|
||||||
quantized: false, // Use full precision for better quality
|
// Wait for existing load to complete
|
||||||
});
|
while (this.isLoading) {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 100));
|
||||||
|
}
|
||||||
|
if (this.extractor) {
|
||||||
|
return this.extractor;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
this.isLoading = true;
|
||||||
|
this.logger.log(`Loading embedding model: ${this.modelName}...`);
|
||||||
|
|
||||||
|
try {
|
||||||
|
// Use quantized model to reduce memory usage
|
||||||
|
this.extractor = await pipeline('feature-extraction', this.modelName, {
|
||||||
|
quantized: true,
|
||||||
|
revision: 'main',
|
||||||
|
});
|
||||||
|
this.logger.log('Embedding model loaded successfully');
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error('Failed to load embedding model:', error);
|
||||||
|
throw error;
|
||||||
|
} finally {
|
||||||
|
this.isLoading = false;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return this.extractor;
|
return this.extractor;
|
||||||
}
|
}
|
||||||
|
|
||||||
async embed(text: string): Promise<number[]> {
|
async embed(text: string): Promise<number[]> {
|
||||||
|
// Truncate text to prevent excessive memory usage
|
||||||
|
const maxLength = 512; // Maximum tokens the model can handle efficiently
|
||||||
|
const truncatedText = text.length > maxLength * 4 ? text.substring(0, maxLength * 4) : text;
|
||||||
|
|
||||||
const extractor = await this.getExtractor();
|
const extractor = await this.getExtractor();
|
||||||
const output = await extractor(text, { pooling: 'mean', normalize: true });
|
const output = await extractor(truncatedText, { pooling: 'mean', normalize: true });
|
||||||
return Array.from(output.data as Float32Array);
|
|
||||||
|
// Convert to array - this creates a copy, allowing original to be GC'd
|
||||||
|
const result = Array.from(output.data as Float32Array);
|
||||||
|
|
||||||
|
// Add delay to allow GC between embeddings
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 10));
|
||||||
|
|
||||||
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
async embedBatch(texts: string[]): Promise<number[][]> {
|
async embedBatch(texts: string[]): Promise<number[][]> {
|
||||||
const extractor = await this.getExtractor();
|
const results: number[][] = [];
|
||||||
const outputs = await Promise.all(
|
|
||||||
texts.map((text) =>
|
// Process one at a time to minimize memory spikes
|
||||||
extractor(text, { pooling: 'mean', normalize: true }),
|
for (let i = 0; i < texts.length; i++) {
|
||||||
),
|
results.push(await this.embed(texts[i]));
|
||||||
);
|
|
||||||
return outputs.map((output) => Array.from(output.data as Float32Array));
|
// Add delay every few items to allow GC
|
||||||
|
if (i > 0 && i % 2 === 0) {
|
||||||
|
await new Promise((resolve) => setTimeout(resolve, 50));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
getDimension(): number {
|
getDimension(): number {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Injectable } from '@nestjs/common';
|
import { Injectable, Logger } from '@nestjs/common';
|
||||||
import { PrismaService } from '../prisma/prisma.service';
|
import { PrismaService } from '../prisma/prisma.service';
|
||||||
import { MemoryType, VectorMemory } from '@prisma/client';
|
import { MemoryType, VectorMemory } from '@prisma/client';
|
||||||
|
|
||||||
@@ -12,6 +12,8 @@ export interface SearchResult {
|
|||||||
|
|
||||||
@Injectable()
|
@Injectable()
|
||||||
export class VectorStoreService {
|
export class VectorStoreService {
|
||||||
|
private readonly logger = new Logger(VectorStoreService.name);
|
||||||
|
|
||||||
constructor(private prisma: PrismaService) {}
|
constructor(private prisma: PrismaService) {}
|
||||||
|
|
||||||
async store(
|
async store(
|
||||||
@@ -25,23 +27,35 @@ export class VectorStoreService {
|
|||||||
metadata?: any;
|
metadata?: any;
|
||||||
},
|
},
|
||||||
): Promise<VectorMemory> {
|
): Promise<VectorMemory> {
|
||||||
|
const startTime = Date.now();
|
||||||
const vectorString = `[${embedding.join(',')}]`;
|
const vectorString = `[${embedding.join(',')}]`;
|
||||||
|
const metadataJson = options.metadata ? JSON.stringify(options.metadata) : null;
|
||||||
|
|
||||||
|
this.logger.debug(`[${options.knowledgeId}] Storing vector memory, type: ${memoryType}, vector dim: ${embedding.length}`);
|
||||||
|
|
||||||
return this.prisma.$queryRaw<VectorMemory[]>`
|
try {
|
||||||
INSERT INTO "VectorMemory" (id, content, embedding, "memoryType", metadata, "conversationId", "characterId", "knowledgeId", "createdAt")
|
const result = await this.prisma.$queryRaw<VectorMemory[]>`
|
||||||
VALUES (
|
INSERT INTO "VectorMemory" (id, content, embedding, "memoryType", metadata, "conversationId", "characterId", "knowledgeId", "createdAt")
|
||||||
gen_random_uuid(),
|
VALUES (
|
||||||
${content},
|
gen_random_uuid(),
|
||||||
${vectorString}::vector,
|
${content},
|
||||||
${memoryType},
|
${vectorString}::vector,
|
||||||
${options.metadata ? JSON.stringify(options.metadata) : null}::jsonb,
|
${memoryType},
|
||||||
${options.conversationId || null},
|
${metadataJson}::jsonb,
|
||||||
${options.characterId || null},
|
${options.conversationId || null},
|
||||||
${options.knowledgeId || null},
|
${options.characterId || null},
|
||||||
NOW()
|
${options.knowledgeId || null},
|
||||||
)
|
NOW()
|
||||||
RETURNING *
|
)
|
||||||
`.then((results) => results[0]);
|
RETURNING *
|
||||||
|
`.then((results) => results[0]);
|
||||||
|
|
||||||
|
this.logger.debug(`[${options.knowledgeId}] Vector memory stored in ${Date.now() - startTime}ms, id: ${result.id}`);
|
||||||
|
return result;
|
||||||
|
} catch (error) {
|
||||||
|
this.logger.error(`[${options.knowledgeId}] Failed to store vector memory:`, error);
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
async searchSimilar(
|
async searchSimilar(
|
||||||
@@ -93,7 +107,34 @@ export class VectorStoreService {
|
|||||||
LIMIT $3
|
LIMIT $3
|
||||||
`;
|
`;
|
||||||
|
|
||||||
return this.prisma.$queryRawUnsafe<SearchResult[]>(query, ...params);
|
this.logger.debug(`[searchSimilar] Query params: threshold=${threshold}, limit=${limit}, characterId=${options.characterId}, memoryType=${options.memoryType}`);
|
||||||
|
|
||||||
|
const results = await this.prisma.$queryRawUnsafe<SearchResult[]>(query, ...params);
|
||||||
|
|
||||||
|
this.logger.debug(`[searchSimilar] Found ${results.length} results matching criteria`);
|
||||||
|
|
||||||
|
// Debug: Show all similarities for character knowledge
|
||||||
|
if (options.characterId && options.memoryType === 'character') {
|
||||||
|
const allQuery = `
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
content,
|
||||||
|
"memoryType",
|
||||||
|
metadata,
|
||||||
|
1 - (embedding <=> $1::vector) as similarity
|
||||||
|
FROM "VectorMemory"
|
||||||
|
WHERE "characterId" = $2 AND "memoryType" = $3
|
||||||
|
ORDER BY embedding <=> $1::vector
|
||||||
|
LIMIT 10
|
||||||
|
`;
|
||||||
|
const allResults = await this.prisma.$queryRawUnsafe<SearchResult[]>(allQuery, vectorString, options.characterId, options.memoryType);
|
||||||
|
this.logger.debug(`[searchSimilar] All ${allResults.length} similarities for character ${options.characterId}:`);
|
||||||
|
allResults.forEach((r, i) => {
|
||||||
|
this.logger.debug(` [${i}] similarity=${r.similarity.toFixed(4)}, content="${r.content.substring(0, 50)}..."`);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
return results;
|
||||||
}
|
}
|
||||||
|
|
||||||
async deleteByConversation(conversationId: string): Promise<void> {
|
async deleteByConversation(conversationId: string): Promise<void> {
|
||||||
@@ -113,4 +154,28 @@ export class VectorStoreService {
|
|||||||
where: { knowledgeId },
|
where: { knowledgeId },
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieve most recent character knowledge (fallback when similarity search returns nothing)
|
||||||
|
*/
|
||||||
|
async getRecentByCharacterId(characterId: string, limit: number = 2): Promise<SearchResult[]> {
|
||||||
|
this.logger.debug(`[getRecentByCharacterId] Retrieving recent knowledge for character ${characterId}`);
|
||||||
|
|
||||||
|
const results = await this.prisma.$queryRaw<SearchResult[]>`
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
content,
|
||||||
|
"memoryType",
|
||||||
|
metadata,
|
||||||
|
1.0 as similarity
|
||||||
|
FROM "VectorMemory"
|
||||||
|
WHERE "characterId" = ${characterId} AND "memoryType" = 'character'
|
||||||
|
ORDER BY "createdAt" DESC
|
||||||
|
LIMIT ${limit}
|
||||||
|
`;
|
||||||
|
|
||||||
|
this.logger.debug(`[getRecentByCharacterId] Found ${results.length} recent memories`);
|
||||||
|
|
||||||
|
return results;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import { CharacterList } from './pages/CharacterList';
|
|||||||
import { CharacterForm } from './pages/CharacterForm';
|
import { CharacterForm } from './pages/CharacterForm';
|
||||||
import { ConversationList } from './pages/ConversationList';
|
import { ConversationList } from './pages/ConversationList';
|
||||||
import { Chat } from './pages/Chat';
|
import { Chat } from './pages/Chat';
|
||||||
|
import { KnowledgeImport } from './pages/KnowledgeImport';
|
||||||
|
|
||||||
// OAuth Callback Handler - processes tokens from URL before routing
|
// OAuth Callback Handler - processes tokens from URL before routing
|
||||||
function OAuthCallbackHandler({ children }: { children: React.ReactNode }) {
|
function OAuthCallbackHandler({ children }: { children: React.ReactNode }) {
|
||||||
@@ -124,6 +125,15 @@ function App() {
|
|||||||
</PrivateRoute>
|
</PrivateRoute>
|
||||||
}
|
}
|
||||||
/>
|
/>
|
||||||
|
|
||||||
|
<Route
|
||||||
|
path="/characters/:characterId/knowledge"
|
||||||
|
element={
|
||||||
|
<PrivateRoute>
|
||||||
|
<KnowledgeImport />
|
||||||
|
</PrivateRoute>
|
||||||
|
}
|
||||||
|
/>
|
||||||
</Routes>
|
</Routes>
|
||||||
</OAuthCallbackHandler>
|
</OAuthCallbackHandler>
|
||||||
</BrowserRouter>
|
</BrowserRouter>
|
||||||
|
|||||||
@@ -6,7 +6,9 @@
|
|||||||
* OpenAPI spec version: 1.0.0
|
* OpenAPI spec version: 1.0.0
|
||||||
*/
|
*/
|
||||||
import type {
|
import type {
|
||||||
ImportControllerUploadFileBody
|
ImportControllerUploadFileBody,
|
||||||
|
ImportUrlDto,
|
||||||
|
UploadResponseDto,
|
||||||
} from '.././model';
|
} from '.././model';
|
||||||
|
|
||||||
import { customFetch } from '../../mutator/custom-fetch';
|
import { customFetch } from '../../mutator/custom-fetch';
|
||||||
@@ -109,3 +111,29 @@ export const importControllerDeleteKnowledge = async (knowledgeId: string, optio
|
|||||||
);}
|
);}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/**
|
||||||
|
* @summary Import content from URL for character knowledge
|
||||||
|
*/
|
||||||
|
export const getImportControllerImportFromUrlUrl = (characterId: string,) => {
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
return `http://localhost:3000/api/import/characters/${characterId}/url`
|
||||||
|
}
|
||||||
|
|
||||||
|
export const importControllerImportFromUrl = async (characterId: string,
|
||||||
|
importUrlDto: ImportUrlDto, options?: RequestInit): Promise<UploadResponseDto> => {
|
||||||
|
|
||||||
|
return customFetch<UploadResponseDto>(getImportControllerImportFromUrlUrl(characterId),
|
||||||
|
{
|
||||||
|
...options,
|
||||||
|
method: 'POST',
|
||||||
|
headers: {
|
||||||
|
...options?.headers,
|
||||||
|
'Content-Type': 'application/json',
|
||||||
|
},
|
||||||
|
body: JSON.stringify(importUrlDto),
|
||||||
|
}
|
||||||
|
);}
|
||||||
|
|||||||
12
apps/frontend/src/api/generated/model/importUrlDto.ts
Normal file
12
apps/frontend/src/api/generated/model/importUrlDto.ts
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
/**
|
||||||
|
* Generated by orval v8.4.2 🍺
|
||||||
|
* Do not edit manually.
|
||||||
|
* DreamChat API
|
||||||
|
* The DreamChat API documentation
|
||||||
|
* OpenAPI spec version: 1.0.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface ImportUrlDto {
|
||||||
|
/** URL to import */
|
||||||
|
url: string;
|
||||||
|
}
|
||||||
@@ -20,7 +20,9 @@ export * from './createCharacterDtoAttributes';
|
|||||||
export * from './createCharacterDtoConfig';
|
export * from './createCharacterDtoConfig';
|
||||||
export * from './createConversationDto';
|
export * from './createConversationDto';
|
||||||
export * from './importControllerUploadFileBody';
|
export * from './importControllerUploadFileBody';
|
||||||
|
export * from './importUrlDto';
|
||||||
export * from './keycloakConfigDto';
|
export * from './keycloakConfigDto';
|
||||||
|
export * from './uploadResponseDto';
|
||||||
export * from './keycloakLoginUrlDto';
|
export * from './keycloakLoginUrlDto';
|
||||||
export * from './loginDto';
|
export * from './loginDto';
|
||||||
export * from './messageResponseDto';
|
export * from './messageResponseDto';
|
||||||
|
|||||||
14
apps/frontend/src/api/generated/model/uploadResponseDto.ts
Normal file
14
apps/frontend/src/api/generated/model/uploadResponseDto.ts
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
/**
|
||||||
|
* Generated by orval v8.4.2 🍺
|
||||||
|
* Do not edit manually.
|
||||||
|
* DreamChat API
|
||||||
|
* The DreamChat API documentation
|
||||||
|
* OpenAPI spec version: 1.0.0
|
||||||
|
*/
|
||||||
|
|
||||||
|
export interface UploadResponseDto {
|
||||||
|
/** Knowledge ID */
|
||||||
|
knowledgeId: string;
|
||||||
|
/** Status message */
|
||||||
|
message: string;
|
||||||
|
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
import { useEffect, useState } from 'react';
|
import { useEffect, useState } from 'react';
|
||||||
import { useNavigate, useParams, Link } from 'react-router-dom';
|
import { useNavigate, useParams, Link } from 'react-router-dom';
|
||||||
import { useCharacterStore } from '../stores/characterStore';
|
import { useCharacterStore } from '../stores/characterStore';
|
||||||
|
import { importControllerGetCharacterKnowledge } from '../api/generated/import/import';
|
||||||
|
|
||||||
export function CharacterForm() {
|
export function CharacterForm() {
|
||||||
const { id } = useParams<{ id: string }>();
|
const { id } = useParams<{ id: string }>();
|
||||||
@@ -23,10 +24,17 @@ export function CharacterForm() {
|
|||||||
const [avatarUrl, setAvatarUrl] = useState('');
|
const [avatarUrl, setAvatarUrl] = useState('');
|
||||||
const [isPublic, setIsPublic] = useState(false);
|
const [isPublic, setIsPublic] = useState(false);
|
||||||
const [attributes, setAttributes] = useState('{}');
|
const [attributes, setAttributes] = useState('{}');
|
||||||
|
const [knowledgeCount, setKnowledgeCount] = useState(0);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (isEditing && id) {
|
if (isEditing && id) {
|
||||||
getCharacter(id);
|
getCharacter(id);
|
||||||
|
// Fetch knowledge count
|
||||||
|
importControllerGetCharacterKnowledge(id).then((knowledge: any) => {
|
||||||
|
setKnowledgeCount(knowledge?.length || 0);
|
||||||
|
}).catch(() => {
|
||||||
|
setKnowledgeCount(0);
|
||||||
|
});
|
||||||
}
|
}
|
||||||
return () => {
|
return () => {
|
||||||
setCurrentCharacter(null);
|
setCurrentCharacter(null);
|
||||||
@@ -174,6 +182,28 @@ export function CharacterForm() {
|
|||||||
</label>
|
</label>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
{isEditing && id && (
|
||||||
|
<div className="bg-gray-50 rounded-lg p-4">
|
||||||
|
<div className="flex items-center justify-between">
|
||||||
|
<div>
|
||||||
|
<h3 className="text-sm font-medium text-gray-900">Knowledge Sources</h3>
|
||||||
|
<p className="text-sm text-gray-500">
|
||||||
|
{knowledgeCount === 0
|
||||||
|
? 'No knowledge imported yet'
|
||||||
|
: `${knowledgeCount} knowledge source${knowledgeCount === 1 ? '' : 's'} imported`
|
||||||
|
}
|
||||||
|
</p>
|
||||||
|
</div>
|
||||||
|
<Link
|
||||||
|
to={`/characters/${id}/knowledge`}
|
||||||
|
className="inline-flex items-center px-3 py-1.5 border border-gray-300 rounded text-sm font-medium text-gray-700 bg-white hover:bg-gray-50"
|
||||||
|
>
|
||||||
|
{knowledgeCount === 0 ? 'Import Knowledge' : 'Manage Knowledge'}
|
||||||
|
</Link>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
)}
|
||||||
|
|
||||||
<div className="flex space-x-4">
|
<div className="flex space-x-4">
|
||||||
<button
|
<button
|
||||||
type="submit"
|
type="submit"
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ import { useCharacterStore } from '../stores/characterStore';
|
|||||||
import { useAuthStore } from '../stores/authStore';
|
import { useAuthStore } from '../stores/authStore';
|
||||||
import type { Message } from '../types';
|
import type { Message } from '../types';
|
||||||
import { io, Socket } from 'socket.io-client';
|
import { io, Socket } from 'socket.io-client';
|
||||||
|
import { chatControllerSendMessage } from '../api/generated/conversations/conversations';
|
||||||
|
|
||||||
const WS_URL = (import.meta.env as unknown as ImportMetaEnv).VITE_WS_URL || 'http://localhost:3000';
|
const WS_URL = (import.meta.env as unknown as ImportMetaEnv).VITE_WS_URL || 'http://localhost:3000';
|
||||||
|
|
||||||
@@ -26,6 +27,7 @@ export function Chat() {
|
|||||||
|
|
||||||
const [message, setMessage] = useState('');
|
const [message, setMessage] = useState('');
|
||||||
const [streamingContent, setStreamingContent] = useState('');
|
const [streamingContent, setStreamingContent] = useState('');
|
||||||
|
const [socketConnected, setSocketConnected] = useState(false);
|
||||||
const messagesEndRef = useRef<HTMLDivElement>(null);
|
const messagesEndRef = useRef<HTMLDivElement>(null);
|
||||||
const socketRef = useRef<Socket | null>(null);
|
const socketRef = useRef<Socket | null>(null);
|
||||||
|
|
||||||
@@ -42,6 +44,17 @@ export function Chat() {
|
|||||||
|
|
||||||
socket.on('connect', () => {
|
socket.on('connect', () => {
|
||||||
console.log('Connected to chat server');
|
console.log('Connected to chat server');
|
||||||
|
setSocketConnected(true);
|
||||||
|
});
|
||||||
|
|
||||||
|
socket.on('disconnect', () => {
|
||||||
|
console.log('Disconnected from chat server');
|
||||||
|
setSocketConnected(false);
|
||||||
|
});
|
||||||
|
|
||||||
|
socket.on('connect_error', (err) => {
|
||||||
|
console.error('Socket connection error:', err.message);
|
||||||
|
setSocketConnected(false);
|
||||||
});
|
});
|
||||||
|
|
||||||
socket.on('message_chunk', (data: { conversationId: string; chunk: { content: string } }) => {
|
socket.on('message_chunk', (data: { conversationId: string; chunk: { content: string } }) => {
|
||||||
@@ -111,11 +124,30 @@ export function Chat() {
|
|||||||
setStreamingContent('');
|
setStreamingContent('');
|
||||||
setStreaming(true);
|
setStreaming(true);
|
||||||
|
|
||||||
// Send via socket for streaming
|
// Check if socket is connected, otherwise fallback to HTTP
|
||||||
socketRef.current?.emit('send_message', {
|
if (socketRef.current?.connected) {
|
||||||
conversationId,
|
socketRef.current.emit('send_message', {
|
||||||
content,
|
conversationId,
|
||||||
});
|
content,
|
||||||
|
});
|
||||||
|
} else {
|
||||||
|
// Fallback to HTTP API when socket is not connected
|
||||||
|
try {
|
||||||
|
const result = await chatControllerSendMessage(conversationId, { content });
|
||||||
|
|
||||||
|
// Add messages to the conversation
|
||||||
|
if (result.userMessage) {
|
||||||
|
addMessage(result.userMessage as Message);
|
||||||
|
}
|
||||||
|
if (result.assistantMessage) {
|
||||||
|
addMessage(result.assistantMessage as Message);
|
||||||
|
}
|
||||||
|
setStreaming(false);
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Failed to send message:', error);
|
||||||
|
setStreaming(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const handleLogout = () => {
|
const handleLogout = () => {
|
||||||
|
|||||||
383
apps/frontend/src/pages/KnowledgeImport.tsx
Normal file
383
apps/frontend/src/pages/KnowledgeImport.tsx
Normal file
@@ -0,0 +1,383 @@
|
|||||||
|
import { useEffect, useState, useCallback } from 'react';
|
||||||
|
import { useParams, Link } from 'react-router-dom';
|
||||||
|
import { useCharacterStore } from '../stores/characterStore';
|
||||||
|
import { importControllerImportFromUrl, importControllerDeleteKnowledge } from '../api/generated/import/import';
|
||||||
|
|
||||||
|
/** Origin of a knowledge entry's content. */
type KnowledgeSourceType = 'file' | 'url' | 'manual';

/** Processing state of a knowledge entry. */
type KnowledgeStatus = 'pending' | 'processing' | 'completed' | 'failed';

/**
 * Optional processing details attached to a knowledge entry.
 * Only `error` and `chunksProcessed` are read by this page; any other keys
 * are passed through untyped.
 */
type KnowledgeProcessingInfo = {
  error?: string;
  chunksProcessed?: number;
  [key: string]: any;
};

/** One knowledge source belonging to a character, as listed on this page. */
interface KnowledgeItem {
  id: string;
  name: string;
  sourceType: KnowledgeSourceType;
  // Displayed as the source URL when `sourceType` is 'url'.
  sourceName: string;
  status: KnowledgeStatus;
  // Passed to `new Date(...)` for display.
  createdAt: string;
  processingInfo?: KnowledgeProcessingInfo;
}
|
||||||
|
|
||||||
|
/** URL patterns shown to the user as examples of importable pages. */
const SUPPORTED_PATTERNS = [
  { label: 'Sakurazaka46 Blog', pattern: 'sakurazaka46.com/s/s46/diary/detail/' },
];

/** Hostnames accepted by the client-side URL check. */
const SUPPORTED_DOMAINS = ['sakurazaka46.com', 'www.sakurazaka46.com'];

// NOTE(review): the API origin is hard-coded, so the knowledge list only loads
// when the backend runs on localhost:3000. Presumably this should come from the
// same configuration the generated API client uses - confirm and replace.
const KNOWLEDGE_API_ORIGIN = 'http://localhost:3000';

/** How long the "import started" banner stays visible. */
const SUCCESS_MESSAGE_TIMEOUT_MS = 5000;

/**
 * Client-side pre-check of a URL before it is sent to the import endpoint.
 *
 * @param inputUrl - Trimmed URL text entered by the user.
 * @returns `{ valid: true }` when the URL is an http(s) link to a supported
 *   host whose path contains `/diary/detail/`; otherwise `{ valid: false }`
 *   with a user-facing `message`.
 */
function validateUrl(inputUrl: string): { valid: boolean; message?: string } {
  let urlObj: URL;
  try {
    urlObj = new URL(inputUrl);
  } catch {
    return { valid: false, message: 'Invalid URL format' };
  }

  // Only web pages can be imported; reject ftp:, javascript:, data:, etc.
  if (urlObj.protocol !== 'http:' && urlObj.protocol !== 'https:') {
    return { valid: false, message: 'Only http:// and https:// URLs are supported.' };
  }

  // Check if it's a supported domain
  if (!SUPPORTED_DOMAINS.includes(urlObj.hostname)) {
    return {
      valid: false,
      message: `Unsupported website: ${urlObj.hostname}. Currently only Sakurazaka46 blogs are supported.`,
    };
  }

  // Check if it's a blog detail page
  if (!urlObj.pathname.includes('/diary/detail/')) {
    return {
      valid: false,
      message: 'URL does not appear to be a valid blog post. Please use a blog detail URL.',
    };
  }

  return { valid: true };
}

/**
 * Pulls a human-readable message out of a caught value without assuming its shape.
 *
 * Prefers `response.data.message` when the value carries one (assumes an
 * axios-style error from the generated client - TODO confirm), then the
 * value's own `message`, then `fallback`.
 *
 * @param err - Whatever was thrown or rejected.
 * @param fallback - Message to use when nothing usable is found.
 */
function extractErrorMessage(err: unknown, fallback: string): string {
  if (typeof err === 'string' && err) return err;

  if (err && typeof err === 'object') {
    const candidate = err as {
      message?: unknown;
      response?: { data?: { message?: unknown } };
    };
    const backendMessage = candidate.response?.data?.message;

    if (Array.isArray(backendMessage) && backendMessage.length > 0) {
      return backendMessage.join(', ');
    }
    if (typeof backendMessage === 'string' && backendMessage) return backendMessage;
    if (typeof candidate.message === 'string' && candidate.message) return candidate.message;
  }

  return fallback;
}

/**
 * Maps a raw import failure message to the friendlier text shown in the form.
 * Messages that match no known pattern are returned unchanged.
 */
function describeImportError(errorMessage: string): string {
  if (errorMessage.includes('Unsupported URL')) {
    return 'This website is not supported for import. Currently only Sakurazaka46 blogs are supported.';
  }
  if (errorMessage.includes('Failed to fetch') || errorMessage.includes('Failed to scrape')) {
    return 'Could not fetch content from the URL. Please check the URL is correct and accessible.';
  }
  if (errorMessage.includes('Could not find article')) {
    return 'Could not find blog content on the page. Please check this is a valid blog post URL.';
  }
  return errorMessage;
}

/** Tailwind classes for the status badge; unknown statuses get the neutral gray style. */
function getStatusColor(status: string): string {
  switch (status) {
    case 'completed': return 'bg-green-100 text-green-800';
    case 'processing': return 'bg-yellow-100 text-yellow-800';
    case 'failed': return 'bg-red-100 text-red-800';
    default: return 'bg-gray-100 text-gray-800';
  }
}

/** Single-character icon for the status badge; unknown statuses get an open circle. */
function getStatusIcon(status: string): string {
  switch (status) {
    case 'completed': return '✓';
    case 'processing': return '⟳';
    case 'failed': return '✗';
    default: return '○';
  }
}

/** Formats a date string as e.g. "Jan 5, 2025, 03:07 PM" using the en-US locale. */
function formatDate(dateString: string): string {
  return new Date(dateString).toLocaleDateString('en-US', {
    year: 'numeric',
    month: 'short',
    day: 'numeric',
    hour: '2-digit',
    minute: '2-digit',
  });
}

/** Shortens `url` to `maxLength` characters plus an ellipsis when it is longer than that. */
function truncateUrl(url: string, maxLength: number = 50): string {
  if (url.length <= maxLength) return url;
  return url.substring(0, maxLength) + '...';
}

/**
 * Page for importing blog posts by URL as knowledge for a character and for
 * listing / deleting the character's existing knowledge sources.
 *
 * Reads `characterId` from the route params.
 */
export function KnowledgeImport() {
  const { characterId } = useParams<{ characterId: string }>();
  const { currentCharacter, getCharacter } = useCharacterStore();

  const [url, setUrl] = useState('');
  const [knowledgeList, setKnowledgeList] = useState<KnowledgeItem[]>([]);
  const [isLoading, setIsLoading] = useState(false);
  const [isImporting, setIsImporting] = useState(false);
  const [error, setError] = useState<string | null>(null);
  const [successMessage, setSuccessMessage] = useState<string | null>(null);

  // Loads the character's knowledge list. Failures are deliberately not shown
  // to the user (the list simply stays as it was), but they are logged.
  const fetchKnowledge = useCallback(async () => {
    if (!characterId) return;

    setIsLoading(true);
    try {
      const response = await fetch(
        `${KNOWLEDGE_API_ORIGIN}/api/import/characters/${encodeURIComponent(characterId)}/knowledge`,
        {
          headers: {
            'Authorization': `Bearer ${localStorage.getItem('accessToken')}`,
          },
        },
      );

      if (response.ok) {
        const data: unknown = await response.json();
        // Guard against a non-array payload: the render path calls
        // `.length` and `.map` on this state.
        setKnowledgeList(Array.isArray(data) ? (data as KnowledgeItem[]) : []);
      } else {
        console.error(`Failed to load knowledge list: HTTP ${response.status}`);
      }
    } catch (err) {
      console.error('Failed to load knowledge list:', err);
    } finally {
      setIsLoading(false);
    }
  }, [characterId]);

  useEffect(() => {
    if (characterId) {
      getCharacter(characterId);
      // fetchKnowledge handles its own errors, so the promise is intentionally not awaited.
      void fetchKnowledge();
    }
  }, [characterId, getCharacter, fetchKnowledge]);

  const handleSubmit = async (e: React.FormEvent) => {
    e.preventDefault();
    setError(null);
    setSuccessMessage(null);

    if (!characterId) {
      setError('Character ID is missing');
      return;
    }

    const trimmedUrl = url.trim();
    if (!trimmedUrl) {
      setError('Please enter a URL');
      return;
    }

    // Validate URL before attempting import
    const validation = validateUrl(trimmedUrl);
    if (!validation.valid) {
      setError(validation.message || 'Invalid URL');
      return;
    }

    setIsImporting(true);
    try {
      const result = await importControllerImportFromUrl(characterId, { url: trimmedUrl });

      const message = `Import started! Knowledge ID: ${result.knowledgeId}`;
      setSuccessMessage(message);
      setUrl('');

      // Refresh the knowledge list
      await fetchKnowledge();

      // Clear the banner after a delay, but only if it still shows THIS
      // message, so a timer from an earlier import cannot wipe a newer banner.
      setTimeout(() => {
        setSuccessMessage((current) => (current === message ? null : current));
      }, SUCCESS_MESSAGE_TIMEOUT_MS);
    } catch (err: unknown) {
      setError(describeImportError(extractErrorMessage(err, 'Failed to import URL')));
    } finally {
      setIsImporting(false);
    }
  };

  const handleDelete = async (knowledgeId: string) => {
    if (!window.confirm('Are you sure you want to delete this knowledge?')) return;

    try {
      await importControllerDeleteKnowledge(knowledgeId);
      await fetchKnowledge();
    } catch (err: unknown) {
      setError(extractErrorMessage(err, 'Failed to delete knowledge'));
    }
  };

  return (
    <div className="min-h-screen bg-gray-50">
      <header className="bg-white shadow">
        <div className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-4">
          <div className="flex items-center justify-between">
            <div className="flex items-center space-x-4">
              <Link to={`/characters/${characterId}`} className="text-gray-600 hover:text-gray-900">
                ← Back to Character
              </Link>
              <h1 className="text-xl font-bold text-gray-900">
                Knowledge Import
                {currentCharacter && (
                  <span className="text-gray-500 font-normal"> - {currentCharacter.name}</span>
                )}
              </h1>
            </div>
          </div>
        </div>
      </header>

      <main className="max-w-7xl mx-auto px-4 sm:px-6 lg:px-8 py-8">
        <div className="grid grid-cols-1 lg:grid-cols-2 gap-8">
          {/* Left column - Import form */}
          <div className="space-y-6">
            {/* URL Import Card */}
            <div className="bg-white rounded-lg shadow p-6">
              <h2 className="text-lg font-semibold text-gray-900 mb-4">
                Import from URL
              </h2>

              {error && (
                <div className="mb-4 bg-red-50 border border-red-200 text-red-700 p-3 rounded">
                  <div className="flex items-start">
                    <span className="mr-2">⚠️</span>
                    <span>{error}</span>
                  </div>
                </div>
              )}

              {successMessage && (
                <div className="mb-4 bg-green-50 border border-green-200 text-green-700 p-3 rounded">
                  <div className="flex items-start">
                    <span className="mr-2">✅</span>
                    <span>{successMessage}</span>
                  </div>
                </div>
              )}

              <form onSubmit={handleSubmit} className="space-y-4">
                <div>
                  <label htmlFor="url" className="block text-sm font-medium text-gray-700">
                    Blog URL
                  </label>
                  <input
                    type="url"
                    id="url"
                    value={url}
                    onChange={(e) => setUrl(e.target.value)}
                    placeholder="https://sakurazaka46.com/s/s46/diary/detail/68008"
                    className="mt-1 block w-full px-3 py-2 border border-gray-300 rounded-md shadow-sm focus:outline-none focus:ring-indigo-500 focus:border-indigo-500"
                  />
                  <p className="mt-1 text-sm text-gray-500">
                    Enter a Sakurazaka46 blog URL to import as character knowledge.
                  </p>
                </div>

                <button
                  type="submit"
                  disabled={isImporting || !url.trim()}
                  className="w-full flex justify-center py-2 px-4 border border-transparent rounded-md shadow-sm text-sm font-medium text-white bg-indigo-600 hover:bg-indigo-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-indigo-500 disabled:opacity-50 disabled:cursor-not-allowed"
                >
                  {isImporting ? (
                    <span className="flex items-center">
                      <svg className="animate-spin -ml-1 mr-3 h-5 w-5 text-white" xmlns="http://www.w3.org/2000/svg" fill="none" viewBox="0 0 24 24">
                        <circle className="opacity-25" cx="12" cy="12" r="10" stroke="currentColor" strokeWidth="4"></circle>
                        <path className="opacity-75" fill="currentColor" d="M4 12a8 8 0 018-8V0C5.373 0 0 5.373 0 12h4zm2 5.291A7.962 7.962 0 014 12H0c0 3.042 1.135 5.824 3 7.938l3-2.647z"></path>
                      </svg>
                      Importing...
                    </span>
                  ) : (
                    'Import from URL'
                  )}
                </button>
              </form>
            </div>

            {/* Supported Sites Card */}
            <div className="bg-white rounded-lg shadow p-6">
              <h3 className="text-md font-medium text-gray-900 mb-3">
                Supported Websites
              </h3>
              <ul className="space-y-2">
                {SUPPORTED_PATTERNS.map((pattern) => (
                  <li key={pattern.pattern} className="flex items-center text-sm text-gray-600">
                    <span className="w-2 h-2 bg-green-400 rounded-full mr-2"></span>
                    <span className="font-medium">{pattern.label}</span>
                    <code className="ml-2 px-2 py-0.5 bg-gray-100 rounded text-xs">
                      {pattern.pattern}
                    </code>
                  </li>
                ))}
              </ul>
              <p className="mt-3 text-xs text-gray-500">
                More websites will be supported in future updates.
              </p>
            </div>
          </div>

          {/* Right column - Knowledge list */}
          <div className="bg-white rounded-lg shadow p-6">
            <h2 className="text-lg font-semibold text-gray-900 mb-4">
              Knowledge Sources
            </h2>

            {isLoading ? (
              <div className="text-center py-8">
                <div className="animate-spin rounded-full h-8 w-8 border-b-2 border-indigo-600 mx-auto"></div>
                <p className="mt-2 text-sm text-gray-500">Loading...</p>
              </div>
            ) : knowledgeList.length === 0 ? (
              <div className="text-center py-12 bg-gray-50 rounded-lg">
                <p className="text-gray-500">No knowledge sources yet.</p>
                <p className="text-sm text-gray-400 mt-1">
                  Import a blog URL to add knowledge to this character.
                </p>
              </div>
            ) : (
              <div className="space-y-4 max-h-[600px] overflow-y-auto">
                {knowledgeList.map((knowledge) => {
                  // Compare numerically: a bare `chunksProcessed && ...` would
                  // make React render a literal "0" when the count is zero.
                  const chunksProcessed = knowledge.processingInfo?.chunksProcessed ?? 0;

                  return (
                    <div
                      key={knowledge.id}
                      className="border border-gray-200 rounded-lg p-4 hover:shadow-md transition-shadow"
                    >
                      <div className="flex items-start justify-between">
                        <div className="flex-1 min-w-0">
                          <div className="flex items-center space-x-2">
                            <span className={`inline-flex items-center px-2 py-0.5 rounded text-xs font-medium ${getStatusColor(knowledge.status)}`}>
                              <span className="mr-1">{getStatusIcon(knowledge.status)}</span>
                              {knowledge.status.charAt(0).toUpperCase() + knowledge.status.slice(1)}
                            </span>
                            <span className="text-xs text-gray-500">
                              {knowledge.sourceType === 'url' ? '🔗' : '📄'} {knowledge.sourceType}
                            </span>
                          </div>

                          <h3 className="mt-2 text-sm font-medium text-gray-900 truncate">
                            {knowledge.name}
                          </h3>

                          {knowledge.sourceType === 'url' && (
                            <p className="mt-1 text-xs text-gray-500 truncate" title={knowledge.sourceName}>
                              {truncateUrl(knowledge.sourceName)}
                            </p>
                          )}

                          <p className="mt-1 text-xs text-gray-400">
                            {formatDate(knowledge.createdAt)}
                          </p>

                          {knowledge.status === 'processing' && (
                            <p className="mt-2 text-xs text-yellow-600">
                              Processing content... This may take a moment.
                            </p>
                          )}

                          {knowledge.status === 'failed' && knowledge.processingInfo?.error && (
                            <p className="mt-2 text-xs text-red-600">
                              Error: {knowledge.processingInfo.error}
                            </p>
                          )}

                          {knowledge.status === 'completed' && chunksProcessed > 0 && (
                            <p className="mt-2 text-xs text-green-600">
                              ✓ Processed {chunksProcessed} chunks
                            </p>
                          )}
                        </div>

                        <button
                          type="button"
                          onClick={() => handleDelete(knowledge.id)}
                          className="ml-4 text-red-600 hover:text-red-800 text-sm"
                          title="Delete knowledge"
                          aria-label="Delete knowledge"
                        >
                          🗑️
                        </button>
                      </div>
                    </div>
                  );
                })}
              </div>
            )}
          </div>
        </div>
      </main>
    </div>
  );
}
|
||||||
@@ -902,6 +902,59 @@
|
|||||||
"import"
|
"import"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
},
|
||||||
|
"/api/import/characters/{characterId}/url": {
|
||||||
|
"post": {
|
||||||
|
"operationId": "ImportController_importFromUrl",
|
||||||
|
"parameters": [
|
||||||
|
{
|
||||||
|
"name": "characterId",
|
||||||
|
"required": true,
|
||||||
|
"in": "path",
|
||||||
|
"description": "Character ID",
|
||||||
|
"schema": {
|
||||||
|
"type": "string"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"requestBody": {
|
||||||
|
"required": true,
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/ImportUrlDto"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"responses": {
|
||||||
|
"201": {
|
||||||
|
"description": "URL content is being imported and processed",
|
||||||
|
"content": {
|
||||||
|
"application/json": {
|
||||||
|
"schema": {
|
||||||
|
"$ref": "#/components/schemas/UploadResponseDto"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"400": {
|
||||||
|
"description": "Invalid URL or unsupported website"
|
||||||
|
},
|
||||||
|
"401": {
|
||||||
|
"description": "Unauthorized"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"security": [
|
||||||
|
{
|
||||||
|
"bearer": []
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"summary": "Import content from URL for character knowledge",
|
||||||
|
"tags": [
|
||||||
|
"import"
|
||||||
|
]
|
||||||
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"info": {
|
"info": {
|
||||||
@@ -1462,6 +1515,36 @@
|
|||||||
"userMessage",
|
"userMessage",
|
||||||
"assistantMessage"
|
"assistantMessage"
|
||||||
]
|
]
|
||||||
|
},
|
||||||
|
"ImportUrlDto": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"url": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "URL to import",
|
||||||
|
"example": "https://sakurazaka46.com/s/s46/diary/detail/68008"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"url"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"UploadResponseDto": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"knowledgeId": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Knowledge ID"
|
||||||
|
},
|
||||||
|
"message": {
|
||||||
|
"type": "string",
|
||||||
|
"description": "Status message"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": [
|
||||||
|
"knowledgeId",
|
||||||
|
"message"
|
||||||
|
]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
94
pnpm-lock.yaml
generated
94
pnpm-lock.yaml
generated
@@ -105,6 +105,9 @@ importers:
|
|||||||
'@types/bcrypt':
|
'@types/bcrypt':
|
||||||
specifier: ^6.0.0
|
specifier: ^6.0.0
|
||||||
version: 6.0.0
|
version: 6.0.0
|
||||||
|
'@types/jest':
|
||||||
|
specifier: ^30.0.0
|
||||||
|
version: 30.0.0
|
||||||
'@types/jsonwebtoken':
|
'@types/jsonwebtoken':
|
||||||
specifier: ^9.0.0
|
specifier: ^9.0.0
|
||||||
version: 9.0.10
|
version: 9.0.10
|
||||||
@@ -126,6 +129,9 @@ importers:
|
|||||||
prisma:
|
prisma:
|
||||||
specifier: ^7.4.1
|
specifier: ^7.4.1
|
||||||
version: 7.4.1(@types/react@18.3.28)(react-dom@18.3.1)(react@18.3.1)(typescript@5.9.3)
|
version: 7.4.1(@types/react@18.3.28)(react-dom@18.3.1)(react@18.3.1)(typescript@5.9.3)
|
||||||
|
ts-jest:
|
||||||
|
specifier: ^29.4.6
|
||||||
|
version: 29.4.6(@babel/core@7.29.0)(jest@30.2.0)(typescript@5.9.3)
|
||||||
typescript:
|
typescript:
|
||||||
specifier: ^5.3.0
|
specifier: ^5.3.0
|
||||||
version: 5.9.3
|
version: 5.9.3
|
||||||
@@ -2984,6 +2990,13 @@ packages:
|
|||||||
'@types/istanbul-lib-report': 3.0.3
|
'@types/istanbul-lib-report': 3.0.3
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/@types/jest@30.0.0:
|
||||||
|
resolution: {integrity: sha512-XTYugzhuwqWjws0CVz8QpM36+T+Dz5mTEBKhNs/esGLnCIlGdRy+Dq78NRjd7ls7r8BC8ZRMOrKlkO1hU0JOwA==}
|
||||||
|
dependencies:
|
||||||
|
expect: 30.2.0
|
||||||
|
pretty-format: 30.2.0
|
||||||
|
dev: true
|
||||||
|
|
||||||
/@types/json-schema@7.0.15:
|
/@types/json-schema@7.0.15:
|
||||||
resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==}
|
resolution: {integrity: sha512-5+fP8P8MFNC+AyZCDxrB2pkZFPGzqQWUzpSeuuVLvm8VMcorNYavBqoFcxK8bQz4Qsbn4oUEEem4wDLfcysGHA==}
|
||||||
dev: true
|
dev: true
|
||||||
@@ -3982,6 +3995,13 @@ packages:
|
|||||||
update-browserslist-db: 1.2.3(browserslist@4.28.1)
|
update-browserslist-db: 1.2.3(browserslist@4.28.1)
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/bs-logger@0.2.6:
|
||||||
|
resolution: {integrity: sha512-pd8DCoxmbgc7hyPKOvxtqNcjYoOsABPQdcCUjGp3d42VR2CX1ORhk2A87oqqu5R1kk+76nsxZupkmyd+MVtCog==}
|
||||||
|
engines: {node: '>= 6'}
|
||||||
|
dependencies:
|
||||||
|
fast-json-stable-stringify: 2.1.0
|
||||||
|
dev: true
|
||||||
|
|
||||||
/bser@2.1.1:
|
/bser@2.1.1:
|
||||||
resolution: {integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==}
|
resolution: {integrity: sha512-gQxTNE/GAfIIrmHLUE3oJyp5FO6HRBfhjnw4/wMmA63ZGDJnWBmgY/lyQBpnDUkGmAhbSe39tx2d/iTOAfglwQ==}
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -5474,6 +5494,19 @@ packages:
|
|||||||
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
|
resolution: {integrity: sha512-Y8T4vYhEfwJOTbouREvG+3XDsjr8E3kIr7uf+JZ0BYloFsttiHU0WfvANVsR7TxNUJa/WpCnw/Ino/p+DeBhBQ==}
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
|
/handlebars@4.7.8:
|
||||||
|
resolution: {integrity: sha512-vafaFqs8MZkRrSX7sFVUdo3ap/eNiLnb4IakshzvP56X5Nr1iGKAIqdX6tMlm6HcNRIkr6AxO5jFEoJzzpT8aQ==}
|
||||||
|
engines: {node: '>=0.4.7'}
|
||||||
|
hasBin: true
|
||||||
|
dependencies:
|
||||||
|
minimist: 1.2.8
|
||||||
|
neo-async: 2.6.2
|
||||||
|
source-map: 0.6.1
|
||||||
|
wordwrap: 1.0.0
|
||||||
|
optionalDependencies:
|
||||||
|
uglify-js: 3.19.3
|
||||||
|
dev: true
|
||||||
|
|
||||||
/has-flag@4.0.0:
|
/has-flag@4.0.0:
|
||||||
resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
|
resolution: {integrity: sha512-EykJT/Q1KjTWctppgIAgfSO0tKVuZUjhgMr17kqTumMl6Afv3EISleU7qZUzoXDFTAHTDC4NOoG/ZxU3EvlMPQ==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
@@ -6462,6 +6495,10 @@ packages:
|
|||||||
resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
|
resolution: {integrity: sha512-0wJxfxH1wgO3GrbuP+dTTk7op+6L41QCXbGINEmD+ny/G/eCqGzxyCsh7159S+mgDDcoarnBw6PC1PS5+wUGgw==}
|
||||||
dev: false
|
dev: false
|
||||||
|
|
||||||
|
/lodash.memoize@4.1.2:
|
||||||
|
resolution: {integrity: sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag==}
|
||||||
|
dev: true
|
||||||
|
|
||||||
/lodash.once@4.1.1:
|
/lodash.once@4.1.1:
|
||||||
resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
|
resolution: {integrity: sha512-Sb487aTOCr9drQVL8pIxOzVhafOjZN9UU54hiN8PU3uAiSV7lx1yYNpbNmex2PK6dSJoNTSJUUswT651yww3Mg==}
|
||||||
dev: false
|
dev: false
|
||||||
@@ -6546,6 +6583,10 @@ packages:
|
|||||||
semver: 7.7.4
|
semver: 7.7.4
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/make-error@1.3.6:
|
||||||
|
resolution: {integrity: sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==}
|
||||||
|
dev: true
|
||||||
|
|
||||||
/makeerror@1.0.12:
|
/makeerror@1.0.12:
|
||||||
resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==}
|
resolution: {integrity: sha512-JmqCvUhmt43madlpFzG4BQzG2Z3m6tvQDNKdClZnO3VbIudJYmxsT0FNJMeiB2+JTSlTQTSbU8QdesVmwJcmLg==}
|
||||||
dependencies:
|
dependencies:
|
||||||
@@ -8645,6 +8686,47 @@ packages:
|
|||||||
resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==}
|
resolution: {integrity: sha512-Y/arvbn+rrz3JCKl9C4kVNfTfSm2/mEp5FSz5EsZSANGPSlQrpRI5M4PKF+mJnE52jOO90PnPSc3Ur3bTQw0gA==}
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/ts-jest@29.4.6(@babel/core@7.29.0)(jest@30.2.0)(typescript@5.9.3):
|
||||||
|
resolution: {integrity: sha512-fSpWtOO/1AjSNQguk43hb/JCo16oJDnMJf3CdEGNkqsEX3t0KX96xvyX1D7PfLCpVoKu4MfVrqUkFyblYoY4lA==}
|
||||||
|
engines: {node: ^14.15.0 || ^16.10.0 || ^18.0.0 || >=20.0.0}
|
||||||
|
hasBin: true
|
||||||
|
peerDependencies:
|
||||||
|
'@babel/core': '>=7.0.0-beta.0 <8'
|
||||||
|
'@jest/transform': ^29.0.0 || ^30.0.0
|
||||||
|
'@jest/types': ^29.0.0 || ^30.0.0
|
||||||
|
babel-jest: ^29.0.0 || ^30.0.0
|
||||||
|
esbuild: '*'
|
||||||
|
jest: ^29.0.0 || ^30.0.0
|
||||||
|
jest-util: ^29.0.0 || ^30.0.0
|
||||||
|
typescript: '>=4.3 <6'
|
||||||
|
peerDependenciesMeta:
|
||||||
|
'@babel/core':
|
||||||
|
optional: true
|
||||||
|
'@jest/transform':
|
||||||
|
optional: true
|
||||||
|
'@jest/types':
|
||||||
|
optional: true
|
||||||
|
babel-jest:
|
||||||
|
optional: true
|
||||||
|
esbuild:
|
||||||
|
optional: true
|
||||||
|
jest-util:
|
||||||
|
optional: true
|
||||||
|
dependencies:
|
||||||
|
'@babel/core': 7.29.0
|
||||||
|
bs-logger: 0.2.6
|
||||||
|
fast-json-stable-stringify: 2.1.0
|
||||||
|
handlebars: 4.7.8
|
||||||
|
jest: 30.2.0(@types/node@24.10.13)
|
||||||
|
json5: 2.2.3
|
||||||
|
lodash.memoize: 4.1.2
|
||||||
|
make-error: 1.3.6
|
||||||
|
semver: 7.7.4
|
||||||
|
type-fest: 4.41.0
|
||||||
|
typescript: 5.9.3
|
||||||
|
yargs-parser: 21.1.1
|
||||||
|
dev: true
|
||||||
|
|
||||||
/tsconfck@3.1.6(typescript@5.9.3):
|
/tsconfck@3.1.6(typescript@5.9.3):
|
||||||
resolution: {integrity: sha512-ks6Vjr/jEw0P1gmOVwutM3B7fWxoWBL2KRDb1JfqGVawBmO5UsvmWOQFGHBPl5yxYz4eERr19E6L7NMv+Fej4w==}
|
resolution: {integrity: sha512-ks6Vjr/jEw0P1gmOVwutM3B7fWxoWBL2KRDb1JfqGVawBmO5UsvmWOQFGHBPl5yxYz4eERr19E6L7NMv+Fej4w==}
|
||||||
engines: {node: ^18 || >=20}
|
engines: {node: ^18 || >=20}
|
||||||
@@ -8774,6 +8856,14 @@ packages:
|
|||||||
resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==}
|
resolution: {integrity: sha512-yDJTmhydvl5lJzBmy/hyOAA0d+aqCBuwl818haVdYCRrWV84o7YyeVm4QlVHStqNrrJSTb6jKuFAVqAFsr+K3Q==}
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/uglify-js@3.19.3:
|
||||||
|
resolution: {integrity: sha512-v3Xu+yuwBXisp6QYTcH4UbH+xYJXqnq2m/LtQVWKWzYc1iehYnLixoQDN9FH6/j9/oybfd6W9Ghwkl8+UMKTKQ==}
|
||||||
|
engines: {node: '>=0.8.0'}
|
||||||
|
hasBin: true
|
||||||
|
requiresBuild: true
|
||||||
|
dev: true
|
||||||
|
optional: true
|
||||||
|
|
||||||
/uid@2.0.2:
|
/uid@2.0.2:
|
||||||
resolution: {integrity: sha512-u3xV3X7uzvi5b1MncmZo3i2Aw222Zk1keqLA1YkHldREkAhAqi65wuPfe7lHx8H/Wzy+8CE7S7uS3jekIM5s8g==}
|
resolution: {integrity: sha512-u3xV3X7uzvi5b1MncmZo3i2Aw222Zk1keqLA1YkHldREkAhAqi65wuPfe7lHx8H/Wzy+8CE7S7uS3jekIM5s8g==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
@@ -9117,6 +9207,10 @@ packages:
|
|||||||
stackback: 0.0.2
|
stackback: 0.0.2
|
||||||
dev: true
|
dev: true
|
||||||
|
|
||||||
|
/wordwrap@1.0.0:
|
||||||
|
resolution: {integrity: sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q==}
|
||||||
|
dev: true
|
||||||
|
|
||||||
/wrap-ansi@6.2.0:
|
/wrap-ansi@6.2.0:
|
||||||
resolution: {integrity: sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==}
|
resolution: {integrity: sha512-r6lPcBGxZXlIcymEu7InxDMhdW0KDxpLgoFLcguasxCaJ/SOIZwINatK9KY/tf+ZrlywOKU0UDj3ATXUBfxJXA==}
|
||||||
engines: {node: '>=8'}
|
engines: {node: '>=8'}
|
||||||
|
|||||||
Reference in New Issue
Block a user