add pg vector and embed

This commit is contained in:
2025-08-22 19:34:54 +07:00
parent 21567a0a7c
commit b77beb2d85
27 changed files with 5273 additions and 216 deletions

View File

@@ -2,9 +2,10 @@ import { Module } from '@nestjs/common';
import { AppController } from './app.controller';
import { AppService } from './app.service';
import { IcdModule } from './icd/icd.module';
import { HealthModule } from './health/health.module';
@Module({
imports: [IcdModule],
imports: [IcdModule, HealthModule],
controllers: [AppController],
providers: [AppService],
})

View File

@@ -0,0 +1,83 @@
import { Controller, Get } from '@nestjs/common';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiProperty,
} from '@nestjs/swagger';
/**
 * Swagger schema for the payload returned by the main health check handler.
 *
 * The `example` values only drive the generated API documentation; they are
 * not defaults applied at runtime.
 */
export class HealthCheckResponseDto {
@ApiProperty({ example: 'ok' })
status: string;
// Serialized timestamp string (the handler fills it with `Date#toISOString()`).
@ApiProperty({ example: '2024-01-01T00:00:00.000Z' })
timestamp: string;
// Process uptime as reported by `process.uptime()`.
@ApiProperty({ example: 3600 })
uptime: number;
@ApiProperty({ example: 'development' })
environment: string;
@ApiProperty({ example: '1.0.0' })
version: string;
// Nested object describing the database connection state.
@ApiProperty({ example: { status: 'connected' } })
database: {
status: string;
};
}
/**
 * Health, readiness and liveness endpoints mounted under `/health`.
 *
 * None of the handlers probes an external dependency: they return static
 * payloads plus a few process-level values.
 */
@ApiTags('Health')
@Controller('health')
export class HealthController {
  /**
   * Returns a snapshot of the running process.
   *
   * `version` is read from the `npm_package_version` environment variable
   * (populated when the app is started through a package-manager script) and
   * falls back to `'1.0.0'` when it is not set.
   *
   * NOTE(review): `database.status` is a placeholder and always reports
   * `'connected'`; no connection is actually checked here.
   *
   * @returns The health payload described by {@link HealthCheckResponseDto}.
   */
  @Get()
  @ApiOperation({
    summary: 'Health check endpoint',
    description:
      'Check the health status of the application and its dependencies',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is healthy',
    type: HealthCheckResponseDto,
  })
  async getHealth(): Promise<HealthCheckResponseDto> {
    return {
      status: 'ok',
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      // `||` on purpose: an empty NODE_ENV is treated like an unset one.
      environment: process.env.NODE_ENV || 'development',
      version: process.env.npm_package_version || '1.0.0',
      database: {
        status: 'connected', // In real implementation, check actual DB connection
      },
    };
  }

  /**
   * Readiness probe. Always answers `{ status: 'ready' }`.
   */
  @Get('ready')
  @ApiOperation({
    summary: 'Readiness check',
    description: 'Check if the application is ready to serve requests',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is ready',
  })
  async getReady() {
    return { status: 'ready' };
  }

  /**
   * Liveness probe. Always answers `{ status: 'alive' }`.
   */
  @Get('live')
  @ApiOperation({
    summary: 'Liveness check',
    description: 'Check if the application is alive',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is alive',
  })
  async getLive() {
    return { status: 'alive' };
  }
}

View File

@@ -0,0 +1,7 @@
import { Module } from '@nestjs/common';
import { HealthController } from './health.controller';
/**
 * Nest module that registers {@link HealthController}; it declares no
 * providers, imports or exports of its own.
 */
@Module({
controllers: [HealthController],
})
export class HealthModule {}

View File

@@ -0,0 +1,192 @@
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
/**
 * Swagger schema for a single ICD code record as exposed by the API.
 *
 * The class only carries documentation metadata; it has no validation
 * decorators and no behaviour.
 */
export class IcdCodeDto {
@ApiProperty({
description: 'Unique identifier for the ICD code',
example: '550e8400-e29b-41d4-a716-446655440000',
})
id: string;
@ApiProperty({
description: 'ICD code',
example: 'E11.9',
})
code: string;
@ApiProperty({
description: 'Description of the ICD code',
example: 'Type 2 diabetes mellitus without complications',
})
display: string;
@ApiProperty({
description: 'Version of the ICD standard',
example: '2024',
})
version: string;
// Documented as one of 'ICD9' | 'ICD10', but typed as a plain string.
@ApiProperty({
description: 'ICD category',
example: 'ICD10',
enum: ['ICD9', 'ICD10'],
})
category: string;
@ApiProperty({
description: 'Creation timestamp',
example: '2024-01-01T00:00:00.000Z',
})
createdAt: Date;
@ApiProperty({
description: 'Last update timestamp',
example: '2024-01-01T00:00:00.000Z',
})
updatedAt: Date;
}
/**
 * Swagger schema for the pagination block attached to paginated responses.
 */
export class PaginationMetaDto {
@ApiProperty({
description: 'Current page number',
example: 1,
})
currentPage: number;
@ApiProperty({
description: 'Total number of pages',
example: 10,
})
totalPages: number;
@ApiProperty({
description: 'Total number of items',
example: 100,
})
totalItems: number;
@ApiProperty({
description: 'Number of items per page',
example: 10,
})
itemsPerPage: number;
@ApiProperty({
description: 'Whether there is a next page',
example: true,
})
hasNextPage: boolean;
@ApiProperty({
description: 'Whether there is a previous page',
example: false,
})
hasPreviousPage: boolean;
}
/**
 * Swagger schema for the ICD search response envelope: a success flag, the
 * matching codes, pagination metadata and an optional message.
 */
export class IcdSearchResponseDto {
@ApiProperty({
description: 'Request success status',
example: true,
})
success: boolean;
@ApiProperty({
description: 'Array of ICD codes',
type: [IcdCodeDto],
})
data: IcdCodeDto[];
@ApiProperty({
description: 'Pagination metadata',
type: PaginationMetaDto,
})
pagination: PaginationMetaDto;
@ApiPropertyOptional({
description: 'Response message',
example: 'ICD codes retrieved successfully',
})
message?: string;
}
/**
 * Swagger schema for the ICD import response: a success flag, a message and
 * the per-category import counts.
 */
export class IcdImportResponseDto {
@ApiProperty({
description: 'Request success status',
example: true,
})
success: boolean;
@ApiProperty({
description: 'Success message',
example: 'ICD data imported successfully',
})
message: string;
// Inline object type; it has the same three fields as IcdStatisticsDto.
@ApiProperty({
description: 'Import statistics',
example: {
icd9Count: 150,
icd10Count: 250,
total: 400,
},
})
data: {
icd9Count: number;
icd10Count: number;
total: number;
};
}
/**
 * Swagger schema for ICD code counts, split by category plus the total.
 */
export class IcdStatisticsDto {
@ApiProperty({
description: 'Total number of ICD9 codes',
example: 150,
})
icd9Count: number;
@ApiProperty({
description: 'Total number of ICD10 codes',
example: 250,
})
icd10Count: number;
@ApiProperty({
description: 'Total number of all ICD codes',
example: 400,
})
total: number;
}
/**
 * Swagger schema for the statistics response envelope: a success flag and an
 * {@link IcdStatisticsDto} payload.
 */
export class IcdStatisticsResponseDto {
@ApiProperty({
description: 'Request success status',
example: true,
})
success: boolean;
@ApiProperty({
description: 'ICD statistics data',
type: IcdStatisticsDto,
})
data: IcdStatisticsDto;
}
/**
 * Swagger schema used to document error responses: a success flag, a message
 * and an optional detail string.
 */
export class ErrorResponseDto {
@ApiProperty({
description: 'Request success status',
example: false,
})
success: boolean;
@ApiProperty({
description: 'Error message',
example: 'Failed to process request',
})
message: string;
@ApiPropertyOptional({
description: 'Detailed error information',
example: 'Database connection failed',
})
error?: string;
}

View File

@@ -1,6 +1,62 @@
import { ApiPropertyOptional } from '@nestjs/swagger';
import {
IsOptional,
IsString,
IsNumber,
IsEnum,
Min,
Max,
} from 'class-validator';
import { Type } from 'class-transformer';
/**
 * ICD code families accepted as a category filter. Each member's value equals
 * its name, so the string sent over the wire is 'ICD9' or 'ICD10'.
 */
export enum IcdCategory {
ICD9 = 'ICD9',
ICD10 = 'ICD10',
}
/**
 * Optional filter and pagination parameters for an ICD code search.
 *
 * Every field is optional. `page` and `limit` are converted to numbers by
 * `class-transformer` before validation, so query-string values such as
 * `"2"` are accepted.
 */
export class SearchIcdDto {
  /** Restricts results to one ICD family. */
  @ApiPropertyOptional({
    description: 'ICD category to filter by',
    enum: IcdCategory,
    example: 'ICD10',
  })
  @IsOptional()
  @IsEnum(IcdCategory)
  category?: IcdCategory;

  /**
   * Free-text search term.
   *
   * NOTE(review): the 1-100 length bounds are documented for Swagger only;
   * only `@IsString()` is enforced here.
   */
  @ApiPropertyOptional({
    description: 'Search term for ICD code or description',
    example: 'diabetes',
    minLength: 1,
    maxLength: 100,
  })
  @IsOptional()
  @IsString()
  search?: string;

  /** 1-based page number; must be at least 1 when provided. */
  @ApiPropertyOptional({
    description: 'Page number for pagination',
    example: 1,
    minimum: 1,
    default: 1,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  page?: number;

  /** Page size; must be between 1 and 100 when provided. */
  @ApiPropertyOptional({
    description: 'Number of items per page',
    example: 10,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  @Max(100)
  limit?: number;
}

View File

@@ -1,7 +1,22 @@
import { Controller, Get, Post, Query, Logger } from '@nestjs/common';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiQuery,
ApiBadRequestResponse,
ApiInternalServerErrorResponse,
} from '@nestjs/swagger';
import { IcdService } from './icd.service';
import { SearchIcdDto } from './dto/search-icd.dto';
import {
IcdSearchResponseDto,
IcdImportResponseDto,
IcdStatisticsResponseDto,
ErrorResponseDto,
} from './dto/icd-response.dto';
@ApiTags('ICD')
@Controller('icd')
export class IcdController {
private readonly logger = new Logger(IcdController.name);
@@ -9,7 +24,25 @@ export class IcdController {
constructor(private readonly icdService: IcdService) {}
@Post('import')
async importData() {
@ApiOperation({
summary: 'Import ICD data from Excel files',
description:
'Import ICD-9 and ICD-10 codes from Excel files located in the test directory. This operation will process both ICD files and insert/update the database with the latest codes.',
})
@ApiResponse({
status: 200,
description: 'ICD data imported successfully',
type: IcdImportResponseDto,
})
@ApiBadRequestResponse({
description: 'Bad request - Invalid file format or missing files',
type: ErrorResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error during import process',
type: ErrorResponseDto,
})
async importData(): Promise<IcdImportResponseDto> {
try {
this.logger.log('Starting ICD data import...');
const result = await this.icdService.importIcdData();
@@ -20,21 +53,62 @@ export class IcdController {
};
} catch (error) {
this.logger.error('Error importing ICD data:', error);
return {
success: false,
message: 'Failed to import ICD data',
error: error.message,
};
throw error;
}
}
@Get('search')
@ApiOperation({
summary: 'Search ICD codes with filters and pagination',
description:
'Search for ICD codes using various filters like category, search term, with pagination support. Returns a paginated list of matching ICD codes.',
})
@ApiQuery({
name: 'category',
required: false,
description: 'Filter by ICD category',
enum: ['ICD9', 'ICD10'],
example: 'ICD10',
})
@ApiQuery({
name: 'search',
required: false,
description: 'Search term for ICD code or description',
example: 'diabetes',
})
@ApiQuery({
name: 'page',
required: false,
description: 'Page number for pagination',
example: 1,
type: 'number',
})
@ApiQuery({
name: 'limit',
required: false,
description: 'Number of items per page (max 100)',
example: 10,
type: 'number',
})
@ApiResponse({
status: 200,
description: 'ICD codes retrieved successfully',
type: IcdSearchResponseDto,
})
@ApiBadRequestResponse({
description: 'Bad request - Invalid query parameters',
type: ErrorResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error during search',
type: ErrorResponseDto,
})
async searchIcdCodes(
@Query('category') category?: string,
@Query('search') search?: string,
@Query('page') page?: string,
@Query('limit') limit?: string,
) {
): Promise<IcdSearchResponseDto> {
try {
const pageNum = page ? parseInt(page, 10) : 1;
const limitNum = limit ? parseInt(limit, 10) : 10;
@@ -48,20 +122,38 @@ export class IcdController {
return {
success: true,
...result,
data: result.data,
pagination: {
currentPage: result.page,
totalPages: result.totalPages,
totalItems: result.total,
itemsPerPage: result.limit,
hasNextPage: result.page < result.totalPages,
hasPreviousPage: result.page > 1,
},
};
} catch (error) {
this.logger.error('Error searching ICD codes:', error);
return {
success: false,
message: 'Failed to search ICD codes',
error: error.message,
};
throw error;
}
}
@Get('statistics')
async getStatistics() {
@ApiOperation({
summary: 'Get ICD database statistics',
description:
'Retrieve statistics about the ICD database including total counts for ICD-9 and ICD-10 codes, and last import information.',
})
@ApiResponse({
status: 200,
description: 'Statistics retrieved successfully',
type: IcdStatisticsResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error while fetching statistics',
type: ErrorResponseDto,
})
async getStatistics(): Promise<IcdStatisticsResponseDto> {
try {
const stats = await this.icdService.getStatistics();
return {
@@ -70,11 +162,7 @@ export class IcdController {
};
} catch (error) {
this.logger.error('Error getting statistics:', error);
return {
success: false,
message: 'Failed to get statistics',
error: error.message,
};
throw error;
}
}
}

View File

@@ -1,10 +1,12 @@
import { Module } from '@nestjs/common';
import { IcdController } from './icd.controller';
import { IcdService } from './icd.service';
import { PgVectorModule } from './pgvector.module';
/**
 * Feature module for ICD codes.
 *
 * Registers the ICD controller and service, imports the pgvector module and
 * re-exports both `IcdService` and `PgVectorModule` to importing modules.
 */
@Module({
  imports: [PgVectorModule],
  controllers: [IcdController],
  providers: [IcdService],
  exports: [IcdService, PgVectorModule],
})
export class IcdModule {}

View File

@@ -181,6 +181,16 @@ export class IcdService {
skip,
take: limit,
orderBy: { code: 'asc' },
select: {
id: true,
code: true,
display: true,
version: true,
category: true,
createdAt: true,
updatedAt: true,
// Exclude embedding field to avoid deserialization error
},
}),
this.prisma.icdCode.count({ where }),
]);

View File

@@ -0,0 +1,670 @@
import {
  BadRequestException,
  Controller,
  Get,
  Post,
  Query,
  Body,
  HttpStatus,
  ValidationPipe,
  UsePipes,
} from '@nestjs/common';
import {
  ApiTags,
  ApiOperation,
  ApiResponse,
  ApiQuery,
  ApiBody,
  ApiProperty,
  ApiConsumes,
  ApiProduces,
} from '@nestjs/swagger';
import { PgVectorService, VectorSearchResult } from './pgvector.service';
/**
 * Request body for the vector and hybrid search endpoints.
 *
 * NOTE(review): the fields carry only Swagger metadata. No class-validator
 * decorators are present, so the documented length/range limits are
 * presumably not enforced by the ValidationPipe. Confirm before relying on
 * them.
 */
export class VectorSearchDto {
@ApiProperty({
description: 'Search query text for vector similarity search',
example: 'diabetes mellitus type 2',
minLength: 1,
maxLength: 500,
})
query: string;
@ApiProperty({
description: 'Maximum number of results to return',
example: 10,
required: false,
minimum: 1,
maximum: 100,
default: 10,
})
limit?: number;
@ApiProperty({
description: 'ICD category filter to narrow down search results',
example: 'ICD10',
required: false,
enum: ['ICD9', 'ICD10'],
default: undefined,
})
category?: string;
@ApiProperty({
description: 'Similarity threshold (0.0 - 1.0) for filtering results',
example: 0.7,
required: false,
minimum: 0.0,
maximum: 1.0,
default: 0.7,
})
threshold?: number;
}
/**
 * Request body for the single-text embedding endpoint: the text to embed and
 * an optional model name.
 */
export class EmbeddingRequestDto {
@ApiProperty({
description: 'Text to generate vector embedding for',
example: 'diabetes mellitus',
minLength: 1,
maxLength: 1000,
})
text: string;
@ApiProperty({
description: 'Embedding model to use for generation',
example: 'text-embedding-ada-002',
required: false,
default: 'text-embedding-ada-002',
})
model?: string;
}
/**
 * Response body for the search endpoints: the matches, how many there are and
 * the query that produced them.
 *
 * The item schema is spelled out inline because `VectorSearchResult` is an
 * interface and cannot be referenced as a Swagger `type`.
 */
export class VectorSearchResponseDto {
@ApiProperty({
description: 'Array of search results with similarity scores',
type: 'array',
items: {
type: 'object',
properties: {
id: {
type: 'string',
description: 'Unique identifier for the ICD code',
example: 'uuid-123',
},
code: {
type: 'string',
description: 'ICD code (e.g., E11.9)',
example: 'E11.9',
},
display: {
type: 'string',
description: 'Human readable description of the ICD code',
example: 'Type 2 diabetes mellitus without complications',
},
version: {
type: 'string',
description: 'ICD version (e.g., ICD-10-CM)',
example: 'ICD-10-CM',
},
category: {
type: 'string',
description: 'ICD category (ICD9 or ICD10)',
example: 'ICD10',
},
similarity: {
type: 'number',
description: 'Similarity score between 0 and 1',
example: 0.89,
},
},
},
})
data: VectorSearchResult[];
@ApiProperty({
description: 'Total number of results found',
example: 5,
minimum: 0,
})
total: number;
@ApiProperty({
description: 'Search query that was used',
example: 'diabetes mellitus type 2',
})
query: string;
}
/**
 * Response body for the embedding statistics endpoint: how many ICD codes
 * exist, how many have embeddings, and the vector store status label.
 */
export class EmbeddingStatsResponseDto {
@ApiProperty({
description: 'Total number of ICD codes in the system',
example: 1000,
minimum: 0,
})
total: number;
@ApiProperty({
description: 'Number of ICD codes with generated embeddings',
example: 500,
minimum: 0,
})
withEmbeddings: number;
@ApiProperty({
description: 'Number of ICD codes without embeddings',
example: 500,
minimum: 0,
})
withoutEmbeddings: number;
@ApiProperty({
description: 'Percentage of codes with embeddings',
example: 50.0,
minimum: 0,
maximum: 100,
})
percentage: number;
@ApiProperty({
description: 'Current status of the vector store',
example: 'Initialized',
enum: ['Initialized', 'Not Initialized', 'Error'],
})
vectorStoreStatus: string;
}
/**
 * Response body for the vector store status endpoint.
 */
export class VectorStoreStatusDto {
@ApiProperty({
description: 'Whether the vector store is currently initialized',
example: true,
})
initialized: boolean;
@ApiProperty({
description: 'Number of documents currently in the vector store',
example: 1000,
minimum: 0,
})
documentCount: number;
@ApiProperty({
description: 'Embedding model currently being used',
example: 'OpenAI text-embedding-ada-002',
enum: ['OpenAI text-embedding-ada-002', 'Not Available'],
})
embeddingModel: string;
@ApiProperty({
description: 'Timestamp of last vector store update',
example: '2024-01-01T00:00:00.000Z',
})
lastUpdated: Date;
}
/**
 * Response body for the vector store initialize endpoint: a message and the
 * resulting document count.
 */
export class InitializeResponseDto {
@ApiProperty({
description: 'Success message',
example: 'Pgvector store initialized successfully',
})
message: string;
@ApiProperty({
description: 'Number of documents loaded into vector store',
example: 1000,
minimum: 0,
})
documentCount: number;
}
/**
 * Response body for the vector store refresh endpoint: a message and the
 * resulting document count.
 */
export class RefreshResponseDto {
@ApiProperty({
description: 'Success message',
example: 'Pgvector store refreshed successfully',
})
message: string;
@ApiProperty({
description: 'Number of documents in refreshed vector store',
example: 1000,
minimum: 0,
})
documentCount: number;
}
/**
 * Response body for the single-text embedding endpoint: the vector, its
 * length and the model name reported for it.
 */
export class GenerateEmbeddingResponseDto {
@ApiProperty({
description: 'Generated vector embedding array',
type: 'array',
items: { type: 'number' },
example: [0.1, 0.2, 0.3, -0.1, 0.5],
})
embedding: number[];
@ApiProperty({
description: 'Number of dimensions in the embedding vector',
example: 1536,
minimum: 1,
})
dimensions: number;
@ApiProperty({
description: 'Model used to generate the embedding',
example: 'text-embedding-ada-002',
})
model: string;
}
/**
 * Response body for the batch embedding endpoint: processed and error counts
 * plus a summary message.
 */
export class GenerateAllEmbeddingsResponseDto {
@ApiProperty({
description: 'Number of embeddings successfully processed',
example: 500,
minimum: 0,
})
processed: number;
@ApiProperty({
description: 'Number of errors encountered during processing',
example: 0,
minimum: 0,
})
errors: number;
@ApiProperty({
description: 'Summary message of the operation',
example: 'Processed 500 embeddings with 0 errors',
})
message: string;
}
/**
 * REST endpoints for pgvector-backed ICD search and embedding management,
 * mounted under `/pgvector`.
 *
 * Every handler delegates to `PgVectorService`. This class checks the
 * required text inputs, applies defaults for optional parameters and shapes
 * the response DTOs.
 */
@ApiTags('PgVector Operations')
@Controller('pgvector')
@UsePipes(new ValidationPipe({ transform: true }))
export class PgVectorController {
  constructor(private readonly pgVectorService: PgVectorService) {}

  /**
   * Ensures a required text input is a non-blank string.
   *
   * @param value Raw value taken from the body or query string.
   * @param label Human-readable field name used in the error message.
   * @returns The value, unchanged, once it is known to be usable.
   * @throws BadRequestException with `"<label> is required"` otherwise.
   */
  private static requireText(value: unknown, label: string): string {
    if (typeof value !== 'string' || value.trim().length === 0) {
      throw new BadRequestException(`${label} is required`);
    }
    return value;
  }

  /**
   * Parses an optional numeric query-string parameter.
   *
   * @param name Parameter name, used in the error message.
   * @param raw Raw query-string value; `undefined` or `''` selects `fallback`.
   * @param fallback Value used when the parameter is absent.
   * @param parse Converter from string to number (e.g. `parseFloat`).
   * @returns The parsed number or `fallback`.
   * @throws BadRequestException when the value is present but not numeric.
   */
  private static parseOptionalNumber(
    name: string,
    raw: string | undefined,
    fallback: number,
    parse: (value: string) => number,
  ): number {
    if (raw === undefined || raw === '') {
      return fallback;
    }
    const parsed = parse(raw);
    if (Number.isNaN(parsed)) {
      throw new BadRequestException(`${name} must be a number`);
    }
    return parsed;
  }

  /**
   * Similarity search driven by a JSON body.
   *
   * Defaults: `limit` 10, `threshold` 0.7. An explicit threshold of `0` is
   * honoured.
   *
   * @throws BadRequestException when `query` is missing or blank.
   */
  @Post('search')
  @ApiOperation({
    summary: 'PgVector similarity search',
    description:
      'Search ICD codes using pgvector similarity with the given query. Returns results ordered by similarity score.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiBody({
    type: VectorSearchDto,
    description: 'Search parameters for pgvector similarity search',
    examples: {
      diabetes: {
        summary: 'Search for diabetes',
        value: {
          query: 'diabetes mellitus type 2',
          limit: 10,
          category: 'ICD10',
          threshold: 0.7,
        },
      },
      heart: {
        summary: 'Search for heart conditions',
        value: {
          query: 'heart attack myocardial infarction',
          limit: 5,
          category: 'ICD10',
          threshold: 0.8,
        },
      },
    },
  })
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Search results with similarity scores',
    type: VectorSearchResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.BAD_REQUEST,
    description: 'Invalid search parameters or query',
    schema: {
      type: 'object',
      properties: {
        statusCode: { type: 'number', example: 400 },
        message: { type: 'string', example: 'Query is required' },
        error: { type: 'string', example: 'Bad Request' },
      },
    },
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Internal server error during pgvector search',
    schema: {
      type: 'object',
      properties: {
        statusCode: { type: 'number', example: 500 },
        message: { type: 'string', example: 'Internal server error' },
        error: { type: 'string', example: 'Internal Server Error' },
      },
    },
  })
  async vectorSearch(
    @Body() searchDto: VectorSearchDto,
  ): Promise<VectorSearchResponseDto> {
    const query = PgVectorController.requireText(searchDto.query, 'Query');
    const results = await this.pgVectorService.vectorSearch(
      query,
      // `||` on purpose: a limit of 0 is outside the documented range.
      searchDto.limit || 10,
      searchDto.category,
      // `??` keeps an explicit 0, which is inside the documented 0.0-1.0 range.
      searchDto.threshold ?? 0.7,
    );
    return {
      data: results,
      total: results.length,
      query,
    };
  }

  /**
   * Similarity search driven by query-string parameters.
   *
   * `limit` is parsed as a base-10 integer (default 10) and `threshold` as a
   * float (default 0.7).
   *
   * @throws BadRequestException when `query` is missing or blank, or when
   *   `limit` / `threshold` is present but not numeric.
   */
  @Get('search')
  @ApiOperation({
    summary: 'PgVector search via GET',
    description:
      'Search ICD codes using pgvector similarity via query parameters. Alternative to POST method.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiQuery({
    name: 'query',
    description: 'Search query text for pgvector similarity search',
    example: 'diabetes mellitus type 2',
    required: true,
    type: 'string',
  })
  @ApiQuery({
    name: 'limit',
    description: 'Maximum number of results to return',
    example: 10,
    required: false,
    type: 'number',
    minimum: 1,
    maximum: 100,
  })
  @ApiQuery({
    name: 'category',
    description: 'ICD category filter to narrow down search results',
    example: 'ICD10',
    required: false,
    type: 'string',
    enum: ['ICD9', 'ICD10'],
  })
  @ApiQuery({
    name: 'threshold',
    description: 'Similarity threshold (0.0 - 1.0) for filtering results',
    example: 0.7,
    required: false,
    type: 'number',
    minimum: 0.0,
    maximum: 1.0,
  })
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Search results with similarity scores',
    type: VectorSearchResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.BAD_REQUEST,
    description: 'Invalid query parameters',
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Internal server error during pgvector search',
  })
  async vectorSearchGet(
    @Query('query') query: string,
    @Query('limit') limit?: string,
    @Query('category') category?: string,
    @Query('threshold') threshold?: string,
  ): Promise<VectorSearchResponseDto> {
    const text = PgVectorController.requireText(query, 'Query');
    const maxResults = PgVectorController.parseOptionalNumber(
      'limit',
      limit,
      10,
      (raw) => parseInt(raw, 10),
    );
    const minSimilarity = PgVectorController.parseOptionalNumber(
      'threshold',
      threshold,
      0.7,
      parseFloat,
    );
    const results = await this.pgVectorService.vectorSearch(
      text,
      maxResults,
      category,
      minSimilarity,
    );
    return {
      data: results,
      total: results.length,
      query: text,
    };
  }

  /**
   * Hybrid (vector + text) search driven by a JSON body. `limit` defaults to
   * 10; `threshold` is not used by this endpoint.
   *
   * @throws BadRequestException when `query` is missing or blank.
   */
  @Post('hybrid-search')
  @ApiOperation({
    summary: 'Hybrid search (PgVector + Text)',
    description:
      'Combine pgvector similarity with text search for better and more accurate results. Combines semantic understanding with traditional text matching.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiBody({
    type: VectorSearchDto,
    description: 'Search parameters for hybrid search',
    examples: {
      diabetes: {
        summary: 'Hybrid search for diabetes',
        value: {
          query: 'diabetes mellitus type 2',
          limit: 15,
          category: 'ICD10',
        },
      },
    },
  })
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Hybrid search results combining pgvector and text search',
    type: VectorSearchResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.BAD_REQUEST,
    description: 'Invalid search parameters',
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Internal server error during hybrid search',
  })
  async hybridSearch(
    @Body() searchDto: VectorSearchDto,
  ): Promise<VectorSearchResponseDto> {
    const query = PgVectorController.requireText(searchDto.query, 'Query');
    const results = await this.pgVectorService.hybridSearch(
      query,
      searchDto.limit || 10,
      searchDto.category,
    );
    return {
      data: results,
      total: results.length,
      query,
    };
  }

  /**
   * Generates an embedding for a single text.
   *
   * The `model` field of the response echoes the requested model name, or
   * `'text-embedding-ada-002'` when none was given.
   *
   * @throws BadRequestException when `text` is missing or blank.
   */
  @Post('generate-embedding')
  @ApiOperation({
    summary: 'Generate text embedding',
    description:
      'Generate vector embedding for the given text using OpenAI. Returns 1536-dimensional vector.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiBody({
    type: EmbeddingRequestDto,
    description: 'Text to generate embedding for',
    examples: {
      diabetes: {
        summary: 'Generate embedding for diabetes text',
        value: {
          text: 'diabetes mellitus',
          model: 'text-embedding-ada-002',
        },
      },
      heart: {
        summary: 'Generate embedding for heart condition',
        value: {
          text: 'acute myocardial infarction',
          model: 'text-embedding-ada-002',
        },
      },
    },
  })
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Generated embedding vector with metadata',
    type: GenerateEmbeddingResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.BAD_REQUEST,
    description: 'Invalid text input',
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error generating embedding',
  })
  async generateEmbedding(
    @Body() requestDto: EmbeddingRequestDto,
  ): Promise<GenerateEmbeddingResponseDto> {
    const text = PgVectorController.requireText(requestDto.text, 'Text');
    const embedding = await this.pgVectorService.generateEmbedding(
      text,
      requestDto.model,
    );
    return {
      embedding,
      dimensions: embedding.length,
      model: requestDto.model || 'text-embedding-ada-002',
    };
  }

  /**
   * Triggers batch embedding generation in the service and appends a summary
   * message to whatever counters the service returns.
   */
  @Post('generate-and-store-all-embeddings')
  @ApiOperation({
    summary: 'Generate and store embeddings for all ICD codes',
    description:
      'Batch generate embeddings for all ICD codes and store them in the database with pgvector. This process may take some time depending on the number of codes.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Embedding generation and storage results summary',
    type: GenerateAllEmbeddingsResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error during batch embedding generation and storage',
  })
  async generateAndStoreAllEmbeddings(): Promise<GenerateAllEmbeddingsResponseDto> {
    const result = await this.pgVectorService.generateAndStoreAllEmbeddings();
    return {
      ...result,
      message: `Processed ${result.processed} embeddings with ${result.errors} errors`,
    };
  }

  /** Returns the embedding statistics reported by the service. */
  @Get('stats')
  @ApiOperation({
    summary: 'Get embedding statistics',
    description:
      'Get comprehensive statistics about ICD codes and their embedding status in the pgvector store.',
    tags: ['PgVector Operations'],
  })
  @ApiProduces('application/json')
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Embedding statistics and pgvector store status',
    type: EmbeddingStatsResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error retrieving statistics',
  })
  async getEmbeddingStats(): Promise<EmbeddingStatsResponseDto> {
    return await this.pgVectorService.getEmbeddingStats();
  }

  /** Returns the vector store status reported by the service. */
  @Get('status')
  @ApiOperation({
    summary: 'Get pgvector store status',
    description:
      'Get current operational status of the pgvector store including initialization state and document count.',
    tags: ['PgVector Operations'],
  })
  @ApiProduces('application/json')
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Current pgvector store status and configuration',
    type: VectorStoreStatusDto,
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error retrieving pgvector store status',
  })
  async getVectorStoreStatus(): Promise<VectorStoreStatusDto> {
    return await this.pgVectorService.getVectorStoreStatus();
  }

  /**
   * Initializes the vector store, then reports the document count taken from
   * a follow-up status call.
   */
  @Post('initialize')
  @ApiOperation({
    summary: 'Initialize pgvector store',
    description:
      'Initialize or reinitialize the pgvector store with all available ICD codes. This loads codes from the database into the pgvector store.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Pgvector store initialization results',
    type: InitializeResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error during pgvector store initialization',
  })
  async initializeVectorStore(): Promise<InitializeResponseDto> {
    await this.pgVectorService.initializeVectorStore();
    const status = await this.pgVectorService.getVectorStoreStatus();
    return {
      message: 'Pgvector store initialized successfully',
      documentCount: status.documentCount,
    };
  }

  /**
   * Refreshes the vector store, then reports the document count taken from a
   * follow-up status call.
   */
  @Post('refresh')
  @ApiOperation({
    summary: 'Refresh pgvector store',
    description:
      'Refresh the pgvector store with the latest ICD codes data from the database. Useful after data updates.',
    tags: ['PgVector Operations'],
  })
  @ApiConsumes('application/json')
  @ApiProduces('application/json')
  @ApiResponse({
    status: HttpStatus.OK,
    description: 'Pgvector store refresh results',
    type: RefreshResponseDto,
  })
  @ApiResponse({
    status: HttpStatus.INTERNAL_SERVER_ERROR,
    description: 'Error during pgvector store refresh',
  })
  async refreshVectorStore(): Promise<RefreshResponseDto> {
    await this.pgVectorService.refreshVectorStore();
    const status = await this.pgVectorService.getVectorStoreStatus();
    return {
      message: 'Pgvector store refreshed successfully',
      documentCount: status.documentCount,
    };
  }
}

View File

@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { PgVectorController } from './pgvector.controller';
import { PgVectorService } from './pgvector.service';
/**
 * Nest module that registers the pgvector controller and service, and exports
 * {@link PgVectorService} so importing modules can inject it.
 */
@Module({
controllers: [PgVectorController],
providers: [PgVectorService],
exports: [PgVectorService],
})
export class PgVectorModule {}

611
src/icd/pgvector.service.ts Normal file
View File

@@ -0,0 +1,611 @@
import { Injectable, Logger } from '@nestjs/common';
import { PrismaClient } from '../../generated/prisma';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
import { Document } from 'langchain/document';
import { Pool } from 'pg';
/**
 * One ICD code match produced by a search, together with its similarity score.
 */
export interface VectorSearchResult {
id: string;
code: string;
display: string;
version: string;
category: string;
// Similarity score for this match. NOTE(review): scale/range is not visible here — confirm.
similarity: number;
}
/**
 * Shape of an embedding request: the text to embed and an optional model name.
 * NOTE(review): not referenced in the visible part of this file — confirm it is still used.
 */
export interface EmbeddingRequest {
text: string;
model?: string;
}
@Injectable()
export class PgVectorService {
private readonly logger = new Logger(PgVectorService.name);
private readonly prisma = new PrismaClient();
private readonly pool: Pool;
private vectorStore: PGVectorStore | null = null;
private embeddings: OpenAIEmbeddings | null = null;
/**
 * Creates the PostgreSQL connection pool from `DATABASE_URL` and starts
 * building the OpenAI embeddings client.
 */
constructor() {
  // Initialize PostgreSQL connection pool
  this.pool = new Pool({
    connectionString: process.env.DATABASE_URL,
    max: 20,
    idleTimeoutMillis: 30000,
    connectionTimeoutMillis: 2000,
  });
  // initializeEmbeddings() is async and rethrows on failure. A constructor
  // cannot await it, so the rejection is absorbed here to avoid an unhandled
  // promise rejection. The failure has already been logged by the method, and
  // `this.embeddings` stays null so later operations fail with a clear error.
  void this.initializeEmbeddings().catch(() => undefined);
}
/**
 * Builds the OpenAI embeddings client from environment configuration.
 *
 * Reads `OPENAI_API_KEY` (required) and `OPENAI_API_MODEL` (optional, defaults
 * to `'text-embedding-ada-002'`), then stores the client on `this.embeddings`.
 *
 * @throws Error prefixed with "Failed to initialize OpenAI embeddings" when
 *   the API key is missing or the client cannot be constructed.
 */
private async initializeEmbeddings(): Promise<void> {
  try {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
      this.logger.error(
        'OPENAI_API_KEY not found. Vector operations require OpenAI API key.',
      );
      throw new Error('OPENAI_API_KEY is required for vector operations');
    }
    const apiModel = process.env.OPENAI_API_MODEL;
    const modelName = apiModel || 'text-embedding-ada-002';
    this.embeddings = new OpenAIEmbeddings({
      openAIApiKey: apiKey,
      modelName: modelName,
      maxConcurrency: 5,
    });
    this.logger.log(
      `OpenAI embeddings initialized successfully with model: ${modelName}`,
    );
  } catch (error) {
    this.logger.error('Failed to initialize OpenAI embeddings:', error);
    // A thrown value is not guaranteed to be an Error; avoid "undefined" in the message.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to initialize OpenAI embeddings: ${reason}`);
  }
}
/**
 * Initializes the pgvector store through LangChain.
 *
 * Binds a `PGVectorStore` to the `icd_codes` table (columns `id`, `embedding`,
 * `content`, `metadata`) without inserting any documents. When a store from an
 * earlier call exists, its connection pool is closed once the new store is in
 * place, so repeated initialisation does not accumulate open pools.
 *
 * @throws Error when the embeddings client is not initialized, when
 *   `DATABASE_URL` is not set, or when the store cannot be created.
 */
async initializeVectorStore(): Promise<void> {
  try {
    this.logger.log('Initializing pgvector store...');
    if (!this.embeddings) {
      throw new Error(
        'OpenAI embeddings not initialized. Cannot create vector store.',
      );
    }
    // Get database connection string
    const connectionString = process.env.DATABASE_URL;
    if (!connectionString) {
      throw new Error('DATABASE_URL not found');
    }
    const previousStore = this.vectorStore;
    // Initialize pgvector store without inserting data
    this.vectorStore = await PGVectorStore.initialize(this.embeddings, {
      postgresConnectionOptions: {
        connectionString,
      },
      tableName: 'icd_codes',
      columns: {
        idColumnName: 'id',
        vectorColumnName: 'embedding',
        contentColumnName: 'content',
        metadataColumnName: 'metadata',
      },
    });
    if (previousStore) {
      try {
        // Release the pool held by the store that was just replaced.
        await previousStore.end();
      } catch (closeError) {
        // The new store is already usable; a failed cleanup must not fail the call.
        this.logger.warn(
          `Failed to close previous pgvector store: ${
            closeError instanceof Error ? closeError.message : String(closeError)
          }`,
        );
      }
    }
    this.logger.log('Pgvector store initialized successfully');
  } catch (error) {
    this.logger.error('Error initializing pgvector store:', error);
    throw error;
  }
}
/**
 * Generates an embedding for a text using the OpenAI embeddings client.
 *
 * @param text Text to embed; its first 100 characters are written to the log.
 * @param model Accepted for interface compatibility but not read by this
 *   method: the client created at start-up determines the model actually used.
 * @returns The embedding vector returned by `embedQuery`.
 * @throws Error prefixed with "Failed to generate embedding" when the client
 *   is not initialized or the embedding call fails.
 */
async generateEmbedding(
  text: string,
  model: string = 'text-embedding-ada-002',
): Promise<number[]> {
  try {
    this.logger.log(
      `Generating embedding for text: ${text.substring(0, 100)}...`,
    );
    if (!this.embeddings) {
      throw new Error(
        'OpenAI embeddings not initialized. Please check your API configuration.',
      );
    }
    // Use OpenAI embeddings
    const embedding = await this.embeddings.embedQuery(text);
    this.logger.log(
      `Generated OpenAI embedding with ${embedding.length} dimensions`,
    );
    return embedding;
  } catch (error) {
    this.logger.error('Error generating embedding:', error);
    // A thrown value is not guaranteed to be an Error; avoid "undefined" in the message.
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Failed to generate embedding: ${reason}`);
  }
}
/**
 * Generates and stores embeddings for a sample of ICD codes that do not have
 * one yet.
 *
 * Rows are processed one at a time. A failure on a single row is logged and
 * counted, and processing continues with the next row.
 *
 * @param limit Maximum number of rows to pick up in this run (default 100).
 * @returns Counts of stored embeddings, failed rows and rows selected.
 * @throws Rethrows any error raised outside the per-row handling (for example
 *   the initial SELECT failing).
 */
async generateAndStoreAllEmbeddings(limit: number = 100): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
}> {
  // Kept at column 0 so the SQL text sent to the server is unchanged.
  const updateSql = `UPDATE icd_codes
SET embedding = $1::vector,
metadata = $2::jsonb,
content = $3
WHERE id = $4`;
  try {
    this.logger.log(
      `Starting batch embedding generation and storage for sample ${limit} ICD codes...`,
    );
    // Select the rows that still lack an embedding.
    const pending = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL LIMIT $1',
      [limit],
    );
    const rows = pending.rows;
    if (rows.length === 0) {
      this.logger.log('All ICD codes already have embeddings');
      return { processed: 0, errors: 0, totalSample: 0 };
    }
    this.logger.log(
      `Found ${rows.length} sample codes without embeddings (limited to ${limit})`,
    );
    let processed = 0;
    let errors = 0;
    for (const row of rows) {
      try {
        // Text that is embedded and also stored in the `content` column.
        const text = `${row.code} - ${row.display}`;
        const vector = await this.generateEmbedding(text);
        // pgvector accepts a bracketed, comma-separated literal cast to `vector`.
        const vectorLiteral = `[${vector.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });
        await this.pool.query(updateSql, [
          vectorLiteral,
          metadataJson,
          text,
          row.id,
        ]);
        processed++;
        // Progress line every 10 successfully stored rows.
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${rows.length} sample embeddings`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors++;
      }
    }
    this.logger.log(
      `Sample embedding generation and storage completed. Processed: ${processed}, Errors: ${errors}, Total Sample: ${rows.length}`,
    );
    return {
      processed,
      errors,
      totalSample: rows.length,
    };
  } catch (error) {
    this.logger.error('Error in generateAndStoreAllEmbeddings:', error);
    throw error;
  }
}
/**
 * Generate and store embeddings for a sample of ICD codes in one category
 * that do not have an embedding yet.
 *
 * Rows are processed one at a time; a failure on one row is logged and
 * counted, and the loop continues with the next row.
 *
 * @param category - Value matched against the `category` column.
 * @param limit - Maximum number of rows to fetch (default: 100).
 * @returns `processed` (rows updated), `errors` (rows that failed),
 *   `totalSample` (rows fetched for this run) and the requested `category`.
 */
async generateAndStoreSampleEmbeddingsByCategory(
  category: string,
  limit: number = 100,
): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
  category: string;
}> {
  try {
    this.logger.log(
      `Starting batch embedding generation for sample ${limit} ICD codes in category: ${category}`,
    );

    // Fetch rows of this category that still lack an embedding.
    const { rows: pendingRows } = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL AND category = $1 LIMIT $2',
      [category, limit],
    );
    const totalSample = pendingRows.length;

    if (totalSample === 0) {
      this.logger.log(
        `No ICD codes found in category '${category}' without embeddings`,
      );
      return { processed: 0, errors: 0, totalSample: 0, category };
    }
    this.logger.log(
      `Found ${totalSample} sample codes in category '${category}' without embeddings (limited to ${limit})`,
    );

    // Same statement text for every row, so build it once.
    const updateStatement = [
      'UPDATE icd_codes',
      'SET embedding = $1::vector,',
      'metadata = $2::jsonb,',
      'content = $3',
      'WHERE id = $4',
    ].join('\n');

    let processed = 0;
    let errors = 0;

    for (const row of pendingRows) {
      try {
        // Text that gets embedded and stored in the content column.
        const content = `${row.code} - ${row.display}`;
        const vector = await this.generateEmbedding(content);
        // pgvector accepts the "[v1,v2,...]" text form, cast with ::vector.
        const vectorLiteral = `[${vector.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });

        await this.pool.query(updateStatement, [
          vectorLiteral,
          metadataJson,
          content,
          row.id,
        ]);

        processed += 1;
        // Progress line every tenth successful row.
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${totalSample} sample embeddings in category '${category}'`,
          );
        }
      } catch (rowError) {
        this.logger.error(`Error processing code ${row.code}:`, rowError);
        errors += 1;
      }
    }

    this.logger.log(
      `Sample embedding generation completed for category '${category}'. Processed: ${processed}, Errors: ${errors}, Total Sample: ${totalSample}`,
    );
    return { processed, errors, totalSample, category };
  } catch (error) {
    this.logger.error(
      `Error in generateAndStoreSampleEmbeddingsByCategory for category '${category}':`,
      error,
    );
    throw error;
  }
}
/**
 * Vector similarity search over `icd_codes` using pgvector.
 *
 * The query text is embedded, rows are ordered by the pgvector `<=>`
 * distance to that embedding, and `similarity` is reported as
 * `1 - distance`. Rows below `threshold` are dropped after the SQL `LIMIT`
 * has been applied, so fewer than `limit` rows may come back.
 *
 * @param query - Free text to search for.
 * @param limit - Maximum number of rows requested from the database.
 * @param category - Optional exact match on the `category` column.
 * @param threshold - Minimum similarity a row needs to be returned.
 * @returns Matching rows, closest first.
 * @throws Error when the embeddings client is not initialized; any error from
 *   embedding generation or the database query is logged and rethrown.
 */
async vectorSearch(
  query: string,
  limit: number = 10,
  category?: string,
  threshold: number = 0.7,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing pgvector search for: ${query}`);
    if (!this.embeddings) {
      throw new Error('OpenAI embeddings not initialized');
    }

    // Embed the query and render it in pgvector's "[v1,v2,...]" text form.
    const queryEmbedding = await this.generateEmbedding(query);
    const vectorLiteral = `[${queryEmbedding.join(',')}]`;

    // Base statement; $1 is always the query vector.
    let sql = [
      '',
      'SELECT',
      'id, code, display, version, category,',
      '1 - (embedding <=> $1::vector) as similarity',
      'FROM icd_codes',
      'WHERE embedding IS NOT NULL',
      '',
    ].join('\n');

    // After each push, params.length is the placeholder number of that value.
    const params: any[] = [vectorLiteral];
    if (category) {
      params.push(category);
      sql += ` AND category = $${params.length}`;
    }
    params.push(limit);
    sql += ` ORDER BY embedding <=> $1::vector ASC LIMIT $${params.length}`;

    const { rows } = await this.pool.query(sql, params);

    // Keep only rows at or above the threshold, mapped to the result shape.
    const matches: VectorSearchResult[] = [];
    for (const row of rows) {
      if (row.similarity >= threshold) {
        matches.push({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
          similarity: parseFloat(row.similarity),
        });
      }
    }

    this.logger.log(
      `Pgvector search returned ${matches.length} results for query: "${query}"`,
    );
    return matches;
  } catch (error) {
    this.logger.error('Error in pgvector search:', error);
    throw error;
  }
}
/**
 * Hybrid search: combine vector similarity with text search.
 *
 * Both searches are asked for `limit * 2` candidates (the vector search with
 * a 0.5 similarity threshold). Scores are merged per result id as
 * `vectorSimilarity * vectorWeight + textSimilarity * textWeight`; an id
 * found by only one search gets only that search's weighted score.
 *
 * @param query - Free text to search for.
 * @param limit - Maximum number of merged results to return.
 * @param category - Optional category filter passed to both searches.
 * @param vectorWeight - Multiplier applied to vector similarity scores.
 * @param textWeight - Multiplier applied to text similarity scores.
 * @returns Up to `limit` results, highest combined score first.
 * @throws Rethrows any error from either underlying search after logging it.
 */
async hybridSearch(
  query: string,
  limit: number = 10,
  category?: string,
  vectorWeight: number = 0.7,
  textWeight: number = 0.3,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing hybrid search for: ${query}`);

    // The two searches do not depend on each other, so run them concurrently.
    const candidateCount = limit * 2;
    const [vectorResults, textResults] = await Promise.all([
      this.vectorSearch(query, candidateCount, category, 0.5),
      this.textSearch(query, candidateCount, category),
    ]);

    // Combined score per result id.
    const combinedResults = new Map<string, VectorSearchResult>();

    for (const result of vectorResults) {
      combinedResults.set(result.id, {
        ...result,
        similarity: result.similarity * vectorWeight,
      });
    }

    for (const result of textResults) {
      // `??` rather than `||`: a real score of 0 must stay 0 and only a
      // missing score falls back to 0.5.
      const textScore = (result.similarity ?? 0.5) * textWeight;
      const existing = combinedResults.get(result.id);
      if (existing) {
        // `existing` is the copy created above, so mutating it is safe.
        existing.similarity += textScore;
      } else {
        combinedResults.set(result.id, { ...result, similarity: textScore });
      }
    }

    // Highest combined score first, then cut to the requested size.
    const results = Array.from(combinedResults.values());
    results.sort((a, b) => b.similarity - a.similarity);
    return results.slice(0, limit);
  } catch (error) {
    this.logger.error('Error in hybrid search:', error);
    throw error;
  }
}
/**
 * Text-based search over `icd_codes`.
 *
 * Matches rows whose `code` or `display` contains `query` (case-insensitive,
 * via ILIKE), optionally restricted to one category, ordered by `code`.
 * Every returned row carries a fixed similarity of 0.5 because no ranking is
 * computed here.
 *
 * @param query - Substring to look for; an empty string disables the filter.
 * @param limit - Maximum number of rows to return.
 * @param category - Optional exact match on the `category` column.
 * @returns Matching rows in ascending `code` order.
 * @throws Rethrows any database error after logging it.
 */
private async textSearch(
  query: string,
  limit: number,
  category?: string,
): Promise<VectorSearchResult[]> {
  try {
    let sql = 'SELECT id, code, display, version, category FROM icd_codes';
    const params: any[] = [];
    const whereConditions: string[] = [];

    // After each push, params.length is the placeholder number of that value.
    if (category) {
      params.push(category);
      whereConditions.push(`category = $${params.length}`);
    }

    if (query) {
      // Escape LIKE metacharacters so user input is matched literally:
      // without this, "%" or "_" in the query act as wildcards. Backslash is
      // PostgreSQL's default LIKE escape character and must be escaped too.
      const literalQuery = query.replace(/[\\%_]/g, '\\$&');
      params.push(`%${literalQuery}%`);
      const placeholder = `$${params.length}`;
      whereConditions.push(
        `(code ILIKE ${placeholder} OR display ILIKE ${placeholder})`,
      );
    }

    if (whereConditions.length > 0) {
      sql += ' WHERE ' + whereConditions.join(' AND ');
    }

    params.push(limit);
    sql += ' ORDER BY code ASC LIMIT $' + params.length;

    const result = await this.pool.query(sql, params);
    return result.rows.map((row) => ({
      id: row.id,
      code: row.code,
      display: row.display,
      version: row.version,
      category: row.category,
      similarity: 0.5, // Default text similarity score
    }));
  } catch (error) {
    this.logger.error('Error in text search:', error);
    throw error;
  }
}
/**
 * Report how many `icd_codes` rows have an embedding.
 *
 * @returns Row totals, the share of rows with an embedding as a percentage
 *   rounded to two decimals (0 when the table is empty), and whether
 *   `this.vectorStore` is set.
 * @throws Rethrows any database error after logging it.
 */
async getEmbeddingStats(): Promise<{
  total: number;
  withEmbeddings: number;
  withoutEmbeddings: number;
  percentage: number;
  vectorStoreStatus: string;
}> {
  const countAllSql = 'SELECT COUNT(*) as count FROM icd_codes';
  const countEmbeddedSql =
    'SELECT COUNT(*) as count FROM icd_codes WHERE embedding IS NOT NULL';

  try {
    // Both counts are fetched concurrently.
    const [allRows, embeddedRows] = await Promise.all([
      this.pool.query(countAllSql),
      this.pool.query(countEmbeddedSql),
    ]);

    // The count column is run through parseInt to get a number.
    const total = parseInt(allRows.rows[0].count);
    const withEmbeddings = parseInt(embeddedRows.rows[0].count);

    // Guard against division by zero on an empty table.
    let percentage = 0;
    if (total > 0) {
      percentage = (withEmbeddings / total) * 100;
    }

    return {
      total,
      withEmbeddings,
      withoutEmbeddings: total - withEmbeddings,
      percentage: Math.round(percentage * 100) / 100,
      vectorStoreStatus: this.vectorStore ? 'Initialized' : 'Not Initialized',
    };
  } catch (statsError) {
    this.logger.error('Error getting embedding stats:', statsError);
    throw statsError;
  }
}
/**
 * Re-run the vector store initialization.
 *
 * @throws Rethrows any initialization error after logging it.
 */
async refreshVectorStore(): Promise<void> {
  try {
    this.logger.log('Refreshing pgvector store...');
    // Refreshing is simply a second pass through the initializer.
    await this.initializeVectorStore();
    this.logger.log('Pgvector store refreshed successfully');
  } catch (refreshError) {
    this.logger.error('Error refreshing pgvector store:', refreshError);
    throw refreshError;
  }
}
/**
 * Describe the current state of the vector store.
 *
 * @returns Whether `this.vectorStore` is set, how many `icd_codes` rows have
 *   an embedding, a label for the embedding model, and the time of this call
 *   (`lastUpdated` is `new Date()`, not a stored timestamp).
 * @throws Rethrows any database error after logging it.
 */
async getVectorStoreStatus(): Promise<{
  initialized: boolean;
  documentCount: number;
  embeddingModel: string;
  lastUpdated: Date;
}> {
  try {
    // Rows with an embedding count as "documents" here.
    const { rows } = await this.pool.query(
      'SELECT COUNT(*) as count FROM icd_codes WHERE embedding IS NOT NULL',
    );
    const documentCount = parseInt(rows[0].count);

    // Label comes from the environment, with a fixed fallback name.
    let embeddingModel = 'Not Available';
    if (this.embeddings) {
      embeddingModel = `OpenAI ${process.env.OPENAI_API_MODEL || 'text-embedding-ada-002'}`;
    }

    return {
      initialized: Boolean(this.vectorStore),
      documentCount,
      embeddingModel,
      lastUpdated: new Date(),
    };
  } catch (statusError) {
    this.logger.error('Error getting vector store status:', statusError);
    throw statusError;
  }
}
/**
 * Cleanup resources: disconnect the Prisma client and close the pg pool.
 *
 * The pool is closed in a `finally` block so that a failing Prisma
 * disconnect cannot leave the pool's connections open. The disconnect error,
 * if any, still propagates to the caller.
 */
async onModuleDestroy() {
  try {
    await this.prisma.$disconnect();
  } finally {
    await this.pool.end();
  }
}
}

View File

@@ -1,8 +1,133 @@
import { NestFactory } from '@nestjs/core';
import { AppModule } from './app.module';
import { Logger, ValidationPipe } from '@nestjs/common';
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
/**
 * Create the Nest application, apply global configuration (CORS, validation,
 * optional Swagger docs, shutdown hooks) and start the HTTP listener.
 *
 * The listener is started exactly once, after all configuration is in place.
 * Configuration is read from environment variables with the defaults shown
 * below.
 */
async function bootstrap() {
  const logger = new Logger('Bootstrap');
  const app = await NestFactory.create(AppModule);

  // Environment configuration
  const port = process.env.PORT ?? 3000;
  const host = process.env.HOST ?? 'localhost';
  const nodeEnv = process.env.NODE_ENV ?? 'development';

  // Parse a comma-separated env value. Entries are trimmed so "a, b" yields
  // "a" and "b" rather than "a" and " b"; empty entries are dropped. The
  // fallback is used only when the variable is not set at all.
  const parseList = (raw: string | undefined, fallback: string[]): string[] =>
    raw === undefined
      ? fallback
      : raw
          .split(',')
          .map((entry) => entry.trim())
          .filter((entry) => entry.length > 0);

  // CORS Configuration
  const corsOrigins = parseList(process.env.CORS_ORIGINS, [
    'http://localhost:3000',
  ]);
  const corsMethods = parseList(process.env.CORS_METHODS, [
    'GET',
    'HEAD',
    'PUT',
    'PATCH',
    'POST',
    'DELETE',
    'OPTIONS',
  ]);
  const corsHeaders = parseList(process.env.CORS_HEADERS, [
    'Content-Type',
    'Accept',
    'Authorization',
    'X-Requested-With',
  ]);
  const corsCredentials = process.env.CORS_CREDENTIALS === 'true';

  // Enable CORS
  app.enableCors({
    origin: corsOrigins,
    methods: corsMethods,
    allowedHeaders: corsHeaders,
    credentials: corsCredentials,
  });

  // Enable global validation pipe
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      forbidNonWhitelisted: true,
      transform: true,
      transformOptions: {
        enableImplicitConversion: true,
      },
    }),
  );

  // Setup Swagger Documentation
  if (process.env.ENABLE_DOCS === 'true') {
    const config = new DocumentBuilder()
      .setTitle('Claim Guard API')
      .setDescription(
        'API documentation for Claim Guard Backend - ICD Code Management System',
      )
      .setVersion('1.0.0')
      .setContact(
        'Development Team',
        'https://github.com/your-org/claim-guard-be',
        'dev@yourdomain.com',
      )
      .setLicense('MIT', 'https://opensource.org/licenses/MIT')
      .addServer(
        process.env.APP_URL || 'http://localhost:3000',
        'Development Server',
      )
      .addTag('ICD', 'ICD Code management operations')
      .addTag('Health', 'Application health and monitoring')
      .addBearerAuth(
        {
          type: 'http',
          scheme: 'bearer',
          bearerFormat: 'JWT',
          name: 'JWT',
          description: 'Enter JWT token',
          in: 'header',
        },
        'JWT-auth',
      )
      .build();
    const document = SwaggerModule.createDocument(app, config);
    SwaggerModule.setup('docs', app, document, {
      swaggerOptions: {
        persistAuthorization: true,
        docExpansion: 'none',
        filter: true,
        showRequestDuration: true,
      },
      customSiteTitle: 'Claim Guard API Documentation',
      customfavIcon: '/favicon.ico',
      customCss: '.swagger-ui .topbar { display: none }',
    });
    logger.log(
      `📚 Swagger Documentation enabled at: http://${host}:${port}/docs`,
    );
  }

  // Global prefix for API endpoints (optional)
  // app.setGlobalPrefix('api/v1');

  // NOTE(review): REQUEST_TIMEOUT used to be parsed here into a local that was
  // never applied to the server. The dead local was removed; wire the setting
  // into the HTTP server if a request timeout is actually wanted.

  // Graceful shutdown
  app.enableShutdownHooks();

  // Single listen call, issued only after every piece of configuration above.
  await app.listen(port, host);

  logger.log(`🚀 Application is running on: http://${host}:${port}`);
  logger.log(`🌍 Environment: ${nodeEnv}`);
  logger.log(`🔐 CORS Origins: ${corsOrigins.join(', ')}`);
  if (process.env.HEALTH_CHECK_ENABLED === 'true') {
    logger.log(
      `❤️ Health Check available at: http://${host}:${port}${process.env.HEALTH_CHECK_PATH || '/health'}`,
    );
  }
}
// Start the server exactly once. A startup failure is logged and the process
// exits with a non-zero code instead of leaving an unhandled rejection.
bootstrap().catch((error) => {
  console.error('❌ Error starting server:', error);
  process.exit(1);
});