add pg vector and embed
This commit is contained in:
@@ -2,9 +2,10 @@ import { Module } from '@nestjs/common';
|
||||
import { AppController } from './app.controller';
|
||||
import { AppService } from './app.service';
|
||||
import { IcdModule } from './icd/icd.module';
|
||||
import { HealthModule } from './health/health.module';
|
||||
|
||||
@Module({
|
||||
imports: [IcdModule],
|
||||
imports: [IcdModule, HealthModule],
|
||||
controllers: [AppController],
|
||||
providers: [AppService],
|
||||
})
|
||||
|
||||
83
src/health/health.controller.ts
Normal file
83
src/health/health.controller.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { Controller, Get } from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiProperty,
|
||||
} from '@nestjs/swagger';
|
||||
|
||||
/**
 * Swagger-annotated shape of the payload returned by the health endpoint.
 */
export class HealthCheckResponseDto {
  /** Overall status label (the documented example is `'ok'`). */
  @ApiProperty({ example: 'ok' })
  status: string;

  /** Time the report was produced, as an ISO-8601 string. */
  @ApiProperty({ example: '2024-01-01T00:00:00.000Z' })
  timestamp: string;

  /** Uptime figure for the running process. */
  @ApiProperty({ example: 3600 })
  uptime: number;

  /** Name of the runtime environment. */
  @ApiProperty({ example: 'development' })
  environment: string;

  /** Application version string. */
  @ApiProperty({ example: '1.0.0' })
  version: string;

  /** Status of the database dependency. */
  @ApiProperty({ example: { status: 'connected' } })
  database: { status: string };
}
|
||||
|
||||
/**
 * Health, readiness and liveness probes served under the `health` route.
 */
@ApiTags('Health')
@Controller('health')
export class HealthController {
  /**
   * Assembles the health report from process-level information.
   *
   * @returns status, current timestamp, process uptime, environment name,
   *          version and a database status entry.
   */
  @Get()
  @ApiOperation({
    summary: 'Health check endpoint',
    description:
      'Check the health status of the application and its dependencies',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is healthy',
    type: HealthCheckResponseDto,
  })
  async getHealth(): Promise<HealthCheckResponseDto> {
    const report: HealthCheckResponseDto = {
      status: 'ok',
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      // Falls back to 'development' when NODE_ENV is unset or empty.
      environment: process.env.NODE_ENV || 'development',
      version: '1.0.0',
      // Placeholder: no actual database connectivity check is performed here.
      database: { status: 'connected' },
    };
    return report;
  }

  /**
   * Readiness probe.
   *
   * @returns a constant `{ status: 'ready' }` object.
   */
  @Get('ready')
  @ApiOperation({
    summary: 'Readiness check',
    description: 'Check if the application is ready to serve requests',
  })
  @ApiResponse({ status: 200, description: 'Application is ready' })
  async getReady() {
    const readiness = { status: 'ready' };
    return readiness;
  }

  /**
   * Liveness probe.
   *
   * @returns a constant `{ status: 'alive' }` object.
   */
  @Get('live')
  @ApiOperation({
    summary: 'Liveness check',
    description: 'Check if the application is alive',
  })
  @ApiResponse({ status: 200, description: 'Application is alive' })
  async getLive() {
    const liveness = { status: 'alive' };
    return liveness;
  }
}
|
||||
7
src/health/health.module.ts
Normal file
7
src/health/health.module.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { HealthController } from './health.controller';
|
||||
|
||||
/** Feature module whose only registration is the health controller. */
@Module({ controllers: [HealthController] })
export class HealthModule {}
|
||||
192
src/icd/dto/icd-response.dto.ts
Normal file
192
src/icd/dto/icd-response.dto.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
||||
|
||||
/**
 * Swagger description of a single ICD code record as returned by the API.
 */
export class IcdCodeDto {
  /** Record identifier. */
  @ApiProperty({
    description: 'Unique identifier for the ICD code',
    example: '550e8400-e29b-41d4-a716-446655440000',
  })
  id: string;

  /** The ICD code itself. */
  @ApiProperty({ description: 'ICD code', example: 'E11.9' })
  code: string;

  /** Human-readable description of the code. */
  @ApiProperty({
    description: 'Description of the ICD code',
    example: 'Type 2 diabetes mellitus without complications',
  })
  display: string;

  /** Version of the ICD standard the code belongs to. */
  @ApiProperty({ description: 'Version of the ICD standard', example: '2024' })
  version: string;

  /** Category label; documented as one of `ICD9` / `ICD10`. */
  @ApiProperty({
    description: 'ICD category',
    example: 'ICD10',
    enum: ['ICD9', 'ICD10'],
  })
  category: string;

  /** When the record was created. */
  @ApiProperty({
    description: 'Creation timestamp',
    example: '2024-01-01T00:00:00.000Z',
  })
  createdAt: Date;

  /** When the record was last updated. */
  @ApiProperty({
    description: 'Last update timestamp',
    example: '2024-01-01T00:00:00.000Z',
  })
  updatedAt: Date;
}
|
||||
|
||||
/**
 * Pagination block attached to paginated list responses.
 */
export class PaginationMetaDto {
  /** Page the response corresponds to. */
  @ApiProperty({ description: 'Current page number', example: 1 })
  currentPage: number;

  /** Number of pages available. */
  @ApiProperty({ description: 'Total number of pages', example: 10 })
  totalPages: number;

  /** Number of items across all pages. */
  @ApiProperty({ description: 'Total number of items', example: 100 })
  totalItems: number;

  /** Page size used for the response. */
  @ApiProperty({ description: 'Number of items per page', example: 10 })
  itemsPerPage: number;

  /** True when a later page exists. */
  @ApiProperty({ description: 'Whether there is a next page', example: true })
  hasNextPage: boolean;

  /** True when an earlier page exists. */
  @ApiProperty({
    description: 'Whether there is a previous page',
    example: false,
  })
  hasPreviousPage: boolean;
}
|
||||
|
||||
/**
 * Envelope for ICD search results: matching codes plus pagination metadata.
 */
export class IcdSearchResponseDto {
  /** Success flag of the request. */
  @ApiProperty({ description: 'Request success status', example: true })
  success: boolean;

  /** The ICD codes on the current page. */
  @ApiProperty({ description: 'Array of ICD codes', type: [IcdCodeDto] })
  data: IcdCodeDto[];

  /** Paging information for the result set. */
  @ApiProperty({ description: 'Pagination metadata', type: PaginationMetaDto })
  pagination: PaginationMetaDto;

  /** Optional human-readable message. */
  @ApiPropertyOptional({
    description: 'Response message',
    example: 'ICD codes retrieved successfully',
  })
  message?: string;
}
|
||||
|
||||
/**
 * Envelope returned after an ICD import, carrying per-category counts.
 */
export class IcdImportResponseDto {
  /** Success flag of the request. */
  @ApiProperty({ description: 'Request success status', example: true })
  success: boolean;

  /** Human-readable outcome message. */
  @ApiProperty({
    description: 'Success message',
    example: 'ICD data imported successfully',
  })
  message: string;

  /** Counts of imported ICD-9 / ICD-10 codes and their total. */
  @ApiProperty({
    description: 'Import statistics',
    example: { icd9Count: 150, icd10Count: 250, total: 400 },
  })
  data: { icd9Count: number; icd10Count: number; total: number };
}
|
||||
|
||||
/**
 * Per-category and overall counts of stored ICD codes.
 */
export class IcdStatisticsDto {
  /** Count of ICD9 codes. */
  @ApiProperty({ description: 'Total number of ICD9 codes', example: 150 })
  icd9Count: number;

  /** Count of ICD10 codes. */
  @ApiProperty({ description: 'Total number of ICD10 codes', example: 250 })
  icd10Count: number;

  /** Count of all ICD codes. */
  @ApiProperty({ description: 'Total number of all ICD codes', example: 400 })
  total: number;
}
|
||||
|
||||
/**
 * Envelope wrapping {@link IcdStatisticsDto} with a success flag.
 */
export class IcdStatisticsResponseDto {
  /** Success flag of the request. */
  @ApiProperty({ description: 'Request success status', example: true })
  success: boolean;

  /** The statistics payload. */
  @ApiProperty({ description: 'ICD statistics data', type: IcdStatisticsDto })
  data: IcdStatisticsDto;
}
|
||||
|
||||
/**
 * Error envelope used in the Swagger documentation of failure responses.
 */
export class ErrorResponseDto {
  /** Success flag; the documented example is `false`. */
  @ApiProperty({ description: 'Request success status', example: false })
  success: boolean;

  /** Short error message. */
  @ApiProperty({
    description: 'Error message',
    example: 'Failed to process request',
  })
  message: string;

  /** Optional detail about the underlying failure. */
  @ApiPropertyOptional({
    description: 'Detailed error information',
    example: 'Database connection failed',
  })
  error?: string;
}
|
||||
@@ -1,6 +1,62 @@
|
||||
import { ApiPropertyOptional } from '@nestjs/swagger';
|
||||
import {
|
||||
IsOptional,
|
||||
IsString,
|
||||
IsNumber,
|
||||
IsEnum,
|
||||
Min,
|
||||
Max,
|
||||
} from 'class-validator';
|
||||
import { Type } from 'class-transformer';
|
||||
|
||||
/**
 * ICD categories accepted as a search filter.
 * String-valued, so each member's runtime value equals its name.
 */
export enum IcdCategory {
  /** ICD-9 code set. */
  ICD9 = 'ICD9',
  /** ICD-10 code set. */
  ICD10 = 'ICD10',
}
|
||||
|
||||
/**
 * Query parameters accepted by the ICD search endpoint.
 *
 * Every field is optional. Numeric fields are converted from their
 * query-string form with `@Type(() => Number)` before validation.
 */
export class SearchIcdDto {
  // The stale undecorated `category?: 'ICD9' | 'ICD10'` declaration was
  // removed: it duplicated the property declared below.

  /** Restricts results to one ICD category. */
  @ApiPropertyOptional({
    description: 'ICD category to filter by',
    enum: IcdCategory,
    example: 'ICD10',
  })
  @IsOptional()
  @IsEnum(IcdCategory)
  category?: IcdCategory;

  /**
   * Free-text term matched against code or description.
   * NOTE(review): `minLength` / `maxLength` below are Swagger metadata only;
   * the validators applied here are `IsOptional` and `IsString`.
   */
  @ApiPropertyOptional({
    description: 'Search term for ICD code or description',
    example: 'diabetes',
    minLength: 1,
    maxLength: 100,
  })
  @IsOptional()
  @IsString()
  search?: string;

  /** Page number, validated to be a number >= 1. */
  @ApiPropertyOptional({
    description: 'Page number for pagination',
    example: 1,
    minimum: 1,
    default: 1,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  page?: number;

  /** Page size, validated to be a number between 1 and 100. */
  @ApiPropertyOptional({
    description: 'Number of items per page',
    example: 10,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  @Max(100)
  limit?: number;
}
|
||||
|
||||
@@ -1,7 +1,22 @@
|
||||
import { Controller, Get, Post, Query, Logger } from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiQuery,
|
||||
ApiBadRequestResponse,
|
||||
ApiInternalServerErrorResponse,
|
||||
} from '@nestjs/swagger';
|
||||
import { IcdService } from './icd.service';
|
||||
import { SearchIcdDto } from './dto/search-icd.dto';
|
||||
import {
|
||||
IcdSearchResponseDto,
|
||||
IcdImportResponseDto,
|
||||
IcdStatisticsResponseDto,
|
||||
ErrorResponseDto,
|
||||
} from './dto/icd-response.dto';
|
||||
|
||||
@ApiTags('ICD')
|
||||
@Controller('icd')
|
||||
export class IcdController {
|
||||
private readonly logger = new Logger(IcdController.name);
|
||||
@@ -9,7 +24,25 @@ export class IcdController {
|
||||
constructor(private readonly icdService: IcdService) {}
|
||||
|
||||
@Post('import')
|
||||
async importData() {
|
||||
@ApiOperation({
|
||||
summary: 'Import ICD data from Excel files',
|
||||
description:
|
||||
'Import ICD-9 and ICD-10 codes from Excel files located in the test directory. This operation will process both ICD files and insert/update the database with the latest codes.',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'ICD data imported successfully',
|
||||
type: IcdImportResponseDto,
|
||||
})
|
||||
@ApiBadRequestResponse({
|
||||
description: 'Bad request - Invalid file format or missing files',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error during import process',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async importData(): Promise<IcdImportResponseDto> {
|
||||
try {
|
||||
this.logger.log('Starting ICD data import...');
|
||||
const result = await this.icdService.importIcdData();
|
||||
@@ -20,21 +53,62 @@ export class IcdController {
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error importing ICD data:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to import ICD data',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@Get('search')
|
||||
@ApiOperation({
|
||||
summary: 'Search ICD codes with filters and pagination',
|
||||
description:
|
||||
'Search for ICD codes using various filters like category, search term, with pagination support. Returns a paginated list of matching ICD codes.',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'category',
|
||||
required: false,
|
||||
description: 'Filter by ICD category',
|
||||
enum: ['ICD9', 'ICD10'],
|
||||
example: 'ICD10',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'search',
|
||||
required: false,
|
||||
description: 'Search term for ICD code or description',
|
||||
example: 'diabetes',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'page',
|
||||
required: false,
|
||||
description: 'Page number for pagination',
|
||||
example: 1,
|
||||
type: 'number',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'limit',
|
||||
required: false,
|
||||
description: 'Number of items per page (max 100)',
|
||||
example: 10,
|
||||
type: 'number',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'ICD codes retrieved successfully',
|
||||
type: IcdSearchResponseDto,
|
||||
})
|
||||
@ApiBadRequestResponse({
|
||||
description: 'Bad request - Invalid query parameters',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error during search',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async searchIcdCodes(
|
||||
@Query('category') category?: string,
|
||||
@Query('search') search?: string,
|
||||
@Query('page') page?: string,
|
||||
@Query('limit') limit?: string,
|
||||
) {
|
||||
): Promise<IcdSearchResponseDto> {
|
||||
try {
|
||||
const pageNum = page ? parseInt(page, 10) : 1;
|
||||
const limitNum = limit ? parseInt(limit, 10) : 10;
|
||||
@@ -48,20 +122,38 @@ export class IcdController {
|
||||
|
||||
return {
|
||||
success: true,
|
||||
...result,
|
||||
data: result.data,
|
||||
pagination: {
|
||||
currentPage: result.page,
|
||||
totalPages: result.totalPages,
|
||||
totalItems: result.total,
|
||||
itemsPerPage: result.limit,
|
||||
hasNextPage: result.page < result.totalPages,
|
||||
hasPreviousPage: result.page > 1,
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error searching ICD codes:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to search ICD codes',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@Get('statistics')
|
||||
async getStatistics() {
|
||||
@ApiOperation({
|
||||
summary: 'Get ICD database statistics',
|
||||
description:
|
||||
'Retrieve statistics about the ICD database including total counts for ICD-9 and ICD-10 codes, and last import information.',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'Statistics retrieved successfully',
|
||||
type: IcdStatisticsResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error while fetching statistics',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async getStatistics(): Promise<IcdStatisticsResponseDto> {
|
||||
try {
|
||||
const stats = await this.icdService.getStatistics();
|
||||
return {
|
||||
@@ -70,11 +162,7 @@ export class IcdController {
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error getting statistics:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to get statistics',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { IcdController } from './icd.controller';
|
||||
import { IcdService } from './icd.service';
|
||||
import { PgVectorModule } from './pgvector.module';
|
||||
|
||||
/**
 * ICD feature module.
 *
 * Imports `PgVectorModule`, registers the ICD controller and service, and
 * re-exports both `IcdService` and `PgVectorModule` to importing modules.
 */
@Module({
  imports: [PgVectorModule],
  controllers: [IcdController],
  providers: [IcdService],
  // Single `exports` entry: the earlier `exports: [IcdService]` line was a
  // duplicate key in this metadata literal and has been dropped.
  exports: [IcdService, PgVectorModule],
})
export class IcdModule {}
|
||||
|
||||
@@ -181,6 +181,16 @@ export class IcdService {
|
||||
skip,
|
||||
take: limit,
|
||||
orderBy: { code: 'asc' },
|
||||
select: {
|
||||
id: true,
|
||||
code: true,
|
||||
display: true,
|
||||
version: true,
|
||||
category: true,
|
||||
createdAt: true,
|
||||
updatedAt: true,
|
||||
// Exclude embedding field to avoid deserialization error
|
||||
},
|
||||
}),
|
||||
this.prisma.icdCode.count({ where }),
|
||||
]);
|
||||
|
||||
670
src/icd/pgvector.controller.ts
Normal file
670
src/icd/pgvector.controller.ts
Normal file
@@ -0,0 +1,670 @@
|
||||
import {
|
||||
Controller,
|
||||
Get,
|
||||
Post,
|
||||
Query,
|
||||
Body,
|
||||
HttpStatus,
|
||||
ValidationPipe,
|
||||
UsePipes,
|
||||
} from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiQuery,
|
||||
ApiBody,
|
||||
ApiProperty,
|
||||
ApiConsumes,
|
||||
ApiProduces,
|
||||
} from '@nestjs/swagger';
|
||||
import { PgVectorService, VectorSearchResult } from './pgvector.service';
|
||||
|
||||
/**
 * Request body for the vector and hybrid search endpoints.
 *
 * The constraints listed in the decorators (lengths, ranges, enum) are
 * Swagger metadata; this class carries no validator decorators.
 */
export class VectorSearchDto {
  /** Text to search for. */
  @ApiProperty({
    description: 'Search query text for vector similarity search',
    example: 'diabetes mellitus type 2',
    minLength: 1,
    maxLength: 500,
  })
  query: string;

  /** Optional cap on the number of results. */
  @ApiProperty({
    description: 'Maximum number of results to return',
    example: 10,
    required: false,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  limit?: number;

  /** Optional ICD category filter. */
  @ApiProperty({
    description: 'ICD category filter to narrow down search results',
    example: 'ICD10',
    required: false,
    enum: ['ICD9', 'ICD10'],
    default: undefined,
  })
  category?: string;

  /** Optional similarity cut-off. */
  @ApiProperty({
    description: 'Similarity threshold (0.0 - 1.0) for filtering results',
    example: 0.7,
    required: false,
    minimum: 0.0,
    maximum: 1.0,
    default: 0.7,
  })
  threshold?: number;
}
|
||||
|
||||
/**
 * Request body for generating an embedding from a piece of text.
 */
export class EmbeddingRequestDto {
  /** Input text to embed. */
  @ApiProperty({
    description: 'Text to generate vector embedding for',
    example: 'diabetes mellitus',
    minLength: 1,
    maxLength: 1000,
  })
  text: string;

  /** Optional embedding model name. */
  @ApiProperty({
    description: 'Embedding model to use for generation',
    example: 'text-embedding-ada-002',
    required: false,
    default: 'text-embedding-ada-002',
  })
  model?: string;
}
|
||||
|
||||
/**
 * Response of the search endpoints: matches, their count and the query used.
 */
export class VectorSearchResponseDto {
  /** Matching ICD codes; the item schema is spelled out inline for Swagger. */
  @ApiProperty({
    description: 'Array of search results with similarity scores',
    type: 'array',
    items: {
      type: 'object',
      properties: {
        id: {
          type: 'string',
          description: 'Unique identifier for the ICD code',
          example: 'uuid-123',
        },
        code: {
          type: 'string',
          description: 'ICD code (e.g., E11.9)',
          example: 'E11.9',
        },
        display: {
          type: 'string',
          description: 'Human readable description of the ICD code',
          example: 'Type 2 diabetes mellitus without complications',
        },
        version: {
          type: 'string',
          description: 'ICD version (e.g., ICD-10-CM)',
          example: 'ICD-10-CM',
        },
        category: {
          type: 'string',
          description: 'ICD category (ICD9 or ICD10)',
          example: 'ICD10',
        },
        similarity: {
          type: 'number',
          description: 'Similarity score between 0 and 1',
          example: 0.89,
        },
      },
    },
  })
  data: VectorSearchResult[];

  /** Number of results in the response. */
  @ApiProperty({
    description: 'Total number of results found',
    example: 5,
    minimum: 0,
  })
  total: number;

  /** Echo of the query text. */
  @ApiProperty({
    description: 'Search query that was used',
    example: 'diabetes mellitus type 2',
  })
  query: string;
}
|
||||
|
||||
/**
 * Statistics about how many ICD codes have embeddings.
 */
export class EmbeddingStatsResponseDto {
  /** Count of all ICD codes. */
  @ApiProperty({
    description: 'Total number of ICD codes in the system',
    example: 1000,
    minimum: 0,
  })
  total: number;

  /** Count of codes that have an embedding. */
  @ApiProperty({
    description: 'Number of ICD codes with generated embeddings',
    example: 500,
    minimum: 0,
  })
  withEmbeddings: number;

  /** Count of codes that have no embedding. */
  @ApiProperty({
    description: 'Number of ICD codes without embeddings',
    example: 500,
    minimum: 0,
  })
  withoutEmbeddings: number;

  /** Share of codes with embeddings, documented on a 0-100 scale. */
  @ApiProperty({
    description: 'Percentage of codes with embeddings',
    example: 50.0,
    minimum: 0,
    maximum: 100,
  })
  percentage: number;

  /** Status label of the vector store. */
  @ApiProperty({
    description: 'Current status of the vector store',
    example: 'Initialized',
    enum: ['Initialized', 'Not Initialized', 'Error'],
  })
  vectorStoreStatus: string;
}
|
||||
|
||||
/**
 * Operational status of the vector store.
 */
export class VectorStoreStatusDto {
  /** Whether the store is initialized. */
  @ApiProperty({
    description: 'Whether the vector store is currently initialized',
    example: true,
  })
  initialized: boolean;

  /** Number of documents held by the store. */
  @ApiProperty({
    description: 'Number of documents currently in the vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;

  /** Label of the embedding model in use. */
  @ApiProperty({
    description: 'Embedding model currently being used',
    example: 'OpenAI text-embedding-ada-002',
    enum: ['OpenAI text-embedding-ada-002', 'Not Available'],
  })
  embeddingModel: string;

  /** Time of the most recent store update. */
  @ApiProperty({
    description: 'Timestamp of last vector store update',
    example: '2024-01-01T00:00:00.000Z',
  })
  lastUpdated: Date;
}
|
||||
|
||||
/**
 * Response describing the outcome of initializing the vector store.
 */
export class InitializeResponseDto {
  /** Human-readable outcome message. */
  @ApiProperty({
    description: 'Success message',
    example: 'Pgvector store initialized successfully',
  })
  message: string;

  /** Number of documents loaded. */
  @ApiProperty({
    description: 'Number of documents loaded into vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;
}
|
||||
|
||||
/**
 * Response describing the outcome of refreshing the vector store.
 */
export class RefreshResponseDto {
  /** Human-readable outcome message. */
  @ApiProperty({
    description: 'Success message',
    example: 'Pgvector store refreshed successfully',
  })
  message: string;

  /** Number of documents present after the refresh. */
  @ApiProperty({
    description: 'Number of documents in refreshed vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;
}
|
||||
|
||||
/**
 * Response carrying a generated embedding and its metadata.
 */
export class GenerateEmbeddingResponseDto {
  /** The embedding vector. */
  @ApiProperty({
    description: 'Generated vector embedding array',
    type: 'array',
    items: { type: 'number' },
    example: [0.1, 0.2, 0.3, -0.1, 0.5],
  })
  embedding: number[];

  /** Length of the embedding vector. */
  @ApiProperty({
    description: 'Number of dimensions in the embedding vector',
    example: 1536,
    minimum: 1,
  })
  dimensions: number;

  /** Name of the model reported for the embedding. */
  @ApiProperty({
    description: 'Model used to generate the embedding',
    example: 'text-embedding-ada-002',
  })
  model: string;
}
|
||||
|
||||
/**
 * Summary of a batch embedding run.
 */
export class GenerateAllEmbeddingsResponseDto {
  /** Number of embeddings processed successfully. */
  @ApiProperty({
    description: 'Number of embeddings successfully processed',
    example: 500,
    minimum: 0,
  })
  processed: number;

  /** Number of errors encountered. */
  @ApiProperty({
    description: 'Number of errors encountered during processing',
    example: 0,
    minimum: 0,
  })
  errors: number;

  /** Human-readable summary of the run. */
  @ApiProperty({
    description: 'Summary message of the operation',
    example: 'Processed 500 embeddings with 0 errors',
  })
  message: string;
}
|
||||
|
||||
@ApiTags('PgVector Operations')
|
||||
@Controller('pgvector')
|
||||
@UsePipes(new ValidationPipe({ transform: true }))
|
||||
export class PgVectorController {
|
||||
// Receives the PgVectorService used by the route handlers of this controller
// and stores it as a private read-only member.
constructor(private readonly pgVectorService: PgVectorService) {}
|
||||
|
||||
/**
 * POST `search`: runs a similarity search through the injected service.
 *
 * @param searchDto request body holding the query and optional limit,
 *                  category and threshold.
 * @returns the results, their count and the query that was used.
 */
@Post('search')
@ApiOperation({
  summary: 'PgVector similarity search',
  description:
    'Search ICD codes using pgvector similarity with the given query. Returns results ordered by similarity score.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
  type: VectorSearchDto,
  description: 'Search parameters for pgvector similarity search',
  examples: {
    diabetes: {
      summary: 'Search for diabetes',
      value: {
        query: 'diabetes mellitus type 2',
        limit: 10,
        category: 'ICD10',
        threshold: 0.7,
      },
    },
    heart: {
      summary: 'Search for heart conditions',
      value: {
        query: 'heart attack myocardial infarction',
        limit: 5,
        category: 'ICD10',
        threshold: 0.8,
      },
    },
  },
})
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Search results with similarity scores',
  type: VectorSearchResponseDto,
})
@ApiResponse({
  status: HttpStatus.BAD_REQUEST,
  description: 'Invalid search parameters or query',
  schema: {
    type: 'object',
    properties: {
      statusCode: { type: 'number', example: 400 },
      message: { type: 'string', example: 'Query is required' },
      error: { type: 'string', example: 'Bad Request' },
    },
  },
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Internal server error during pgvector search',
  schema: {
    type: 'object',
    properties: {
      statusCode: { type: 'number', example: 500 },
      message: { type: 'string', example: 'Internal server error' },
      error: { type: 'string', example: 'Internal Server Error' },
    },
  },
})
async vectorSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  const results = await this.pgVectorService.vectorSearch(
    searchDto.query,
    // `||` is kept on purpose: a limit of 0 is below the documented minimum
    // of 1, so it falls back to the default like a missing value does.
    searchDto.limit || 10,
    searchDto.category,
    // `??` instead of `||`: 0 is a documented, valid threshold and must not
    // be silently replaced by the 0.7 default.
    searchDto.threshold ?? 0.7,
  );

  return {
    data: results,
    total: results.length,
    query: searchDto.query,
  };
}
|
||||
|
||||
/**
 * GET `search`: same similarity search as the POST variant, driven by
 * query-string parameters.
 *
 * @param query     search text.
 * @param limit     optional result cap as a decimal string; defaults to 10
 *                  when missing or not parseable as a number.
 * @param category  optional ICD category filter, passed through unchanged.
 * @param threshold optional similarity cut-off as a string; defaults to 0.7
 *                  when missing or not parseable as a number.
 * @returns the results, their count and the query that was used.
 */
@Get('search')
@ApiOperation({
  summary: 'PgVector search via GET',
  description:
    'Search ICD codes using pgvector similarity via query parameters. Alternative to POST method.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiQuery({
  name: 'query',
  description: 'Search query text for pgvector similarity search',
  example: 'diabetes mellitus type 2',
  required: true,
  type: 'string',
})
@ApiQuery({
  name: 'limit',
  description: 'Maximum number of results to return',
  example: 10,
  required: false,
  type: 'number',
  minimum: 1,
  maximum: 100,
})
@ApiQuery({
  name: 'category',
  description: 'ICD category filter to narrow down search results',
  example: 'ICD10',
  required: false,
  type: 'string',
  enum: ['ICD9', 'ICD10'],
})
@ApiQuery({
  name: 'threshold',
  description: 'Similarity threshold (0.0 - 1.0) for filtering results',
  example: 0.7,
  required: false,
  type: 'number',
  minimum: 0.0,
  maximum: 1.0,
})
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Search results with similarity scores',
  type: VectorSearchResponseDto,
})
@ApiResponse({
  status: HttpStatus.BAD_REQUEST,
  description: 'Invalid query parameters',
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Internal server error during pgvector search',
})
async vectorSearchGet(
  @Query('query') query: string,
  @Query('limit') limit?: string,
  @Query('category') category?: string,
  @Query('threshold') threshold?: string,
): Promise<VectorSearchResponseDto> {
  // Explicit radix 10 so inputs such as "0x10" are not read as hexadecimal.
  const parsedLimit = limit ? parseInt(limit, 10) : NaN;
  const parsedThreshold = threshold ? parseFloat(threshold) : NaN;

  // Non-numeric input parses to NaN; use the defaults rather than handing
  // NaN to the service.
  const results = await this.pgVectorService.vectorSearch(
    query,
    Number.isNaN(parsedLimit) ? 10 : parsedLimit,
    category,
    Number.isNaN(parsedThreshold) ? 0.7 : parsedThreshold,
  );

  return {
    data: results,
    total: results.length,
    query,
  };
}
|
||||
|
||||
/**
 * POST `hybrid-search`: forwards the query, limit and category to the
 * service's hybrid search and wraps the results.
 *
 * @param searchDto request body; `threshold` is not used by this handler.
 * @returns the results, their count and the query that was used.
 */
@Post('hybrid-search')
@ApiOperation({
  summary: 'Hybrid search (PgVector + Text)',
  description:
    'Combine pgvector similarity with text search for better and more accurate results. Combines semantic understanding with traditional text matching.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
  type: VectorSearchDto,
  description: 'Search parameters for hybrid search',
  examples: {
    diabetes: {
      summary: 'Hybrid search for diabetes',
      value: {
        query: 'diabetes mellitus type 2',
        limit: 15,
        category: 'ICD10',
      },
    },
  },
})
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Hybrid search results combining pgvector and text search',
  type: VectorSearchResponseDto,
})
@ApiResponse({
  status: HttpStatus.BAD_REQUEST,
  description: 'Invalid search parameters',
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Internal server error during hybrid search',
})
async hybridSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  // A missing or zero limit falls back to 10.
  const maxResults = searchDto.limit || 10;
  const matches = await this.pgVectorService.hybridSearch(
    searchDto.query,
    maxResults,
    searchDto.category,
  );

  const response: VectorSearchResponseDto = {
    data: matches,
    total: matches.length,
    query: searchDto.query,
  };
  return response;
}
|
||||
|
||||
/**
 * POST `generate-embedding`: asks the service for an embedding of the given
 * text and reports its length and model name.
 *
 * @param requestDto request body with the text and an optional model name.
 * @returns the embedding, its length, and the requested model name (or
 *          `'text-embedding-ada-002'` when none was supplied).
 */
@Post('generate-embedding')
@ApiOperation({
  summary: 'Generate text embedding',
  description:
    'Generate vector embedding for the given text using OpenAI. Returns 1536-dimensional vector.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
  type: EmbeddingRequestDto,
  description: 'Text to generate embedding for',
  examples: {
    diabetes: {
      summary: 'Generate embedding for diabetes text',
      value: {
        text: 'diabetes mellitus',
        model: 'text-embedding-ada-002',
      },
    },
    heart: {
      summary: 'Generate embedding for heart condition',
      value: {
        text: 'acute myocardial infarction',
        model: 'text-embedding-ada-002',
      },
    },
  },
})
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Generated embedding vector with metadata',
  type: GenerateEmbeddingResponseDto,
})
@ApiResponse({
  status: HttpStatus.BAD_REQUEST,
  description: 'Invalid text input',
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error generating embedding',
})
async generateEmbedding(
  @Body() requestDto: EmbeddingRequestDto,
): Promise<GenerateEmbeddingResponseDto> {
  // The model is forwarded exactly as received; the default name is applied
  // only to the value echoed back in the response.
  const vector = await this.pgVectorService.generateEmbedding(
    requestDto.text,
    requestDto.model,
  );
  const reportedModel = requestDto.model || 'text-embedding-ada-002';

  return {
    embedding: vector,
    dimensions: vector.length,
    model: reportedModel,
  };
}
|
||||
|
||||
/**
 * POST `generate-and-store-all-embeddings`: triggers the service's batch
 * run and adds a summary message to its result.
 *
 * @returns the service result plus a `message` built from its `processed`
 *          and `errors` counts.
 */
@Post('generate-and-store-all-embeddings')
@ApiOperation({
  summary: 'Generate and store embeddings for all ICD codes',
  description:
    'Batch generate embeddings for all ICD codes and store them in the database with pgvector. This process may take some time depending on the number of codes.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Embedding generation and storage results summary',
  type: GenerateAllEmbeddingsResponseDto,
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error during batch embedding generation and storage',
})
async generateAndStoreAllEmbeddings(): Promise<GenerateAllEmbeddingsResponseDto> {
  const outcome = await this.pgVectorService.generateAndStoreAllEmbeddings();
  const summary = `Processed ${outcome.processed} embeddings with ${outcome.errors} errors`;

  // Every field of the service result is preserved; only `message` is added.
  return { ...outcome, message: summary };
}
|
||||
|
||||
/**
 * Returns embedding coverage statistics exactly as produced by
 * `PgVectorService.getEmbeddingStats`.
 */
@Get('stats')
@ApiOperation({
  summary: 'Get embedding statistics',
  description:
    'Get comprehensive statistics about ICD codes and their embedding status in the pgvector store.',
  tags: ['PgVector Operations'],
})
@ApiProduces('application/json')
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Embedding statistics and pgvector store status',
  type: EmbeddingStatsResponseDto,
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error retrieving statistics',
})
async getEmbeddingStats(): Promise<EmbeddingStatsResponseDto> {
  const stats = await this.pgVectorService.getEmbeddingStats();
  return stats;
}
|
||||
|
||||
/**
 * Returns the current pgvector store status exactly as produced by
 * `PgVectorService.getVectorStoreStatus`.
 */
@Get('status')
@ApiOperation({
  summary: 'Get pgvector store status',
  description:
    'Get current operational status of the pgvector store including initialization state and document count.',
  tags: ['PgVector Operations'],
})
@ApiProduces('application/json')
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Current pgvector store status and configuration',
  type: VectorStoreStatusDto,
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error retrieving pgvector store status',
})
async getVectorStoreStatus(): Promise<VectorStoreStatusDto> {
  const storeStatus = await this.pgVectorService.getVectorStoreStatus();
  return storeStatus;
}
|
||||
|
||||
/**
 * Asks the service to (re)initialize the pgvector store, then reports the
 * document count from a fresh status lookup.
 *
 * NOTE(review): the Swagger description below says codes are loaded into the
 * store; confirm that against the service implementation.
 *
 * @returns A fixed success message and the current `documentCount`.
 */
@Post('initialize')
@ApiOperation({
  summary: 'Initialize pgvector store',
  description:
    'Initialize or reinitialize the pgvector store with all available ICD codes. This loads codes from the database into the pgvector store.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Pgvector store initialization results',
  type: InitializeResponseDto,
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error during pgvector store initialization',
})
async initializeVectorStore(): Promise<InitializeResponseDto> {
  await this.pgVectorService.initializeVectorStore();
  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();

  return { message: 'Pgvector store initialized successfully', documentCount };
}
|
||||
|
||||
/**
 * Asks the service to refresh the pgvector store, then reports the document
 * count from a fresh status lookup.
 *
 * @returns A fixed success message and the current `documentCount`.
 */
@Post('refresh')
@ApiOperation({
  summary: 'Refresh pgvector store',
  description:
    'Refresh the pgvector store with the latest ICD codes data from the database. Useful after data updates.',
  tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
  status: HttpStatus.OK,
  description: 'Pgvector store refresh results',
  type: RefreshResponseDto,
})
@ApiResponse({
  status: HttpStatus.INTERNAL_SERVER_ERROR,
  description: 'Error during pgvector store refresh',
})
async refreshVectorStore(): Promise<RefreshResponseDto> {
  await this.pgVectorService.refreshVectorStore();
  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();

  return { message: 'Pgvector store refreshed successfully', documentCount };
}
|
||||
}
|
||||
10
src/icd/pgvector.module.ts
Normal file
10
src/icd/pgvector.module.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { PgVectorController } from './pgvector.controller';
|
||||
import { PgVectorService } from './pgvector.service';
|
||||
|
||||
/**
 * Nest module for the pgvector feature: registers `PgVectorController`,
 * provides `PgVectorService`, and exports the service so other modules that
 * import this one can inject it.
 */
@Module({
  providers: [PgVectorService],
  controllers: [PgVectorController],
  exports: [PgVectorService],
})
export class PgVectorModule {}
|
||||
611
src/icd/pgvector.service.ts
Normal file
611
src/icd/pgvector.service.ts
Normal file
@@ -0,0 +1,611 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { PrismaClient } from '../../generated/prisma';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
|
||||
import { Document } from 'langchain/document';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
/**
 * One ICD code hit returned by the search methods of `PgVectorService`.
 */
export interface VectorSearchResult {
  /** Value of the `id` column of the matching `icd_codes` row. */
  id: string;
  /** Value of the `code` column. */
  code: string;
  /** Value of the `display` column. */
  display: string;
  /** Value of the `version` column. */
  version: string;
  /** Value of the `category` column. */
  category: string;
  /**
   * Relevance score; its meaning depends on the search that produced the hit
   * (vector search, text search, or a weighted combination of both).
   */
  similarity: number;
}
|
||||
|
||||
/**
 * Input for an embedding request.
 */
export interface EmbeddingRequest {
  /** Text to embed. */
  text: string;
  /** Optional embedding model name. */
  model?: string;
}
|
||||
|
||||
/**
 * Columns selected from `icd_codes` when preparing rows for embedding.
 * Field types mirror `VectorSearchResult`; the actual column types are not
 * visible from this file (TODO confirm against the schema).
 */
interface IcdCodeRow {
  id: string;
  code: string;
  display: string;
  version: string;
  category: string;
}

/**
 * Generates OpenAI embeddings for ICD codes, stores them in the `icd_codes`
 * table, and runs vector / text / hybrid searches over that table.
 *
 * All SQL goes through a dedicated `pg` pool built from `DATABASE_URL`.
 * The LangChain `PGVectorStore` is created on demand by
 * `initializeVectorStore`; the search methods below query the table directly
 * and do not go through the store.
 */
@Injectable()
export class PgVectorService {
  private readonly logger = new Logger(PgVectorService.name);
  // Not used for queries in this class; it is only disconnected on shutdown.
  private readonly prisma = new PrismaClient();
  private readonly pool: Pool;
  private vectorStore: PGVectorStore | null = null;
  private embeddings: OpenAIEmbeddings | null = null;

  constructor() {
    // Connection pool used for every raw SQL statement in this service.
    this.pool = new Pool({
      connectionString: process.env.DATABASE_URL,
      max: 20,
      idleTimeoutMillis: 30000,
      connectionTimeoutMillis: 2000,
    });

    this.initializeEmbeddings();
  }

  /**
   * Creates the OpenAI embeddings client from `OPENAI_API_KEY` and
   * `OPENAI_API_MODEL` (default `text-embedding-ada-002`).
   *
   * This runs synchronously from the constructor. On a missing key or a
   * construction failure it logs the problem and leaves `embeddings` as
   * `null` instead of throwing: every public method already checks for a
   * null client and reports a clear error, whereas throwing from a
   * fire-and-forget async call would surface as an unhandled rejection.
   */
  private initializeEmbeddings(): void {
    const apiKey = process.env.OPENAI_API_KEY;
    if (!apiKey) {
      this.logger.error(
        'OPENAI_API_KEY not found. Vector operations require OpenAI API key.',
      );
      return;
    }

    const modelName = process.env.OPENAI_API_MODEL || 'text-embedding-ada-002';

    try {
      this.embeddings = new OpenAIEmbeddings({
        openAIApiKey: apiKey,
        modelName,
        maxConcurrency: 5,
      });
      this.logger.log(
        `OpenAI embeddings initialized successfully with model: ${modelName}`,
      );
    } catch (error) {
      this.embeddings = null;
      this.logger.error('Failed to initialize OpenAI embeddings:', error);
    }
  }

  /** Extracts a printable message from a caught value of unknown type. */
  private describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Creates (or re-creates) the LangChain pgvector store bound to the
   * `icd_codes` table. No rows are inserted by this call.
   *
   * When a store already exists, the new one is created first and the old
   * one is closed afterwards so its connections are not leaked.
   *
   * @throws Error when the embeddings client is unavailable, `DATABASE_URL`
   *   is unset, or the store cannot be initialized.
   */
  async initializeVectorStore(): Promise<void> {
    try {
      this.logger.log('Initializing pgvector store...');

      if (!this.embeddings) {
        throw new Error(
          'OpenAI embeddings not initialized. Cannot create vector store.',
        );
      }

      const connectionString = process.env.DATABASE_URL;
      if (!connectionString) {
        throw new Error('DATABASE_URL not found');
      }

      const previousStore = this.vectorStore;

      this.vectorStore = await PGVectorStore.initialize(this.embeddings, {
        postgresConnectionOptions: {
          connectionString,
        },
        tableName: 'icd_codes',
        columns: {
          idColumnName: 'id',
          vectorColumnName: 'embedding',
          contentColumnName: 'content',
          metadataColumnName: 'metadata',
        },
      });

      if (previousStore) {
        try {
          await previousStore.end();
        } catch (closeError) {
          // The replacement store is already in place; a failed close of the
          // old one should not fail the (re)initialization.
          this.logger.warn(
            `Failed to close previous pgvector store: ${this.describeError(closeError)}`,
          );
        }
      }

      this.logger.log('Pgvector store initialized successfully');
    } catch (error) {
      this.logger.error('Error initializing pgvector store:', error);
      throw error;
    }
  }

  /**
   * Generates an embedding for `text` with the configured OpenAI client.
   *
   * @param text Text to embed; its first 100 characters are logged.
   * @param model Accepted for interface compatibility but currently ignored:
   *   the client created at construction time (model from `OPENAI_API_MODEL`)
   *   is always used.
   * @returns The embedding vector.
   * @throws Error prefixed with "Failed to generate embedding:" when the
   *   client is unavailable or the embedding call fails.
   */
  async generateEmbedding(
    text: string,
    model: string = 'text-embedding-ada-002',
  ): Promise<number[]> {
    try {
      this.logger.log(
        `Generating embedding for text: ${text.substring(0, 100)}...`,
      );

      if (!this.embeddings) {
        throw new Error(
          'OpenAI embeddings not initialized. Please check your API configuration.',
        );
      }

      const embedding = await this.embeddings.embedQuery(text);
      this.logger.log(
        `Generated OpenAI embedding with ${embedding.length} dimensions`,
      );
      return embedding;
    } catch (error) {
      this.logger.error('Error generating embedding:', error);
      throw new Error(
        `Failed to generate embedding: ${this.describeError(error)}`,
      );
    }
  }

  /**
   * Embeds each row as "`code` - `display`" and writes the vector, a JSON
   * metadata copy of the row, and the embedded text back to `icd_codes`.
   *
   * Rows are processed one at a time; a failure on one row is logged and
   * counted, and processing continues with the next row.
   *
   * @param rows Rows to embed.
   * @param progressSuffix Text appended to the progress log line emitted
   *   after every 10 successfully processed rows.
   * @returns Number of rows stored and number of rows that failed.
   */
  private async embedAndStoreRows(
    rows: IcdCodeRow[],
    progressSuffix = '',
  ): Promise<{ processed: number; errors: number }> {
    let processed = 0;
    let errors = 0;

    for (const row of rows) {
      try {
        const text = `${row.code} - ${row.display}`;
        const embedding = await this.generateEmbedding(text);

        // pgvector accepts the textual form "[v1,v2,...]" cast to vector.
        const vectorString = `[${embedding.join(',')}]`;

        await this.pool.query(
          `UPDATE icd_codes
           SET embedding = $1::vector,
               metadata = $2::jsonb,
               content = $3
           WHERE id = $4`,
          [
            vectorString,
            JSON.stringify({
              id: row.id,
              code: row.code,
              display: row.display,
              version: row.version,
              category: row.category,
            }),
            text,
            row.id,
          ],
        );

        processed++;

        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${rows.length} sample embeddings${progressSuffix}`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors++;
      }
    }

    return { processed, errors };
  }

  /**
   * Generates and stores embeddings for up to `limit` ICD codes that have no
   * embedding yet. Despite the name, one call handles a single batch only;
   * call it repeatedly to cover the whole table.
   *
   * @param limit Maximum number of rows to process in this call (default 100).
   * @returns Rows stored, rows that failed, and the number of rows selected.
   */
  async generateAndStoreAllEmbeddings(limit: number = 100): Promise<{
    processed: number;
    errors: number;
    totalSample: number;
  }> {
    try {
      this.logger.log(
        `Starting batch embedding generation and storage for sample ${limit} ICD codes...`,
      );

      const selection = await this.pool.query(
        'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL LIMIT $1',
        [limit],
      );
      const rows: IcdCodeRow[] = selection.rows;

      if (rows.length === 0) {
        this.logger.log('All ICD codes already have embeddings');
        return { processed: 0, errors: 0, totalSample: 0 };
      }

      this.logger.log(
        `Found ${rows.length} sample codes without embeddings (limited to ${limit})`,
      );

      const { processed, errors } = await this.embedAndStoreRows(rows);

      this.logger.log(
        `Sample embedding generation and storage completed. Processed: ${processed}, Errors: ${errors}, Total Sample: ${rows.length}`,
      );
      return { processed, errors, totalSample: rows.length };
    } catch (error) {
      this.logger.error('Error in generateAndStoreAllEmbeddings:', error);
      throw error;
    }
  }

  /**
   * Generates and stores embeddings for up to `limit` ICD codes of one
   * category that have no embedding yet.
   *
   * @param category Value matched exactly against the `category` column.
   * @param limit Maximum number of rows to process in this call (default 100).
   * @returns Rows stored, rows that failed, rows selected, and the category.
   */
  async generateAndStoreSampleEmbeddingsByCategory(
    category: string,
    limit: number = 100,
  ): Promise<{
    processed: number;
    errors: number;
    totalSample: number;
    category: string;
  }> {
    try {
      this.logger.log(
        `Starting batch embedding generation for sample ${limit} ICD codes in category: ${category}`,
      );

      const selection = await this.pool.query(
        'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL AND category = $1 LIMIT $2',
        [category, limit],
      );
      const rows: IcdCodeRow[] = selection.rows;

      if (rows.length === 0) {
        this.logger.log(
          `No ICD codes found in category '${category}' without embeddings`,
        );
        return { processed: 0, errors: 0, totalSample: 0, category };
      }

      this.logger.log(
        `Found ${rows.length} sample codes in category '${category}' without embeddings (limited to ${limit})`,
      );

      const { processed, errors } = await this.embedAndStoreRows(
        rows,
        ` in category '${category}'`,
      );

      this.logger.log(
        `Sample embedding generation completed for category '${category}'. Processed: ${processed}, Errors: ${errors}, Total Sample: ${rows.length}`,
      );
      return { processed, errors, totalSample: rows.length, category };
    } catch (error) {
      this.logger.error(
        `Error in generateAndStoreSampleEmbeddingsByCategory for category '${category}':`,
        error,
      );
      throw error;
    }
  }

  /**
   * Vector similarity search over rows of `icd_codes` that have an embedding.
   *
   * The query text is embedded, rows are ordered by the pgvector `<=>`
   * distance to that embedding, and `similarity` is reported as
   * `1 - distance`. The `threshold` filter is applied after the SQL `LIMIT`,
   * so fewer than `limit` hits may be returned.
   *
   * @param query Text to search for.
   * @param limit Maximum number of rows fetched from the database (default 10).
   * @param category Optional exact-match filter on the `category` column.
   * @param threshold Minimum similarity a row must reach to be returned (default 0.7).
   * @returns Matching rows, nearest first.
   */
  async vectorSearch(
    query: string,
    limit: number = 10,
    category?: string,
    threshold: number = 0.7,
  ): Promise<VectorSearchResult[]> {
    try {
      this.logger.log(`Performing pgvector search for: ${query}`);

      if (!this.embeddings) {
        throw new Error('OpenAI embeddings not initialized');
      }

      const queryEmbedding = await this.generateEmbedding(query);
      const vectorString = `[${queryEmbedding.join(',')}]`;

      // $1 is always the query vector; further placeholders are numbered by
      // their position in `params`.
      const params: (string | number)[] = [vectorString];
      const conditions = ['embedding IS NOT NULL'];

      if (category) {
        params.push(category);
        conditions.push(`category = $${params.length}`);
      }

      params.push(limit);
      const sql = `
        SELECT
          id, code, display, version, category,
          1 - (embedding <=> $1::vector) as similarity
        FROM icd_codes
        WHERE ${conditions.join(' AND ')}
        ORDER BY embedding <=> $1::vector ASC
        LIMIT $${params.length}
      `;

      const result = await this.pool.query(sql, params);

      const hits: VectorSearchResult[] = result.rows
        .map(
          (row: any): VectorSearchResult => ({
            id: row.id,
            code: row.code,
            display: row.display,
            version: row.version,
            category: row.category,
            similarity: parseFloat(row.similarity),
          }),
        )
        .filter((hit: VectorSearchResult) => hit.similarity >= threshold);

      this.logger.log(
        `Pgvector search returned ${hits.length} results for query: "${query}"`,
      );
      return hits;
    } catch (error) {
      this.logger.error('Error in pgvector search:', error);
      throw error;
    }
  }

  /**
   * Hybrid search: merges vector-similarity hits with substring text hits.
   *
   * Both searches fetch `limit * 2` candidates (the vector search with a
   * fixed 0.5 threshold). A code's final score is
   * `vectorSimilarity * vectorWeight + textSimilarity * textWeight`, where a
   * missing side contributes nothing.
   *
   * @param query Text to search for.
   * @param limit Maximum number of results returned (default 10).
   * @param category Optional exact-match filter on the `category` column.
   * @param vectorWeight Weight of the vector score (default 0.7).
   * @param textWeight Weight of the text score (default 0.3).
   * @returns Up to `limit` results, highest combined score first.
   */
  async hybridSearch(
    query: string,
    limit: number = 10,
    category?: string,
    vectorWeight: number = 0.7,
    textWeight: number = 0.3,
  ): Promise<VectorSearchResult[]> {
    try {
      this.logger.log(`Performing hybrid search for: ${query}`);

      // The two lookups are independent, so run them concurrently.
      const [vectorResults, textResults] = await Promise.all([
        this.vectorSearch(query, limit * 2, category, 0.5),
        this.textSearch(query, limit * 2, category),
      ]);

      const combined = new Map<string, VectorSearchResult>();

      for (const hit of vectorResults) {
        combined.set(hit.id, {
          ...hit,
          similarity: hit.similarity * vectorWeight,
        });
      }

      for (const hit of textResults) {
        const textScore = (hit.similarity || 0.5) * textWeight;
        const existing = combined.get(hit.id);
        if (existing) {
          existing.similarity += textScore;
        } else {
          combined.set(hit.id, { ...hit, similarity: textScore });
        }
      }

      return Array.from(combined.values())
        .sort((a, b) => b.similarity - a.similarity)
        .slice(0, limit);
    } catch (error) {
      this.logger.error('Error in hybrid search:', error);
      throw error;
    }
  }

  /**
   * Case-insensitive substring search on the `code` and `display` columns.
   *
   * Every hit gets the fixed score 0.5; results are ordered by `code`.
   * LIKE wildcards (`%`, `_`) and backslashes in `query` are escaped so the
   * user's text is matched literally.
   *
   * @param query Substring to look for; an empty string disables the filter.
   * @param limit Maximum number of rows returned.
   * @param category Optional exact-match filter on the `category` column.
   */
  private async textSearch(
    query: string,
    limit: number,
    category?: string,
  ): Promise<VectorSearchResult[]> {
    try {
      const params: (string | number)[] = [];
      const whereConditions: string[] = [];

      if (category) {
        params.push(category);
        whereConditions.push(`category = $${params.length}`);
      }

      if (query) {
        // Backslash is the default LIKE escape character in PostgreSQL.
        const literalQuery = query.replace(/[\\%_]/g, '\\$&');
        params.push(`%${literalQuery}%`);
        whereConditions.push(
          `(code ILIKE $${params.length} OR display ILIKE $${params.length})`,
        );
      }

      let sql = 'SELECT id, code, display, version, category FROM icd_codes';
      if (whereConditions.length > 0) {
        sql += ' WHERE ' + whereConditions.join(' AND ');
      }

      params.push(limit);
      sql += ' ORDER BY code ASC LIMIT $' + params.length;

      const result = await this.pool.query(sql, params);

      return result.rows.map(
        (row: any): VectorSearchResult => ({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
          similarity: 0.5, // Default text similarity score
        }),
      );
    } catch (error) {
      this.logger.error('Error in text search:', error);
      throw error;
    }
  }

  /**
   * Reports how many `icd_codes` rows have an embedding.
   *
   * @returns Total rows, rows with and without an embedding, the coverage
   *   percentage rounded to two decimals, and whether the LangChain store
   *   has been initialized.
   */
  async getEmbeddingStats(): Promise<{
    total: number;
    withEmbeddings: number;
    withoutEmbeddings: number;
    percentage: number;
    vectorStoreStatus: string;
  }> {
    try {
      // COUNT(embedding) counts only non-null values, so one scan yields
      // both figures from the same snapshot.
      const result = await this.pool.query(
        'SELECT COUNT(*) AS total, COUNT(embedding) AS with_embeddings FROM icd_codes',
      );

      const total = parseInt(result.rows[0].total, 10);
      const withEmbeddings = parseInt(result.rows[0].with_embeddings, 10);
      const withoutEmbeddings = total - withEmbeddings;
      const percentage = total > 0 ? (withEmbeddings / total) * 100 : 0;
      const vectorStoreStatus = this.vectorStore
        ? 'Initialized'
        : 'Not Initialized';

      return {
        total,
        withEmbeddings,
        withoutEmbeddings,
        percentage: Math.round(percentage * 100) / 100,
        vectorStoreStatus,
      };
    } catch (error) {
      this.logger.error('Error getting embedding stats:', error);
      throw error;
    }
  }

  /**
   * Refreshes the vector store by re-running `initializeVectorStore`.
   */
  async refreshVectorStore(): Promise<void> {
    try {
      this.logger.log('Refreshing pgvector store...');
      await this.initializeVectorStore();
      this.logger.log('Pgvector store refreshed successfully');
    } catch (error) {
      this.logger.error('Error refreshing pgvector store:', error);
      throw error;
    }
  }

  /**
   * Reports the current state of the vector store.
   *
   * @returns Whether the LangChain store exists, the number of rows with an
   *   embedding, a label for the embedding model, and the time of this call
   *   (`lastUpdated` is always "now", not a stored timestamp).
   */
  async getVectorStoreStatus(): Promise<{
    initialized: boolean;
    documentCount: number;
    embeddingModel: string;
    lastUpdated: Date;
  }> {
    try {
      const result = await this.pool.query(
        'SELECT COUNT(*) as count FROM icd_codes WHERE embedding IS NOT NULL',
      );
      const documentCount = parseInt(result.rows[0].count, 10);

      return {
        initialized: !!this.vectorStore,
        documentCount,
        embeddingModel: this.embeddings
          ? `OpenAI ${process.env.OPENAI_API_MODEL || 'text-embedding-ada-002'}`
          : 'Not Available',
        lastUpdated: new Date(),
      };
    } catch (error) {
      this.logger.error('Error getting vector store status:', error);
      throw error;
    }
  }

  /**
   * Releases external resources on module shutdown: the LangChain store (if
   * one was created), the Prisma client, and the `pg` pool. The pool is
   * closed even when an earlier step fails.
   */
  async onModuleDestroy(): Promise<void> {
    const store = this.vectorStore;
    this.vectorStore = null;

    if (store) {
      try {
        await store.end();
      } catch (error) {
        this.logger.warn(
          `Failed to close pgvector store: ${this.describeError(error)}`,
        );
      }
    }

    try {
      await this.prisma.$disconnect();
    } finally {
      await this.pool.end();
    }
  }
}
|
||||
129
src/main.ts
129
src/main.ts
@@ -1,8 +1,133 @@
|
||||
import { NestFactory } from '@nestjs/core';
|
||||
import { AppModule } from './app.module';
|
||||
import { Logger, ValidationPipe } from '@nestjs/common';
|
||||
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
||||
|
||||
/**
 * Splits a comma-separated environment value into trimmed, non-empty
 * entries. Returns `fallback` only when the variable is not set at all.
 */
function parseCsvEnv(value: string | undefined, fallback: string[]): string[] {
  if (value === undefined) {
    return fallback;
  }
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}

/**
 * Creates the Nest application, applies CORS, global validation and
 * (optionally) Swagger, then starts listening.
 *
 * The server is started exactly once, after all configuration is applied.
 *
 * Environment variables read: `PORT`, `HOST`, `NODE_ENV`, `CORS_ORIGINS`,
 * `CORS_METHODS`, `CORS_HEADERS`, `CORS_CREDENTIALS`, `ENABLE_DOCS`,
 * `APP_URL`, `HEALTH_CHECK_ENABLED`, `HEALTH_CHECK_PATH`.
 */
async function bootstrap(): Promise<void> {
  const logger = new Logger('Bootstrap');

  const app = await NestFactory.create(AppModule);

  // Environment configuration
  const port = process.env.PORT ?? 3000;
  const host = process.env.HOST ?? 'localhost';
  const nodeEnv = process.env.NODE_ENV ?? 'development';

  // CORS configuration (comma-separated lists; whitespace around entries is ignored)
  const corsOrigins = parseCsvEnv(process.env.CORS_ORIGINS, [
    'http://localhost:3000',
  ]);
  const corsMethods = parseCsvEnv(process.env.CORS_METHODS, [
    'GET',
    'HEAD',
    'PUT',
    'PATCH',
    'POST',
    'DELETE',
    'OPTIONS',
  ]);
  const corsHeaders = parseCsvEnv(process.env.CORS_HEADERS, [
    'Content-Type',
    'Accept',
    'Authorization',
    'X-Requested-With',
  ]);
  const corsCredentials = process.env.CORS_CREDENTIALS === 'true';

  app.enableCors({
    origin: corsOrigins,
    methods: corsMethods,
    allowedHeaders: corsHeaders,
    credentials: corsCredentials,
  });

  // Global validation: strip unknown properties, reject requests that carry
  // them, and convert payloads to their DTO types.
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      forbidNonWhitelisted: true,
      transform: true,
      transformOptions: {
        enableImplicitConversion: true,
      },
    }),
  );

  // Swagger documentation, only when explicitly enabled
  if (process.env.ENABLE_DOCS === 'true') {
    const config = new DocumentBuilder()
      .setTitle('Claim Guard API')
      .setDescription(
        'API documentation for Claim Guard Backend - ICD Code Management System',
      )
      .setVersion('1.0.0')
      .setContact(
        'Development Team',
        'https://github.com/your-org/claim-guard-be',
        'dev@yourdomain.com',
      )
      .setLicense('MIT', 'https://opensource.org/licenses/MIT')
      .addServer(
        process.env.APP_URL || 'http://localhost:3000',
        'Development Server',
      )
      .addTag('ICD', 'ICD Code management operations')
      .addTag('Health', 'Application health and monitoring')
      .addBearerAuth(
        {
          type: 'http',
          scheme: 'bearer',
          bearerFormat: 'JWT',
          name: 'JWT',
          description: 'Enter JWT token',
          in: 'header',
        },
        'JWT-auth',
      )
      .build();

    const document = SwaggerModule.createDocument(app, config);
    SwaggerModule.setup('docs', app, document, {
      swaggerOptions: {
        persistAuthorization: true,
        docExpansion: 'none',
        filter: true,
        showRequestDuration: true,
      },
      customSiteTitle: 'Claim Guard API Documentation',
      customfavIcon: '/favicon.ico',
      customCss: '.swagger-ui .topbar { display: none }',
    });

    logger.log(
      `📚 Swagger Documentation enabled at: http://${host}:${port}/docs`,
    );
  }

  // Global prefix for API endpoints (optional)
  // app.setGlobalPrefix('api/v1');

  // NOTE(review): REQUEST_TIMEOUT used to be parsed here but was never
  // applied to the server; wire it up explicitly or drop the setting.

  // Graceful shutdown
  app.enableShutdownHooks();

  await app.listen(port, host);

  logger.log(`🚀 Application is running on: http://${host}:${port}`);
  logger.log(`🌍 Environment: ${nodeEnv}`);
  logger.log(`🔐 CORS Origins: ${corsOrigins.join(', ')}`);

  if (process.env.HEALTH_CHECK_ENABLED === 'true') {
    logger.log(
      `❤️ Health Check available at: http://${host}:${port}${process.env.HEALTH_CHECK_PATH || '/health'}`,
    );
  }
}
|
||||
// Start the application exactly once. A startup failure is logged and the
// process exits with a non-zero code instead of leaving a rejected promise
// unhandled.
bootstrap().catch((error: unknown) => {
  console.error('❌ Error starting server:', error);
  process.exit(1);
});
|
||||
|
||||
Reference in New Issue
Block a user