add pg vector and embed
This commit is contained in:
113
.dockerignore
Normal file
113
.dockerignore
Normal file
@@ -0,0 +1,113 @@
|
||||
# Dependencies
|
||||
node_modules
|
||||
npm-debug.log*
|
||||
yarn-debug.log*
|
||||
yarn-error.log*
|
||||
|
||||
# Production build
|
||||
dist
|
||||
|
||||
# Environment files
|
||||
.env
|
||||
.env.local
|
||||
.env.development.local
|
||||
.env.test.local
|
||||
.env.production.local
|
||||
|
||||
# IDE files
|
||||
.vscode
|
||||
.idea
|
||||
*.swp
|
||||
*.swo
|
||||
*~
|
||||
|
||||
# OS generated files
|
||||
.DS_Store
|
||||
.DS_Store?
|
||||
._*
|
||||
.Spotlight-V100
|
||||
.Trashes
|
||||
ehthumbs.db
|
||||
Thumbs.db
|
||||
|
||||
# Logs
|
||||
logs
|
||||
*.log
|
||||
|
||||
# Coverage directory used by tools like istanbul
|
||||
coverage
|
||||
*.lcov
|
||||
|
||||
# nyc test coverage
|
||||
.nyc_output
|
||||
|
||||
# Dependency directories
|
||||
node_modules/
|
||||
jspm_packages/
|
||||
|
||||
# Optional npm cache directory
|
||||
.npm
|
||||
|
||||
# Optional REPL history
|
||||
.node_repl_history
|
||||
|
||||
# Output of 'npm pack'
|
||||
*.tgz
|
||||
|
||||
# Yarn Integrity file
|
||||
.yarn-integrity
|
||||
|
||||
# parcel-bundler cache (https://parceljs.org/)
|
||||
.cache
|
||||
.parcel-cache
|
||||
|
||||
# next.js build output
|
||||
.next
|
||||
|
||||
# nuxt.js build output
|
||||
.nuxt
|
||||
|
||||
# vuepress build output
|
||||
.vuepress/dist
|
||||
|
||||
# Serverless directories
|
||||
.serverless
|
||||
|
||||
# FuseBox cache
|
||||
.fusebox/
|
||||
|
||||
# DynamoDB Local files
|
||||
.dynamodb/
|
||||
|
||||
# TernJS port file
|
||||
.tern-port
|
||||
|
||||
# Stores VSCode versions used for testing VSCode extensions
|
||||
.vscode-test
|
||||
|
||||
# Generated files
|
||||
generated/
|
||||
|
||||
# Git
|
||||
.git
|
||||
.gitignore
|
||||
|
||||
# Docker
|
||||
Dockerfile
|
||||
.dockerignore
|
||||
docker-compose*.yml
|
||||
|
||||
# Documentation
|
||||
*.md
|
||||
!README.md
|
||||
|
||||
# Test files (keep only the Excel files)
|
||||
test/**
|
||||
!test/*.xlsx
|
||||
|
||||
# Uploads (will be created at runtime)
|
||||
uploads/
|
||||
|
||||
# Temporary files
|
||||
.temp
|
||||
.tmp
|
||||
179
.env.example
Normal file
179
.env.example
Normal file
@@ -0,0 +1,179 @@
|
||||
# =================================
|
||||
# APPLICATION CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Environment mode: development, staging, production
|
||||
NODE_ENV=development
|
||||
|
||||
# Application port
|
||||
PORT=3000
|
||||
|
||||
# Application host (for binding)
|
||||
HOST=localhost
|
||||
|
||||
# Application base URL (for CORS and other purposes)
|
||||
APP_URL=http://localhost:3000
|
||||
|
||||
# =================================
|
||||
# DATABASE CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# PostgreSQL Database URL
|
||||
# Format: postgresql://username:password@host:port/database_name
|
||||
DATABASE_URL=postgresql://username:password@localhost:5432/claim_guard
|
||||
|
||||
# Database connection pool settings
|
||||
DB_POOL_MIN=2
|
||||
DB_POOL_MAX=10
|
||||
|
||||
# =================================
|
||||
# CORS CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Allowed origins for CORS (comma-separated)
|
||||
# Use * to allow all origins (NOT recommended for production; browsers reject * when CORS_CREDENTIALS=true)
|
||||
# Examples:
|
||||
# - Development: http://localhost:3000,http://localhost:3001
|
||||
# - Production: https://yourdomain.com,https://www.yourdomain.com
|
||||
CORS_ORIGINS=http://localhost:3000,http://localhost:3001,http://localhost:8080
|
||||
|
||||
# Allowed methods for CORS (comma-separated)
|
||||
CORS_METHODS=GET,HEAD,PUT,PATCH,POST,DELETE,OPTIONS
|
||||
|
||||
# Allowed headers for CORS (comma-separated)
|
||||
CORS_HEADERS=Content-Type,Accept,Authorization,X-Requested-With
|
||||
|
||||
# Allow credentials in CORS requests
|
||||
CORS_CREDENTIALS=true
|
||||
|
||||
# =================================
|
||||
# SECURITY CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# JWT Secret for authentication (generate strong secret for production)
|
||||
JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
|
||||
|
||||
# JWT Token expiration time
|
||||
JWT_EXPIRES_IN=24h
|
||||
|
||||
# JWT Refresh token expiration
|
||||
JWT_REFRESH_EXPIRES_IN=7d
|
||||
|
||||
# API Rate limiting (requests per minute)
|
||||
RATE_LIMIT_MAX=100
|
||||
|
||||
# =================================
|
||||
# LOGGING CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Log level: error, warn, info, debug, verbose
|
||||
LOG_LEVEL=info
|
||||
|
||||
# Log format: json, simple
|
||||
LOG_FORMAT=simple
|
||||
|
||||
# Enable request logging
|
||||
LOG_REQUESTS=true
|
||||
|
||||
# =================================
|
||||
# FILE UPLOAD CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Maximum file size for uploads (in bytes)
|
||||
# 10MB = 10485760, 50MB = 52428800
|
||||
MAX_FILE_SIZE=10485760
|
||||
|
||||
# Allowed file types for upload (comma-separated)
|
||||
ALLOWED_FILE_TYPES=.xlsx,.xls,.csv
|
||||
|
||||
# Upload directory path
|
||||
UPLOAD_DIR=./uploads
|
||||
|
||||
# =================================
|
||||
# CACHE CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Redis URL for caching (optional)
|
||||
# REDIS_URL=redis://localhost:6379
|
||||
|
||||
# Cache TTL in seconds (default: 1 hour)
|
||||
CACHE_TTL=3600
|
||||
|
||||
# =================================
|
||||
# EMAIL CONFIGURATION (Optional)
|
||||
# =================================
|
||||
|
||||
# SMTP configuration for sending emails
|
||||
# SMTP_HOST=smtp.gmail.com
|
||||
# SMTP_PORT=587
|
||||
# SMTP_SECURE=false
|
||||
# SMTP_USER=your-email@gmail.com
|
||||
# SMTP_PASS=your-email-password
|
||||
|
||||
# Email from address
|
||||
# EMAIL_FROM=noreply@yourdomain.com
|
||||
|
||||
# =================================
|
||||
# THIRD-PARTY INTEGRATIONS
|
||||
# =================================
|
||||
|
||||
# External API keys
|
||||
# EXTERNAL_API_KEY=your-api-key
|
||||
# EXTERNAL_API_URL=https://api.external-service.com
|
||||
|
||||
# =================================
|
||||
# DEVELOPMENT SETTINGS
|
||||
# =================================
|
||||
|
||||
# Enable API documentation (Swagger)
|
||||
ENABLE_DOCS=true
|
||||
|
||||
# Enable debug mode
|
||||
DEBUG=true
|
||||
|
||||
# Enable database logging
|
||||
DB_LOGGING=true
|
||||
|
||||
# =================================
|
||||
# PRODUCTION SETTINGS
|
||||
# =================================
|
||||
|
||||
# When NODE_ENV=production, ensure these are set:
|
||||
# - Strong JWT_SECRET
|
||||
# - Specific CORS_ORIGINS (not *)
|
||||
# - DB_LOGGING=false
|
||||
# - DEBUG=false
|
||||
# - LOG_LEVEL=warn or error
|
||||
|
||||
# Health check endpoint settings
|
||||
HEALTH_CHECK_ENABLED=true
|
||||
|
||||
# Request timeout in milliseconds
|
||||
REQUEST_TIMEOUT=30000
|
||||
|
||||
# =================================
|
||||
# ICD SPECIFIC CONFIGURATION
|
||||
# =================================
|
||||
|
||||
# Path to ICD data files
|
||||
ICD9_FILE_PATH=./test/[PUBLIC] ICD-9CM e-klaim.xlsx
|
||||
ICD10_FILE_PATH=./test/[PUBLIC] ICD-10 e-klaim.xlsx
|
||||
|
||||
# ICD import batch size
|
||||
ICD_IMPORT_BATCH_SIZE=1000
|
||||
|
||||
# =================================
|
||||
# MONITORING & METRICS
|
||||
# =================================
|
||||
|
||||
# Enable application metrics
|
||||
METRICS_ENABLED=true
|
||||
|
||||
# Metrics endpoint path
|
||||
METRICS_PATH=/metrics
|
||||
|
||||
# Health check endpoint path
|
||||
HEALTH_CHECK_PATH=/health
|
||||
|
||||
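# OpenAI credentials and embedding model (replace the placeholder locally; never commit a real key)
OPENAI_API_KEY=xxxxxx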
|
||||
OPENAI_API_MODEL=text-embedding-ada-002
|
||||
473
README.md
473
README.md
@@ -1,194 +1,397 @@
|
||||
<p align="center">
|
||||
<a href="http://nestjs.com/" target="blank"><img src="https://nestjs.com/img/logo-small.svg" width="120" alt="Nest Logo" /></a>
|
||||
</p>
|
||||
# 🏥 Claim Guard Backend
|
||||
|
||||
[circleci-image]: https://img.shields.io/circleci/build/github/nestjs/nest/master?token=abc123def456
|
||||
[circleci-url]: https://circleci.com/gh/nestjs/nest
|
||||
> **NestJS application for managing ICD-9 and ICD-10 medical codes with Excel import functionality**
|
||||
|
||||
<p align="center">A progressive <a href="http://nodejs.org" target="_blank">Node.js</a> framework for building efficient and scalable server-side applications.</p>
|
||||
<p align="center">
|
||||
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/v/@nestjs/core.svg" alt="NPM Version" /></a>
|
||||
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/l/@nestjs/core.svg" alt="Package License" /></a>
|
||||
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/dm/@nestjs/common.svg" alt="NPM Downloads" /></a>
|
||||
<a href="https://circleci.com/gh/nestjs/nest" target="_blank"><img src="https://img.shields.io/circleci/build/github/nestjs/nest/master" alt="CircleCI" /></a>
|
||||
<a href="https://discord.gg/G7Qnnhy" target="_blank"><img src="https://img.shields.io/badge/discord-online-brightgreen.svg" alt="Discord"/></a>
|
||||
<a href="https://opencollective.com/nest#backer" target="_blank"><img src="https://opencollective.com/nest/backers/badge.svg" alt="Backers on Open Collective" /></a>
|
||||
<a href="https://opencollective.com/nest#sponsor" target="_blank"><img src="https://opencollective.com/nest/sponsors/badge.svg" alt="Sponsors on Open Collective" /></a>
|
||||
<a href="https://paypal.me/kamilmysliwiec" target="_blank"><img src="https://img.shields.io/badge/Donate-PayPal-ff3f59.svg" alt="Donate us"/></a>
|
||||
<a href="https://opencollective.com/nest#sponsor" target="_blank"><img src="https://img.shields.io/badge/Support%20us-Open%20Collective-41B883.svg" alt="Support us"></a>
|
||||
<a href="https://twitter.com/nestframework" target="_blank"><img src="https://img.shields.io/twitter/follow/nestframework.svg?style=social&label=Follow" alt="Follow us on Twitter"></a>
|
||||
</p>
|
||||
<!--[](https://opencollective.com/nest#backer)
|
||||
[](https://opencollective.com/nest#sponsor)-->
|
||||
[](https://nestjs.com/)
|
||||
[](https://www.typescriptlang.org/)
|
||||
[](https://www.postgresql.org/)
|
||||
[](https://www.docker.com/)
|
||||
[](https://www.prisma.io/)
|
||||
|
||||
## Description
|
||||
## ✨ Features
|
||||
|
||||
Claim Guard Backend - A NestJS application for managing ICD-9 and ICD-10 medical codes with Excel import functionality.
|
||||
- 🏥 **ICD Code Management** - Import and manage ICD-9 and ICD-10 medical codes
|
||||
- 📊 **Excel Import** - Read data from Excel files and store in PostgreSQL
|
||||
- 🔍 **Search & Filter** - Search codes by category, code, or display text
|
||||
- 🌐 **REST API** - Full REST API with Swagger documentation
|
||||
- 📄 **Pagination** - Built-in pagination for large datasets
|
||||
- 🐳 **Docker Ready** - PostgreSQL with pgvector extension
|
||||
- 🔄 **Database Migrations** - Prisma migration system
|
||||
- ✅ **Input Validation** - Type-safe DTOs with validation
|
||||
- 📚 **API Documentation** - Interactive Swagger UI
|
||||
|
||||
## Features
|
||||
## 🚀 Quick Start
|
||||
|
||||
- **ICD Code Management**: Import and manage ICD-9 and ICD-10 medical codes
|
||||
- **Excel Import**: Read data from Excel files and store in PostgreSQL database
|
||||
- **Search & Filter**: Search codes by category, code, or display text
|
||||
- **REST API**: Full REST API endpoints for accessing ICD data
|
||||
- **Pagination**: Built-in pagination support for large datasets
|
||||
### Prerequisites
|
||||
|
||||
## ICD Service Endpoints
|
||||
- Node.js 18+
|
||||
- Docker & Docker Compose
|
||||
- Git
|
||||
|
||||
### Import Data
|
||||
|
||||
```bash
|
||||
POST /icd/import
|
||||
```
|
||||
|
||||
Imports ICD-9 and ICD-10 data from Excel files in the `test/` directory.
|
||||
|
||||
### Search Codes
|
||||
|
||||
```bash
|
||||
GET /icd/search?category=ICD10&search=diabetes&page=1&limit=10
|
||||
```
|
||||
|
||||
Search ICD codes with optional filters:
|
||||
|
||||
- `category`: Filter by ICD9 or ICD10
|
||||
- `search`: Search in code or display text
|
||||
- `page`: Page number (default: 1)
|
||||
- `limit`: Items per page (default: 10)
|
||||
|
||||
### Get Statistics
|
||||
|
||||
```bash
|
||||
GET /icd/statistics
|
||||
```
|
||||
|
||||
Returns count statistics for ICD codes.
|
||||
|
||||
## Database Schema
|
||||
|
||||
The application uses PostgreSQL with Prisma ORM. The ICD codes are stored in the `icd_codes` table with the following structure:
|
||||
|
||||
```sql
|
||||
CREATE TABLE "icd_codes" (
|
||||
"id" TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
|
||||
"code" TEXT UNIQUE NOT NULL,
|
||||
"display" TEXT NOT NULL,
|
||||
"version" TEXT NOT NULL,
|
||||
"category" TEXT NOT NULL, -- "ICD9" or "ICD10"
|
||||
"createdAt" TIMESTAMP DEFAULT NOW(),
|
||||
"updatedAt" TIMESTAMP DEFAULT NOW()
|
||||
);
|
||||
```
|
||||
|
||||
**ID Format**: The `id` field now uses UUID (Universal Unique Identifier) format like `550e8400-e29b-41d4-a716-446655440000` instead of CUID.
|
||||
|
||||
## Setup Instructions
|
||||
|
||||
1. **Install Dependencies**
|
||||
### 1. Clone & Install
|
||||
|
||||
```bash
|
||||
git clone <repository-url>
|
||||
cd claim-guard-be
|
||||
npm install
|
||||
```
|
||||
|
||||
2. **Database Setup**
|
||||
Create a `.env` file with your PostgreSQL connection:
|
||||
### 2. Start Database
|
||||
|
||||
```bash
|
||||
DATABASE_URL="postgresql://username:password@localhost:5432/claim_guard_db?schema=public"
|
||||
# Start PostgreSQL with pgvector
|
||||
docker-compose up -d
|
||||
|
||||
# Verify database is running
|
||||
docker-compose ps
|
||||
```
|
||||
|
||||
3. **Generate Prisma Client**
|
||||
### 3. Setup Environment
|
||||
|
||||
```bash
|
||||
# Copy environment template (edit as needed)
|
||||
cp .env.example .env   # Windows (cmd): copy .env.example .env
|
||||
|
||||
# Update DATABASE_URL in .env:
|
||||
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
|
||||
```
|
||||
|
||||
### 4. Run Migrations
|
||||
|
||||
```bash
|
||||
# Apply database schema
|
||||
npx prisma migrate deploy
|
||||
|
||||
# Generate Prisma client
|
||||
npx prisma generate
|
||||
```
|
||||
|
||||
4. **Run Database Migrations**
|
||||
### 5. Start Application
|
||||
|
||||
```bash
|
||||
npx prisma db push
|
||||
# Development mode
|
||||
npm run start:dev
|
||||
|
||||
# Production mode
|
||||
npm run build
|
||||
npm run start:prod
|
||||
```
|
||||
|
||||
5. **Place Excel Files**
|
||||
Ensure the following files are in the `test/` directory:
|
||||
### 6. Access Services
|
||||
|
||||
- `[PUBLIC] ICD-9CM e-klaim.xlsx`
|
||||
- `[PUBLIC] ICD-10 e-klaim.xlsx`
|
||||
- **API**: http://localhost:3000
|
||||
- **Swagger Docs**: http://localhost:3000/docs
|
||||
- **Health Check**: http://localhost:3000/health
|
||||
|
||||
The Excel files should have at least 3 columns:
|
||||
## 📚 API Endpoints
|
||||
|
||||
- Column 1: Code
|
||||
- Column 2: Display/Description
|
||||
- Column 3: Version
|
||||
### ICD Management
|
||||
|
||||
## Project setup
|
||||
| Method | Endpoint | Description |
|
||||
| ------ | ----------------- | -------------------------------- |
|
||||
| `POST` | `/icd/import` | Import ICD data from Excel files |
|
||||
| `GET` | `/icd/search` | Search ICD codes with filters |
|
||||
| `GET` | `/icd/statistics` | Get database statistics |
|
||||
|
||||
### Health & Monitoring
|
||||
|
||||
| Method | Endpoint | Description |
|
||||
| ------ | --------------- | ----------------------------- |
|
||||
| `GET` | `/health` | Application health check |
|
||||
| `GET` | `/health/ready` | Readiness probe |
|
||||
| `GET` | `/health/live` | Liveness probe |
|
||||
| `GET` | `/docs` | Interactive API documentation |
|
||||
|
||||
### Example Usage
|
||||
|
||||
```bash
|
||||
$ npm install
|
||||
# Import ICD data
|
||||
curl -X POST http://localhost:3000/icd/import
|
||||
|
||||
# Search for diabetes codes
|
||||
curl "http://localhost:3000/icd/search?search=diabetes&page=1&limit=10"
|
||||
|
||||
# Get statistics
|
||||
curl http://localhost:3000/icd/statistics
|
||||
|
||||
# Health check
|
||||
curl http://localhost:3000/health
|
||||
```
|
||||
|
||||
## Compile and run the project
|
||||
## 🐳 Docker Setup
|
||||
|
||||
### Database Only (Recommended)
|
||||
|
||||
```bash
|
||||
# development
|
||||
$ npm run start
|
||||
# Start PostgreSQL with pgvector
|
||||
docker-compose up -d
|
||||
|
||||
# watch mode
|
||||
$ npm run start:dev
|
||||
# Stop database
|
||||
docker-compose down
|
||||
|
||||
# production mode
|
||||
$ npm run start:prod
|
||||
# Reset database (deletes all data!)
|
||||
docker-compose down -v
|
||||
```
|
||||
|
||||
## Run tests
|
||||
### Connection Details
|
||||
|
||||
```env
|
||||
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
|
||||
Host: localhost
|
||||
Port: 5432
|
||||
Database: claim_guard
|
||||
Username: postgres
|
||||
Password: postgres123
|
||||
```
|
||||
|
||||
### Verify pgvector Extension
|
||||
|
||||
```bash
|
||||
# unit tests
|
||||
$ npm run test
|
||||
|
||||
# e2e tests
|
||||
$ npm run test:e2e
|
||||
|
||||
# test coverage
|
||||
$ npm run test:cov
|
||||
docker-compose exec postgres psql -U postgres -d claim_guard -c "SELECT name, default_version, installed_version FROM pg_available_extensions WHERE name = 'vector';"
|
||||
```
|
||||
|
||||
## Deployment
|
||||
## 🗂️ Database Schema
|
||||
|
||||
When you're ready to deploy your NestJS application to production, there are some key steps you can take to ensure it runs as efficiently as possible. Check out the [deployment documentation](https://docs.nestjs.com/deployment) for more information.
|
||||
### IcdCode Model
|
||||
|
||||
If you are looking for a cloud-based platform to deploy your NestJS application, check out [Mau](https://mau.nestjs.com), our official platform for deploying NestJS applications on AWS. Mau makes deployment straightforward and fast, requiring just a few simple steps:
|
||||
```prisma
|
||||
model IcdCode {
|
||||
id String @id @default(uuid())
|
||||
code String @unique
|
||||
display String
|
||||
version String
|
||||
category String // "ICD9" or "ICD10"
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
@@map("icd_codes")
|
||||
}
|
||||
```
|
||||
|
||||
### Migration Commands
|
||||
|
||||
```bash
|
||||
$ npm install -g @nestjs/mau
|
||||
$ mau deploy
|
||||
# Check migration status
|
||||
npx prisma migrate status
|
||||
|
||||
# Create new migration
|
||||
npx prisma migrate dev --name description
|
||||
|
||||
# Deploy to production
|
||||
npx prisma migrate deploy
|
||||
|
||||
# Reset database (development)
|
||||
npx prisma migrate reset
|
||||
```
|
||||
|
||||
With Mau, you can deploy your application in just a few clicks, allowing you to focus on building features rather than managing infrastructure.
|
||||
## ⚙️ Environment Configuration
|
||||
|
||||
## Resources
|
||||
### Development
|
||||
|
||||
Check out a few resources that may come in handy when working with NestJS:
|
||||
```env
|
||||
NODE_ENV=development
|
||||
PORT=3000
|
||||
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
|
||||
CORS_ORIGINS=http://localhost:3000,http://localhost:3001
|
||||
ENABLE_DOCS=true
|
||||
DEBUG=true
|
||||
LOG_LEVEL=debug
|
||||
```
|
||||
|
||||
- Visit the [NestJS Documentation](https://docs.nestjs.com) to learn more about the framework.
|
||||
- For questions and support, please visit our [Discord channel](https://discord.gg/G7Qnnhy).
|
||||
- To dive deeper and get more hands-on experience, check out our official video [courses](https://courses.nestjs.com/).
|
||||
- Deploy your application to AWS with the help of [NestJS Mau](https://mau.nestjs.com) in just a few clicks.
|
||||
- Visualize your application graph and interact with the NestJS application in real-time using [NestJS Devtools](https://devtools.nestjs.com).
|
||||
- Need help with your project (part-time to full-time)? Check out our official [enterprise support](https://enterprise.nestjs.com).
|
||||
- To stay in the loop and get updates, follow us on [X](https://x.com/nestframework) and [LinkedIn](https://linkedin.com/company/nestjs).
|
||||
- Looking for a job, or have a job to offer? Check out our official [Jobs board](https://jobs.nestjs.com).
|
||||
### Production
|
||||
|
||||
## Support
|
||||
```env
|
||||
NODE_ENV=production
|
||||
PORT=3000
|
||||
DATABASE_URL=postgresql://user:pass@host:5432/db
|
||||
CORS_ORIGINS=https://yourdomain.com
|
||||
ENABLE_DOCS=false
|
||||
DEBUG=false
|
||||
LOG_LEVEL=warn
|
||||
JWT_SECRET=strong-production-secret
|
||||
```
|
||||
|
||||
Nest is an MIT-licensed open source project. It can grow thanks to the sponsors and support by the amazing backers. If you'd like to join them, please [read more here](https://docs.nestjs.com/support).
|
||||
### Available Variables
|
||||
|
||||
## Stay in touch
|
||||
| Variable | Description | Default |
|
||||
| -------------- | ---------------------------- | ----------------------- |
|
||||
| `NODE_ENV` | Environment mode | `development` |
|
||||
| `PORT` | Application port | `3000` |
|
||||
| `DATABASE_URL` | PostgreSQL connection string | Required |
|
||||
| `CORS_ORIGINS` | Allowed CORS origins | `http://localhost:3000` |
|
||||
| `ENABLE_DOCS` | Enable Swagger documentation | `true` |
|
||||
| `LOG_LEVEL` | Logging level | `info` |
|
||||
| `JWT_SECRET` | JWT signing secret | Required for auth |
|
||||
|
||||
- Author - [Kamil Myśliwiec](https://twitter.com/kammysliwiec)
|
||||
- Website - [https://nestjs.com](https://nestjs.com/)
|
||||
- Twitter - [@nestframework](https://twitter.com/nestframework)
|
||||
## 🧪 Testing
|
||||
|
||||
## License
|
||||
### API Testing
|
||||
|
||||
Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE).
|
||||
Use the included `icd.http` file with VS Code REST Client extension:
|
||||
|
||||
```http
|
||||
### Import ICD Data
|
||||
POST http://localhost:3000/icd/import
|
||||
|
||||
### Search ICD Codes
|
||||
GET http://localhost:3000/icd/search?search=diabetes&page=1&limit=10
|
||||
|
||||
### Get Statistics
|
||||
GET http://localhost:3000/icd/statistics
|
||||
```
|
||||
|
||||
### Unit Tests
|
||||
|
||||
```bash
|
||||
# Run tests
|
||||
npm run test
|
||||
|
||||
# Watch mode
|
||||
npm run test:watch
|
||||
|
||||
# Coverage
|
||||
npm run test:cov
|
||||
|
||||
# E2E tests
|
||||
npm run test:e2e
|
||||
```
|
||||
|
||||
## 🔧 Development
|
||||
|
||||
### Project Structure
|
||||
|
||||
```
|
||||
src/
|
||||
├── main.ts # Application entry point
|
||||
├── app.module.ts # Root module
|
||||
├── icd/ # ICD module
|
||||
│ ├── icd.controller.ts # REST endpoints
|
||||
│ ├── icd.service.ts # Business logic
|
||||
│ ├── icd.module.ts # Module definition
|
||||
│ └── dto/ # Data transfer objects
|
||||
├── health/ # Health check module
|
||||
└── prisma/ # Database schema
|
||||
```
|
||||
|
||||
### Scripts
|
||||
|
||||
```bash
|
||||
# Development
|
||||
npm run start:dev # Start with hot reload
|
||||
npm run start:debug # Start with debugger
|
||||
|
||||
# Build
|
||||
npm run build # Build for production
|
||||
npm run start:prod # Start production build
|
||||
|
||||
# Database
|
||||
npx prisma studio # Open Prisma Studio
|
||||
npx prisma db push # Push schema changes (dev only)
|
||||
npx prisma generate # Generate Prisma client
|
||||
|
||||
# Code Quality
|
||||
npm run lint # Run ESLint
|
||||
npm run format # Format with Prettier
|
||||
```
|
||||
|
||||
### Adding New Features
|
||||
|
||||
1. **Create DTOs** with validation decorators
|
||||
2. **Add Swagger decorators** for API documentation
|
||||
3. **Write unit tests** for services and controllers
|
||||
4. **Update database schema** if needed
|
||||
5. **Create migration** with descriptive name
|
||||
|
||||
## 📦 Deployment
|
||||
|
||||
### Docker Production
|
||||
|
||||
```dockerfile
|
||||
# Build stage
FROM node:18-alpine AS builder
WORKDIR /app
COPY package*.json ./
# Install ALL dependencies here: `npm run build` needs devDependencies
# (presumably the Nest CLI / TypeScript), so `--only=production` breaks the build.
RUN npm ci
COPY . .
# Generate the Prisma client now that the schema has been copied in
# (same command as the "Run Migrations" step above).
RUN npx prisma generate
RUN npm run build
# Drop devDependencies so only runtime packages reach the final image.
RUN npm prune --omit=dev

# Production stage
FROM node:18-alpine AS production
WORKDIR /app
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY package*.json ./
EXPOSE 3000
CMD ["node", "dist/main"]
|
||||
```
|
||||
|
||||
### Environment Setup
|
||||
|
||||
1. **Production Database**: Use managed PostgreSQL (AWS RDS, Google Cloud SQL)
|
||||
2. **Environment Variables**: Set secure values for production
|
||||
3. **SSL/TLS**: Enable HTTPS in production
|
||||
4. **Monitoring**: Add application monitoring (Prometheus, Grafana)
|
||||
5. **Logging**: Configure centralized logging
|
||||
|
||||
## 🐛 Troubleshooting
|
||||
|
||||
### Common Issues
|
||||
|
||||
**Port 5432 already in use:**
|
||||
|
||||
```bash
|
||||
# Check what's using the port
|
||||
netstat -ano | findstr :5432
|
||||
# Stop local PostgreSQL service
|
||||
```
|
||||
|
||||
**Database connection failed:**
|
||||
|
||||
```bash
|
||||
# Check if container is running
|
||||
docker-compose ps
|
||||
# Check logs
|
||||
docker-compose logs
|
||||
# Restart database
|
||||
docker-compose restart
|
||||
```
|
||||
|
||||
**Prisma can't connect:**
|
||||
|
||||
```bash
|
||||
# Verify DATABASE_URL in .env
|
||||
# Test connection
|
||||
npx prisma db pull
|
||||
```
|
||||
|
||||
**Build errors:**
|
||||
|
||||
```bash
|
||||
# Clean install
|
||||
rm -rf node_modules package-lock.json
|
||||
npm install
|
||||
```
|
||||
|
||||
## 📄 License
|
||||
|
||||
This project is licensed under the MIT License.
|
||||
|
||||
## 🤝 Contributing
|
||||
|
||||
1. Fork the repository
|
||||
2. Create a feature branch
|
||||
3. Make your changes
|
||||
4. Add tests
|
||||
5. Submit a pull request
|
||||
|
||||
## 📞 Support
|
||||
|
||||
For questions and support:
|
||||
|
||||
- Check the [API documentation](http://localhost:3000/docs)
|
||||
- Review the troubleshooting section
|
||||
- Create an issue in the repository
|
||||
|
||||
---
|
||||
|
||||
**Ready to manage ICD codes efficiently!** 🚀
|
||||
|
||||
25
docker-compose.yml
Normal file
25
docker-compose.yml
Normal file
@@ -0,0 +1,25 @@
|
||||
# Local development stack for Claim Guard.
#
# POSTGRES_PASSWORD and POSTGRES_PORT may be overridden from the shell
# environment or a .env file next to this compose file; the defaults below are
# the values that used to be hard-coded, so existing setups keep working.
# The database name and user stay fixed because the init SQL and the helper
# scripts reference `claim_guard` / `postgres` literally.
services:
  # PostgreSQL Database with pgvector extension
  postgres:
    image: pgvector/pgvector:pg15
    container_name: claim-guard-postgres
    restart: unless-stopped
    environment:
      POSTGRES_DB: claim_guard
      POSTGRES_USER: postgres
      # NOTE(review): the postgres image applies this only when the data
      # volume is initialised for the first time; confirm against image docs.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres123}
    ports:
      # host:container; change the host side if 5432 is already taken locally
      - '${POSTGRES_PORT:-5432}:5432'
    volumes:
      - postgres_data:/var/lib/postgresql/data
      # SQL files mounted here are the database bootstrap scripts
      - ./docker/postgres/init:/docker-entrypoint-initdb.d
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U postgres -d claim_guard']
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

volumes:
  postgres_data:
    driver: local
|
||||
32
docker/postgres/init/01-init.sql
Normal file
32
docker/postgres/init/01-init.sql
Normal file
@@ -0,0 +1,32 @@
|
||||
-- -----------------------------------------------------
-- Claim Guard: database bootstrap
-- -----------------------------------------------------
-- Mounted into the postgres container's /docker-entrypoint-initdb.d directory
-- by docker-compose.yml. The claim_guard database and the postgres role are
-- created from the POSTGRES_DB / POSTGRES_USER variables, so this script only
-- installs extensions and grants privileges. Further databases or application
-- roles could be created here if they are ever needed.

-- Vector similarity search (pgvector)
CREATE EXTENSION IF NOT EXISTS "vector";

-- Additional extensions made available to the application
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS pg_trgm;
CREATE EXTENSION IF NOT EXISTS btree_gin;
CREATE EXTENSION IF NOT EXISTS btree_gist;

-- Database-level permissions for the main user
GRANT ALL PRIVILEGES ON DATABASE "claim_guard" TO "postgres";

-- No dedicated schema is created: Prisma uses "public" by default.
-- (Optional) CREATE SCHEMA IF NOT EXISTS claim_guard;

-- Report the outcome in the server log
DO $init$
DECLARE
    has_vector boolean;
BEGIN
    -- true when the pgvector extension is installed in this database
    SELECT EXISTS (
        SELECT 1
        FROM pg_extension
        WHERE extname = 'vector'
    )
    INTO has_vector;

    RAISE NOTICE 'Claim Guard database initialized successfully';
    RAISE NOTICE 'pgvector extension: %', has_vector;
    RAISE NOTICE 'Database ready for Prisma migrations';
END
$init$;
|
||||
43
docker/scripts/reset-db.bat
Normal file
43
docker/scripts/reset-db.bat
Normal file
@@ -0,0 +1,43 @@
|
||||
@echo off
setlocal
REM =====================================================
REM Reset PostgreSQL Database (Windows)
REM =====================================================
REM Destroys the PostgreSQL container together with its data volumes, starts a
REM fresh database and waits until it accepts connections.
REM Nothing is stopped or deleted until the user confirms with "y".
REM Exit code: 0 on success, 1 when cancelled or when the database fails to start.

echo 🗑️ Resetting PostgreSQL database...

REM Ask BEFORE touching anything so that cancelling leaves the running stack intact.
echo ⚠️ WARNING: This will delete ALL database data!
REM Clear any inherited value: set /p keeps the old value when the user just presses Enter.
set "confirm="
set /p confirm=Are you sure? (y/N): 
if /i not "%confirm%"=="y" (
    echo ❌ Operation cancelled
    exit /b 1
)

REM Stop services and remove their volumes (this deletes all data!)
docker-compose down -v
REM Best-effort removal of named volumes that may be left over; errors are ignored.
docker volume rm claim-guard-be_postgres_data 2>nul
docker volume rm claim-guard-be_pgadmin_data 2>nul

echo ✅ Database reset complete!
echo 🐳 Starting fresh database...

REM Start database again
docker-compose up -d postgres
if errorlevel 1 (
    echo ❌ Failed to start the postgres service
    pause
    exit /b 1
)

echo ⏳ Waiting for PostgreSQL to be ready...

REM Poll pg_isready every 2 seconds, giving up after 30 attempts instead of looping forever.
set /a tries=0
:wait_loop
REM -T: no pseudo-TTY, so the probe also works when no console is attached
docker-compose exec -T postgres pg_isready -U postgres -d claim_guard >nul 2>&1
if %errorlevel% equ 0 goto db_ready
set /a tries+=1
if %tries% geq 30 (
    echo ❌ PostgreSQL did not become ready after 30 attempts
    pause
    exit /b 1
)
echo ⏳ PostgreSQL is unavailable - sleeping
timeout /t 2 /nobreak >nul
goto wait_loop

:db_ready
echo ✅ Fresh database is ready!
echo 📊 Run 'npx prisma migrate deploy' to setup schema

pause
|
||||
29
docker/scripts/start-db.bat
Normal file
29
docker/scripts/start-db.bat
Normal file
@@ -0,0 +1,29 @@
|
||||
@echo off
setlocal
REM =====================================================
REM Start PostgreSQL Database Only (Windows)
REM =====================================================
REM Starts the "postgres" compose service, waits until pg_isready succeeds,
REM then prints the connection URL and the container logs.
REM Exit code: 0 on success, 1 when the service cannot be started or does not
REM become ready in time.

echo 🐳 Starting PostgreSQL with pgvector...

REM Start only the database service
docker-compose up -d postgres
if errorlevel 1 (
    echo ❌ Failed to start the postgres service
    pause
    exit /b 1
)

echo ⏳ Waiting for PostgreSQL to be ready...

REM Poll pg_isready every 2 seconds, giving up after 30 attempts instead of looping forever.
set /a tries=0
:wait_loop
REM -T: no pseudo-TTY, so the probe also works when no console is attached
docker-compose exec -T postgres pg_isready -U postgres -d claim_guard >nul 2>&1
if %errorlevel% equ 0 goto db_ready
set /a tries+=1
if %tries% geq 30 (
    echo ❌ PostgreSQL did not become ready after 30 attempts
    docker-compose logs postgres
    pause
    exit /b 1
)
echo ⏳ PostgreSQL is unavailable - sleeping
timeout /t 2 /nobreak >nul
goto wait_loop

:db_ready
echo ✅ PostgreSQL is ready!
echo 📊 Database URL: postgresql://postgres:postgres123@localhost:5432/claim_guard

REM Show logs
echo 📋 Database logs:
docker-compose logs postgres

pause
|
||||
25
docker/scripts/start-db.sh
Normal file
25
docker/scripts/start-db.sh
Normal file
@@ -0,0 +1,25 @@
|
||||
#!/bin/bash

# =====================================================
# Start PostgreSQL Database Only
# =====================================================
#
# Starts the `postgres` compose service, blocks until pg_isready reports that
# it accepts connections, then prints the connection URL and container logs.
#
# Environment:
#   MAX_ATTEMPTS  number of readiness probes, 2 s apart (default: 30)
#
# Exit status: 0 on success; non-zero if the service cannot be started or does
# not become ready within MAX_ATTEMPTS probes.

# Abort on command failure, unset variables and failed pipeline stages.
set -euo pipefail

MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"

echo "🐳 Starting PostgreSQL with pgvector..."

# Start only the database service
docker-compose up -d postgres

echo "⏳ Waiting for PostgreSQL to be ready..."

# Wait for PostgreSQL to be healthy, but never forever.
# -T disables pseudo-TTY allocation so the probe also works from CI or pipes.
attempt=0
until docker-compose exec -T postgres pg_isready -U postgres -d claim_guard; do
    attempt=$((attempt + 1))
    if [ "$attempt" -ge "$MAX_ATTEMPTS" ]; then
        echo "❌ PostgreSQL did not become ready after ${MAX_ATTEMPTS} attempts" >&2
        # Show the container output to help diagnose the failed start.
        docker-compose logs postgres >&2 || true
        exit 1
    fi
    echo "⏳ PostgreSQL is unavailable - sleeping"
    sleep 2
done

echo "✅ PostgreSQL is ready!"
echo "📊 Database URL: postgresql://postgres:postgres123@localhost:5432/claim_guard"

# Show logs
echo "📋 Database logs:"
docker-compose logs postgres
|
||||
2469
package-lock.json
generated
2469
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
@@ -20,12 +20,20 @@
|
||||
"test:e2e": "jest --config ./test/jest-e2e.json"
|
||||
},
|
||||
"dependencies": {
|
||||
"@langchain/community": "^0.3.53",
|
||||
"@langchain/openai": "^0.6.9",
|
||||
"@nestjs/common": "^11.0.1",
|
||||
"@nestjs/core": "^11.0.1",
|
||||
"@nestjs/platform-express": "^11.0.1",
|
||||
"@nestjs/swagger": "^11.2.0",
|
||||
"@prisma/client": "^6.14.0",
|
||||
"class-transformer": "^0.5.1",
|
||||
"class-validator": "^0.14.2",
|
||||
"langchain": "^0.3.31",
|
||||
"pg": "^8.11.3",
|
||||
"reflect-metadata": "^0.2.2",
|
||||
"rxjs": "^7.8.1",
|
||||
"swagger-ui-express": "^5.0.1",
|
||||
"xlsx": "^0.18.5"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
||||
26
prisma/migrations/20250822104301_init/migration.sql
Normal file
26
prisma/migrations/20250822104301_init/migration.sql
Normal file
@@ -0,0 +1,26 @@
|
||||
-- Enable pgvector extension
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- CreateTable
|
||||
CREATE TABLE "icd_codes" (
|
||||
"id" TEXT NOT NULL,
|
||||
"code" TEXT NOT NULL,
|
||||
"display" TEXT NOT NULL,
|
||||
"version" TEXT NOT NULL,
|
||||
"category" TEXT NOT NULL,
|
||||
"embedding" vector(1536),
|
||||
"createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
|
||||
"updatedAt" TIMESTAMP(3) NOT NULL,
|
||||
|
||||
CONSTRAINT "icd_codes_pkey" PRIMARY KEY ("id")
|
||||
);
|
||||
|
||||
-- Create unique index on code
|
||||
CREATE UNIQUE INDEX "icd_codes_code_key" ON "icd_codes"("code");
|
||||
|
||||
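-- Create IVFFlat index for fast cosine-similarity search on "embedding".
-- NOTE(review): the table is empty when this runs; pgvector recommends building IVFFlat indexes after data is loaded, so consider a REINDEX once embeddings have been imported.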
|
||||
CREATE INDEX "icd_codes_embedding_idx" ON "icd_codes" USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
||||
|
||||
-- Add comments for documentation
|
||||
COMMENT ON COLUMN "icd_codes"."embedding" IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
|
||||
COMMENT ON INDEX "icd_codes_embedding_idx" IS 'IVFFlat index for fast cosine similarity search with 100 lists';
|
||||
27
prisma/migrations/20250822104302_add_pgvector/migration.sql
Normal file
27
prisma/migrations/20250822104302_add_pgvector/migration.sql
Normal file
@@ -0,0 +1,27 @@
|
||||
-- Migration: Add pgvector support to icd_codes table
|
||||
|
||||
-- Enable pgvector extension
|
||||
CREATE EXTENSION IF NOT EXISTS vector;
|
||||
|
||||
-- Add embedding column with pgvector type
|
||||
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "embedding" vector(1536);
|
||||
|
||||
-- Add metadata column for LangChain pgvector
|
||||
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "metadata" JSONB;
|
||||
|
||||
-- Add content column for LangChain pgvector
|
||||
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "content" TEXT;
|
||||
|
||||
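-- Create IVFFlat index for fast vector similarity search (IF NOT EXISTS makes this a no-op when the init migration has already created "icd_codes_embedding_idx")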
|
||||
CREATE INDEX IF NOT EXISTS "icd_codes_embedding_idx" ON "icd_codes"
|
||||
USING ivfflat (embedding vector_cosine_ops) WITH (lists = 100);
|
||||
|
||||
-- Create index on metadata for fast JSON queries
|
||||
CREATE INDEX IF NOT EXISTS "icd_codes_metadata_idx" ON "icd_codes" USING GIN (metadata);
|
||||
|
||||
-- Add comments for documentation
|
||||
COMMENT ON COLUMN "icd_codes"."embedding" IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
|
||||
COMMENT ON COLUMN "icd_codes"."metadata" IS 'JSON metadata for LangChain pgvector operations';
|
||||
COMMENT ON COLUMN "icd_codes"."content" IS 'Text content for LangChain pgvector operations';
|
||||
COMMENT ON INDEX "icd_codes_embedding_idx" IS 'IVFFlat index for fast cosine similarity search with 100 lists';
|
||||
COMMENT ON INDEX "icd_codes_metadata_idx" IS 'GIN index for fast JSON metadata queries';
|
||||
3
prisma/migrations/migration_lock.toml
Normal file
3
prisma/migrations/migration_lock.toml
Normal file
@@ -0,0 +1,3 @@
|
||||
# Please do not edit this file manually
|
||||
# It should be added in your version-control system (e.g., Git)
|
||||
provider = "postgresql"
|
||||
@@ -1,12 +1,5 @@
|
||||
// This is your Prisma schema file,
|
||||
// learn more about it in the docs: https://pris.ly/d/prisma-schema
|
||||
|
||||
// Looking for ways to speed up your queries, or scale easily with your serverless or edge functions?
|
||||
// Try Prisma Accelerate: https://pris.ly/cli/accelerate-init
|
||||
|
||||
generator client {
|
||||
provider = "prisma-client-js"
|
||||
output = "../generated/prisma"
|
||||
}
|
||||
|
||||
datasource db {
|
||||
@@ -19,9 +12,14 @@ model IcdCode {
|
||||
code String @unique
|
||||
display String
|
||||
version String
|
||||
category String // "ICD9" or "ICD10"
|
||||
category String
|
||||
embedding Unsupported("vector")?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
metadata Json?
|
||||
content String?
|
||||
|
||||
@@index([embedding])
|
||||
@@index([metadata], type: Gin)
|
||||
@@map("icd_codes")
|
||||
}
|
||||
|
||||
@@ -2,9 +2,10 @@ import { Module } from '@nestjs/common';
|
||||
import { AppController } from './app.controller';
|
||||
import { AppService } from './app.service';
|
||||
import { IcdModule } from './icd/icd.module';
|
||||
import { HealthModule } from './health/health.module';
|
||||
|
||||
@Module({
|
||||
imports: [IcdModule],
|
||||
imports: [IcdModule, HealthModule],
|
||||
controllers: [AppController],
|
||||
providers: [AppService],
|
||||
})
|
||||
|
||||
83
src/health/health.controller.ts
Normal file
83
src/health/health.controller.ts
Normal file
@@ -0,0 +1,83 @@
|
||||
import { Controller, Get } from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiProperty,
|
||||
} from '@nestjs/swagger';
|
||||
|
||||
/**
 * Response body of `GET /health`, annotated for Swagger.
 */
export class HealthCheckResponseDto {
  /** Overall status string reported by the endpoint. */
  @ApiProperty({ example: 'ok' })
  status: string;

  /** ISO-8601 time at which the response was produced. */
  @ApiProperty({ example: '2024-01-01T00:00:00.000Z' })
  timestamp: string;

  /** Process uptime as reported by `process.uptime()`. */
  @ApiProperty({ example: 3600 })
  uptime: number;

  /** Runtime environment name. */
  @ApiProperty({ example: 'development' })
  environment: string;

  /** Application version string. */
  @ApiProperty({ example: '1.0.0' })
  version: string;

  /** Database section of the health report. */
  @ApiProperty({ example: { status: 'connected' } })
  database: { status: string };
}
|
||||
|
||||
/**
 * Health, readiness and liveness endpoints mounted under `/health`.
 *
 * All three handlers answer from in-process data only; none of them touches
 * an external dependency.
 */
@ApiTags('Health')
@Controller('health')
export class HealthController {
  /**
   * `GET /health` — returns a static "ok" report together with the current
   * time, the process uptime and the environment name.
   */
  @Get()
  @ApiOperation({
    summary: 'Health check endpoint',
    description:
      'Check the health status of the application and its dependencies',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is healthy',
    type: HealthCheckResponseDto,
  })
  async getHealth(): Promise<HealthCheckResponseDto> {
    const timestamp = new Date().toISOString();
    const uptime = process.uptime();
    const environment = process.env.NODE_ENV || 'development';

    // NOTE: the database status is a placeholder; no connection is actually
    // probed here.
    const database = { status: 'connected' };

    const report: HealthCheckResponseDto = {
      status: 'ok',
      timestamp,
      uptime,
      environment,
      version: '1.0.0',
      database,
    };
    return report;
  }

  /** `GET /health/ready` — always reports `ready`. */
  @Get('ready')
  @ApiOperation({
    summary: 'Readiness check',
    description: 'Check if the application is ready to serve requests',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is ready',
  })
  async getReady() {
    const readiness = { status: 'ready' };
    return readiness;
  }

  /** `GET /health/live` — always reports `alive`. */
  @Get('live')
  @ApiOperation({
    summary: 'Liveness check',
    description: 'Check if the application is alive',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is alive',
  })
  async getLive() {
    const liveness = { status: 'alive' };
    return liveness;
  }
}
|
||||
7
src/health/health.module.ts
Normal file
7
src/health/health.module.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { HealthController } from './health.controller';
|
||||
|
||||
@Module({
|
||||
controllers: [HealthController],
|
||||
})
|
||||
export class HealthModule {}
|
||||
192
src/icd/dto/icd-response.dto.ts
Normal file
192
src/icd/dto/icd-response.dto.ts
Normal file
@@ -0,0 +1,192 @@
|
||||
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
|
||||
|
||||
export class IcdCodeDto {
|
||||
@ApiProperty({
|
||||
description: 'Unique identifier for the ICD code',
|
||||
example: '550e8400-e29b-41d4-a716-446655440000',
|
||||
})
|
||||
id: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'ICD code',
|
||||
example: 'E11.9',
|
||||
})
|
||||
code: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Description of the ICD code',
|
||||
example: 'Type 2 diabetes mellitus without complications',
|
||||
})
|
||||
display: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Version of the ICD standard',
|
||||
example: '2024',
|
||||
})
|
||||
version: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'ICD category',
|
||||
example: 'ICD10',
|
||||
enum: ['ICD9', 'ICD10'],
|
||||
})
|
||||
category: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Creation timestamp',
|
||||
example: '2024-01-01T00:00:00.000Z',
|
||||
})
|
||||
createdAt: Date;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Last update timestamp',
|
||||
example: '2024-01-01T00:00:00.000Z',
|
||||
})
|
||||
updatedAt: Date;
|
||||
}
|
||||
|
||||
export class PaginationMetaDto {
|
||||
@ApiProperty({
|
||||
description: 'Current page number',
|
||||
example: 1,
|
||||
})
|
||||
currentPage: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Total number of pages',
|
||||
example: 10,
|
||||
})
|
||||
totalPages: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Total number of items',
|
||||
example: 100,
|
||||
})
|
||||
totalItems: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of items per page',
|
||||
example: 10,
|
||||
})
|
||||
itemsPerPage: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Whether there is a next page',
|
||||
example: true,
|
||||
})
|
||||
hasNextPage: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Whether there is a previous page',
|
||||
example: false,
|
||||
})
|
||||
hasPreviousPage: boolean;
|
||||
}
|
||||
|
||||
export class IcdSearchResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Request success status',
|
||||
example: true,
|
||||
})
|
||||
success: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Array of ICD codes',
|
||||
type: [IcdCodeDto],
|
||||
})
|
||||
data: IcdCodeDto[];
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Pagination metadata',
|
||||
type: PaginationMetaDto,
|
||||
})
|
||||
pagination: PaginationMetaDto;
|
||||
|
||||
@ApiPropertyOptional({
|
||||
description: 'Response message',
|
||||
example: 'ICD codes retrieved successfully',
|
||||
})
|
||||
message?: string;
|
||||
}
|
||||
|
||||
export class IcdImportResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Request success status',
|
||||
example: true,
|
||||
})
|
||||
success: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Success message',
|
||||
example: 'ICD data imported successfully',
|
||||
})
|
||||
message: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Import statistics',
|
||||
example: {
|
||||
icd9Count: 150,
|
||||
icd10Count: 250,
|
||||
total: 400,
|
||||
},
|
||||
})
|
||||
data: {
|
||||
icd9Count: number;
|
||||
icd10Count: number;
|
||||
total: number;
|
||||
};
|
||||
}
|
||||
|
||||
export class IcdStatisticsDto {
|
||||
@ApiProperty({
|
||||
description: 'Total number of ICD9 codes',
|
||||
example: 150,
|
||||
})
|
||||
icd9Count: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Total number of ICD10 codes',
|
||||
example: 250,
|
||||
})
|
||||
icd10Count: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Total number of all ICD codes',
|
||||
example: 400,
|
||||
})
|
||||
total: number;
|
||||
}
|
||||
|
||||
export class IcdStatisticsResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Request success status',
|
||||
example: true,
|
||||
})
|
||||
success: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'ICD statistics data',
|
||||
type: IcdStatisticsDto,
|
||||
})
|
||||
data: IcdStatisticsDto;
|
||||
}
|
||||
|
||||
export class ErrorResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Request success status',
|
||||
example: false,
|
||||
})
|
||||
success: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Error message',
|
||||
example: 'Failed to process request',
|
||||
})
|
||||
message: string;
|
||||
|
||||
@ApiPropertyOptional({
|
||||
description: 'Detailed error information',
|
||||
example: 'Database connection failed',
|
||||
})
|
||||
error?: string;
|
||||
}
|
||||
@@ -1,6 +1,62 @@
|
||||
import { ApiPropertyOptional } from '@nestjs/swagger';
|
||||
import {
|
||||
IsOptional,
|
||||
IsString,
|
||||
IsNumber,
|
||||
IsEnum,
|
||||
Min,
|
||||
Max,
|
||||
} from 'class-validator';
|
||||
import { Type } from 'class-transformer';
|
||||
|
||||
export enum IcdCategory {
|
||||
ICD9 = 'ICD9',
|
||||
ICD10 = 'ICD10',
|
||||
}
|
||||
|
||||
/**
 * Query parameters accepted by the ICD search endpoint.
 *
 * Every field is optional. Numeric fields arrive as query strings and are
 * converted with `@Type(() => Number)` before validation.
 */
export class SearchIcdDto {
  /** Restricts results to one ICD category. */
  @ApiPropertyOptional({
    description: 'ICD category to filter by',
    enum: IcdCategory,
    example: 'ICD10',
  })
  @IsOptional()
  @IsEnum(IcdCategory)
  category?: IcdCategory;

  /** Free-text term matched against the ICD code or its description. */
  @ApiPropertyOptional({
    description: 'Search term for ICD code or description',
    example: 'diabetes',
    minLength: 1,
    maxLength: 100,
  })
  @IsOptional()
  @IsString()
  search?: string;

  /** 1-based page number. */
  @ApiPropertyOptional({
    description: 'Page number for pagination',
    example: 1,
    minimum: 1,
    default: 1,
  })
  @IsOptional()
  @Type(() => Number)
  // maxDecimalPlaces: 0 rejects fractional pages such as 1.5.
  @IsNumber({ maxDecimalPlaces: 0 })
  @Min(1)
  page?: number;

  /** Page size, between 1 and 100. */
  @ApiPropertyOptional({
    description: 'Number of items per page',
    example: 10,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  @IsOptional()
  @Type(() => Number)
  // maxDecimalPlaces: 0 rejects fractional page sizes.
  @IsNumber({ maxDecimalPlaces: 0 })
  @Min(1)
  @Max(100)
  limit?: number;
}
|
||||
|
||||
@@ -1,7 +1,22 @@
|
||||
import { Controller, Get, Post, Query, Logger } from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiQuery,
|
||||
ApiBadRequestResponse,
|
||||
ApiInternalServerErrorResponse,
|
||||
} from '@nestjs/swagger';
|
||||
import { IcdService } from './icd.service';
|
||||
import { SearchIcdDto } from './dto/search-icd.dto';
|
||||
import {
|
||||
IcdSearchResponseDto,
|
||||
IcdImportResponseDto,
|
||||
IcdStatisticsResponseDto,
|
||||
ErrorResponseDto,
|
||||
} from './dto/icd-response.dto';
|
||||
|
||||
@ApiTags('ICD')
|
||||
@Controller('icd')
|
||||
export class IcdController {
|
||||
private readonly logger = new Logger(IcdController.name);
|
||||
@@ -9,7 +24,25 @@ export class IcdController {
|
||||
constructor(private readonly icdService: IcdService) {}
|
||||
|
||||
@Post('import')
|
||||
async importData() {
|
||||
@ApiOperation({
|
||||
summary: 'Import ICD data from Excel files',
|
||||
description:
|
||||
'Import ICD-9 and ICD-10 codes from Excel files located in the test directory. This operation will process both ICD files and insert/update the database with the latest codes.',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'ICD data imported successfully',
|
||||
type: IcdImportResponseDto,
|
||||
})
|
||||
@ApiBadRequestResponse({
|
||||
description: 'Bad request - Invalid file format or missing files',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error during import process',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async importData(): Promise<IcdImportResponseDto> {
|
||||
try {
|
||||
this.logger.log('Starting ICD data import...');
|
||||
const result = await this.icdService.importIcdData();
|
||||
@@ -20,21 +53,62 @@ export class IcdController {
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error importing ICD data:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to import ICD data',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@Get('search')
|
||||
@ApiOperation({
|
||||
summary: 'Search ICD codes with filters and pagination',
|
||||
description:
|
||||
'Search for ICD codes using various filters like category, search term, with pagination support. Returns a paginated list of matching ICD codes.',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'category',
|
||||
required: false,
|
||||
description: 'Filter by ICD category',
|
||||
enum: ['ICD9', 'ICD10'],
|
||||
example: 'ICD10',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'search',
|
||||
required: false,
|
||||
description: 'Search term for ICD code or description',
|
||||
example: 'diabetes',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'page',
|
||||
required: false,
|
||||
description: 'Page number for pagination',
|
||||
example: 1,
|
||||
type: 'number',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'limit',
|
||||
required: false,
|
||||
description: 'Number of items per page (max 100)',
|
||||
example: 10,
|
||||
type: 'number',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'ICD codes retrieved successfully',
|
||||
type: IcdSearchResponseDto,
|
||||
})
|
||||
@ApiBadRequestResponse({
|
||||
description: 'Bad request - Invalid query parameters',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error during search',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async searchIcdCodes(
|
||||
@Query('category') category?: string,
|
||||
@Query('search') search?: string,
|
||||
@Query('page') page?: string,
|
||||
@Query('limit') limit?: string,
|
||||
) {
|
||||
): Promise<IcdSearchResponseDto> {
|
||||
try {
|
||||
const pageNum = page ? parseInt(page, 10) : 1;
|
||||
const limitNum = limit ? parseInt(limit, 10) : 10;
|
||||
@@ -48,20 +122,38 @@ export class IcdController {
|
||||
|
||||
return {
|
||||
success: true,
|
||||
...result,
|
||||
data: result.data,
|
||||
pagination: {
|
||||
currentPage: result.page,
|
||||
totalPages: result.totalPages,
|
||||
totalItems: result.total,
|
||||
itemsPerPage: result.limit,
|
||||
hasNextPage: result.page < result.totalPages,
|
||||
hasPreviousPage: result.page > 1,
|
||||
},
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error searching ICD codes:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to search ICD codes',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
|
||||
@Get('statistics')
|
||||
async getStatistics() {
|
||||
@ApiOperation({
|
||||
summary: 'Get ICD database statistics',
|
||||
description:
|
||||
'Retrieve statistics about the ICD database including total counts for ICD-9 and ICD-10 codes, and last import information.',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: 200,
|
||||
description: 'Statistics retrieved successfully',
|
||||
type: IcdStatisticsResponseDto,
|
||||
})
|
||||
@ApiInternalServerErrorResponse({
|
||||
description: 'Internal server error while fetching statistics',
|
||||
type: ErrorResponseDto,
|
||||
})
|
||||
async getStatistics(): Promise<IcdStatisticsResponseDto> {
|
||||
try {
|
||||
const stats = await this.icdService.getStatistics();
|
||||
return {
|
||||
@@ -70,11 +162,7 @@ export class IcdController {
|
||||
};
|
||||
} catch (error) {
|
||||
this.logger.error('Error getting statistics:', error);
|
||||
return {
|
||||
success: false,
|
||||
message: 'Failed to get statistics',
|
||||
error: error.message,
|
||||
};
|
||||
throw error;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,10 +1,12 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { IcdController } from './icd.controller';
|
||||
import { IcdService } from './icd.service';
|
||||
import { PgVectorModule } from './pgvector.module';
|
||||
|
||||
@Module({
|
||||
controllers: [IcdController],
|
||||
providers: [IcdService],
|
||||
exports: [IcdService],
|
||||
imports: [PgVectorModule],
|
||||
exports: [IcdService, PgVectorModule],
|
||||
})
|
||||
export class IcdModule {}
|
||||
|
||||
@@ -181,6 +181,16 @@ export class IcdService {
|
||||
skip,
|
||||
take: limit,
|
||||
orderBy: { code: 'asc' },
|
||||
select: {
|
||||
id: true,
|
||||
code: true,
|
||||
display: true,
|
||||
version: true,
|
||||
category: true,
|
||||
createdAt: true,
|
||||
updatedAt: true,
|
||||
// Exclude embedding field to avoid deserialization error
|
||||
},
|
||||
}),
|
||||
this.prisma.icdCode.count({ where }),
|
||||
]);
|
||||
|
||||
670
src/icd/pgvector.controller.ts
Normal file
670
src/icd/pgvector.controller.ts
Normal file
@@ -0,0 +1,670 @@
|
||||
import {
|
||||
Controller,
|
||||
Get,
|
||||
Post,
|
||||
Query,
|
||||
Body,
|
||||
HttpStatus,
|
||||
ValidationPipe,
|
||||
UsePipes,
|
||||
} from '@nestjs/common';
|
||||
import {
|
||||
ApiTags,
|
||||
ApiOperation,
|
||||
ApiResponse,
|
||||
ApiQuery,
|
||||
ApiBody,
|
||||
ApiProperty,
|
||||
ApiConsumes,
|
||||
ApiProduces,
|
||||
} from '@nestjs/swagger';
|
||||
import { PgVectorService, VectorSearchResult } from './pgvector.service';
|
||||
|
||||
export class VectorSearchDto {
|
||||
@ApiProperty({
|
||||
description: 'Search query text for vector similarity search',
|
||||
example: 'diabetes mellitus type 2',
|
||||
minLength: 1,
|
||||
maxLength: 500,
|
||||
})
|
||||
query: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Maximum number of results to return',
|
||||
example: 10,
|
||||
required: false,
|
||||
minimum: 1,
|
||||
maximum: 100,
|
||||
default: 10,
|
||||
})
|
||||
limit?: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'ICD category filter to narrow down search results',
|
||||
example: 'ICD10',
|
||||
required: false,
|
||||
enum: ['ICD9', 'ICD10'],
|
||||
default: undefined,
|
||||
})
|
||||
category?: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Similarity threshold (0.0 - 1.0) for filtering results',
|
||||
example: 0.7,
|
||||
required: false,
|
||||
minimum: 0.0,
|
||||
maximum: 1.0,
|
||||
default: 0.7,
|
||||
})
|
||||
threshold?: number;
|
||||
}
|
||||
|
||||
export class EmbeddingRequestDto {
|
||||
@ApiProperty({
|
||||
description: 'Text to generate vector embedding for',
|
||||
example: 'diabetes mellitus',
|
||||
minLength: 1,
|
||||
maxLength: 1000,
|
||||
})
|
||||
text: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Embedding model to use for generation',
|
||||
example: 'text-embedding-ada-002',
|
||||
required: false,
|
||||
default: 'text-embedding-ada-002',
|
||||
})
|
||||
model?: string;
|
||||
}
|
||||
|
||||
export class VectorSearchResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Array of search results with similarity scores',
|
||||
type: 'array',
|
||||
items: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
id: {
|
||||
type: 'string',
|
||||
description: 'Unique identifier for the ICD code',
|
||||
example: 'uuid-123',
|
||||
},
|
||||
code: {
|
||||
type: 'string',
|
||||
description: 'ICD code (e.g., E11.9)',
|
||||
example: 'E11.9',
|
||||
},
|
||||
display: {
|
||||
type: 'string',
|
||||
description: 'Human readable description of the ICD code',
|
||||
example: 'Type 2 diabetes mellitus without complications',
|
||||
},
|
||||
version: {
|
||||
type: 'string',
|
||||
description: 'ICD version (e.g., ICD-10-CM)',
|
||||
example: 'ICD-10-CM',
|
||||
},
|
||||
category: {
|
||||
type: 'string',
|
||||
description: 'ICD category (ICD9 or ICD10)',
|
||||
example: 'ICD10',
|
||||
},
|
||||
similarity: {
|
||||
type: 'number',
|
||||
description: 'Similarity score between 0 and 1',
|
||||
example: 0.89,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
data: VectorSearchResult[];
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Total number of results found',
|
||||
example: 5,
|
||||
minimum: 0,
|
||||
})
|
||||
total: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Search query that was used',
|
||||
example: 'diabetes mellitus type 2',
|
||||
})
|
||||
query: string;
|
||||
}
|
||||
|
||||
export class EmbeddingStatsResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Total number of ICD codes in the system',
|
||||
example: 1000,
|
||||
minimum: 0,
|
||||
})
|
||||
total: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of ICD codes with generated embeddings',
|
||||
example: 500,
|
||||
minimum: 0,
|
||||
})
|
||||
withEmbeddings: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of ICD codes without embeddings',
|
||||
example: 500,
|
||||
minimum: 0,
|
||||
})
|
||||
withoutEmbeddings: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Percentage of codes with embeddings',
|
||||
example: 50.0,
|
||||
minimum: 0,
|
||||
maximum: 100,
|
||||
})
|
||||
percentage: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Current status of the vector store',
|
||||
example: 'Initialized',
|
||||
enum: ['Initialized', 'Not Initialized', 'Error'],
|
||||
})
|
||||
vectorStoreStatus: string;
|
||||
}
|
||||
|
||||
export class VectorStoreStatusDto {
|
||||
@ApiProperty({
|
||||
description: 'Whether the vector store is currently initialized',
|
||||
example: true,
|
||||
})
|
||||
initialized: boolean;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of documents currently in the vector store',
|
||||
example: 1000,
|
||||
minimum: 0,
|
||||
})
|
||||
documentCount: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Embedding model currently being used',
|
||||
example: 'OpenAI text-embedding-ada-002',
|
||||
enum: ['OpenAI text-embedding-ada-002', 'Not Available'],
|
||||
})
|
||||
embeddingModel: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Timestamp of last vector store update',
|
||||
example: '2024-01-01T00:00:00.000Z',
|
||||
})
|
||||
lastUpdated: Date;
|
||||
}
|
||||
|
||||
export class InitializeResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Success message',
|
||||
example: 'Pgvector store initialized successfully',
|
||||
})
|
||||
message: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of documents loaded into vector store',
|
||||
example: 1000,
|
||||
minimum: 0,
|
||||
})
|
||||
documentCount: number;
|
||||
}
|
||||
|
||||
export class RefreshResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Success message',
|
||||
example: 'Pgvector store refreshed successfully',
|
||||
})
|
||||
message: string;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of documents in refreshed vector store',
|
||||
example: 1000,
|
||||
minimum: 0,
|
||||
})
|
||||
documentCount: number;
|
||||
}
|
||||
|
||||
export class GenerateEmbeddingResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Generated vector embedding array',
|
||||
type: 'array',
|
||||
items: { type: 'number' },
|
||||
example: [0.1, 0.2, 0.3, -0.1, 0.5],
|
||||
})
|
||||
embedding: number[];
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of dimensions in the embedding vector',
|
||||
example: 1536,
|
||||
minimum: 1,
|
||||
})
|
||||
dimensions: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Model used to generate the embedding',
|
||||
example: 'text-embedding-ada-002',
|
||||
})
|
||||
model: string;
|
||||
}
|
||||
|
||||
export class GenerateAllEmbeddingsResponseDto {
|
||||
@ApiProperty({
|
||||
description: 'Number of embeddings successfully processed',
|
||||
example: 500,
|
||||
minimum: 0,
|
||||
})
|
||||
processed: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Number of errors encountered during processing',
|
||||
example: 0,
|
||||
minimum: 0,
|
||||
})
|
||||
errors: number;
|
||||
|
||||
@ApiProperty({
|
||||
description: 'Summary message of the operation',
|
||||
example: 'Processed 500 embeddings with 0 errors',
|
||||
})
|
||||
message: string;
|
||||
}
|
||||
|
||||
@ApiTags('PgVector Operations')
|
||||
@Controller('pgvector')
|
||||
@UsePipes(new ValidationPipe({ transform: true }))
|
||||
export class PgVectorController {
|
||||
constructor(private readonly pgVectorService: PgVectorService) {}
|
||||
|
||||
@Post('search')
|
||||
@ApiOperation({
|
||||
summary: 'PgVector similarity search',
|
||||
description:
|
||||
'Search ICD codes using pgvector similarity with the given query. Returns results ordered by similarity score.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiBody({
|
||||
type: VectorSearchDto,
|
||||
description: 'Search parameters for pgvector similarity search',
|
||||
examples: {
|
||||
diabetes: {
|
||||
summary: 'Search for diabetes',
|
||||
value: {
|
||||
query: 'diabetes mellitus type 2',
|
||||
limit: 10,
|
||||
category: 'ICD10',
|
||||
threshold: 0.7,
|
||||
},
|
||||
},
|
||||
heart: {
|
||||
summary: 'Search for heart conditions',
|
||||
value: {
|
||||
query: 'heart attack myocardial infarction',
|
||||
limit: 5,
|
||||
category: 'ICD10',
|
||||
threshold: 0.8,
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Search results with similarity scores',
|
||||
type: VectorSearchResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.BAD_REQUEST,
|
||||
description: 'Invalid search parameters or query',
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
statusCode: { type: 'number', example: 400 },
|
||||
message: { type: 'string', example: 'Query is required' },
|
||||
error: { type: 'string', example: 'Bad Request' },
|
||||
},
|
||||
},
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Internal server error during pgvector search',
|
||||
schema: {
|
||||
type: 'object',
|
||||
properties: {
|
||||
statusCode: { type: 'number', example: 500 },
|
||||
message: { type: 'string', example: 'Internal server error' },
|
||||
error: { type: 'string', example: 'Internal Server Error' },
|
||||
},
|
||||
},
|
||||
})
|
||||
/**
 * Handles `POST /pgvector/search`.
 *
 * Forwards the request body to `PgVectorService.vectorSearch` and wraps the
 * hits in a `VectorSearchResponseDto`.
 *
 * @param searchDto Search parameters; `limit` defaults to 10 and `threshold`
 *   to 0.7 when omitted.
 * @returns The hits, their count, and the query that was searched for.
 */
async vectorSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  const results = await this.pgVectorService.vectorSearch(
    searchDto.query,
    searchDto.limit || 10,
    searchDto.category,
    // `??` instead of `||`: 0 is inside the documented 0.0–1.0 range and must
    // not be silently replaced by the default.
    searchDto.threshold ?? 0.7,
  );

  return {
    data: results,
    total: results.length,
    query: searchDto.query,
  };
}
|
||||
|
||||
@Get('search')
|
||||
@ApiOperation({
|
||||
summary: 'PgVector search via GET',
|
||||
description:
|
||||
'Search ICD codes using pgvector similarity via query parameters. Alternative to POST method.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiQuery({
|
||||
name: 'query',
|
||||
description: 'Search query text for pgvector similarity search',
|
||||
example: 'diabetes mellitus type 2',
|
||||
required: true,
|
||||
type: 'string',
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'limit',
|
||||
description: 'Maximum number of results to return',
|
||||
example: 10,
|
||||
required: false,
|
||||
type: 'number',
|
||||
minimum: 1,
|
||||
maximum: 100,
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'category',
|
||||
description: 'ICD category filter to narrow down search results',
|
||||
example: 'ICD10',
|
||||
required: false,
|
||||
type: 'string',
|
||||
enum: ['ICD9', 'ICD10'],
|
||||
})
|
||||
@ApiQuery({
|
||||
name: 'threshold',
|
||||
description: 'Similarity threshold (0.0 - 1.0) for filtering results',
|
||||
example: 0.7,
|
||||
required: false,
|
||||
type: 'number',
|
||||
minimum: 0.0,
|
||||
maximum: 1.0,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Search results with similarity scores',
|
||||
type: VectorSearchResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.BAD_REQUEST,
|
||||
description: 'Invalid query parameters',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Internal server error during pgvector search',
|
||||
})
|
||||
/**
 * GET handler: runs a pgvector similarity search from query-string parameters.
 *
 * Query-string values arrive as text, so `limit` and `threshold` are parsed
 * here. Missing or non-numeric values fall back to the defaults (10 and 0.7)
 * instead of reaching the SQL layer as NaN, and parsed values are clamped to
 * the ranges advertised in the Swagger metadata (1-100 and 0.0-1.0).
 *
 * @param query Search text.
 * @param limit Optional maximum number of results, as text.
 * @param category Optional category filter.
 * @param threshold Optional minimum similarity, as text.
 * @returns The matching rows, their count, and the query text echoed back.
 */
async vectorSearchGet(
  @Query('query') query: string,
  @Query('limit') limit?: string,
  @Query('category') category?: string,
  @Query('threshold') threshold?: string,
): Promise<VectorSearchResponseDto> {
  const parsedLimit = Number.parseInt(limit ?? '', 10);
  const parsedThreshold = Number.parseFloat(threshold ?? '');

  const effectiveLimit = Number.isNaN(parsedLimit)
    ? 10
    : Math.min(Math.max(parsedLimit, 1), 100);
  const effectiveThreshold = Number.isNaN(parsedThreshold)
    ? 0.7
    : Math.min(Math.max(parsedThreshold, 0), 1);

  const results = await this.pgVectorService.vectorSearch(
    query,
    effectiveLimit,
    category,
    effectiveThreshold,
  );

  return {
    data: results,
    total: results.length,
    query,
  };
}
|
||||
|
||||
@Post('hybrid-search')
|
||||
@ApiOperation({
|
||||
summary: 'Hybrid search (PgVector + Text)',
|
||||
description:
|
||||
'Combine pgvector similarity with text search for better and more accurate results. Combines semantic understanding with traditional text matching.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiBody({
|
||||
type: VectorSearchDto,
|
||||
description: 'Search parameters for hybrid search',
|
||||
examples: {
|
||||
diabetes: {
|
||||
summary: 'Hybrid search for diabetes',
|
||||
value: {
|
||||
query: 'diabetes mellitus type 2',
|
||||
limit: 15,
|
||||
category: 'ICD10',
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Hybrid search results combining pgvector and text search',
|
||||
type: VectorSearchResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.BAD_REQUEST,
|
||||
description: 'Invalid search parameters',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Internal server error during hybrid search',
|
||||
})
|
||||
/**
 * POST handler: delegates to the service's hybrid (vector + text) search.
 *
 * @param searchDto Search parameters; `limit` defaults to 10 when falsy.
 * @returns The combined matches, their count, and the query text echoed back.
 */
async hybridSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  const maxResults = searchDto.limit || 10;
  const matches = await this.pgVectorService.hybridSearch(
    searchDto.query,
    maxResults,
    searchDto.category,
  );

  const response = {
    data: matches,
    total: matches.length,
    query: searchDto.query,
  };
  return response;
}
|
||||
|
||||
@Post('generate-embedding')
|
||||
@ApiOperation({
|
||||
summary: 'Generate text embedding',
|
||||
description:
|
||||
'Generate vector embedding for the given text using OpenAI. Returns 1536-dimensional vector.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiBody({
|
||||
type: EmbeddingRequestDto,
|
||||
description: 'Text to generate embedding for',
|
||||
examples: {
|
||||
diabetes: {
|
||||
summary: 'Generate embedding for diabetes text',
|
||||
value: {
|
||||
text: 'diabetes mellitus',
|
||||
model: 'text-embedding-ada-002',
|
||||
},
|
||||
},
|
||||
heart: {
|
||||
summary: 'Generate embedding for heart condition',
|
||||
value: {
|
||||
text: 'acute myocardial infarction',
|
||||
model: 'text-embedding-ada-002',
|
||||
},
|
||||
},
|
||||
},
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Generated embedding vector with metadata',
|
||||
type: GenerateEmbeddingResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.BAD_REQUEST,
|
||||
description: 'Invalid text input',
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error generating embedding',
|
||||
})
|
||||
/**
 * POST handler: generates an embedding vector for the supplied text.
 *
 * @param requestDto Carries `text` and an optional `model` name.
 * @returns The vector, its length, and the model name from the request
 *   (or 'text-embedding-ada-002' when none was given).
 */
async generateEmbedding(
  @Body() requestDto: EmbeddingRequestDto,
): Promise<GenerateEmbeddingResponseDto> {
  const vector = await this.pgVectorService.generateEmbedding(
    requestDto.text,
    requestDto.model,
  );
  const reportedModel = requestDto.model || 'text-embedding-ada-002';

  return {
    embedding: vector,
    dimensions: vector.length,
    model: reportedModel,
  };
}
|
||||
|
||||
@Post('generate-and-store-all-embeddings')
|
||||
@ApiOperation({
|
||||
summary: 'Generate and store embeddings for all ICD codes',
|
||||
description:
|
||||
'Batch generate embeddings for all ICD codes and store them in the database with pgvector. This process may take some time depending on the number of codes.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Embedding generation and storage results summary',
|
||||
type: GenerateAllEmbeddingsResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error during batch embedding generation and storage',
|
||||
})
|
||||
/**
 * POST handler: triggers the service's batch embedding job and returns its
 * counters together with a human-readable summary message.
 */
async generateAndStoreAllEmbeddings(): Promise<GenerateAllEmbeddingsResponseDto> {
  const summary = await this.pgVectorService.generateAndStoreAllEmbeddings();
  const message = `Processed ${summary.processed} embeddings with ${summary.errors} errors`;

  return { ...summary, message };
}
|
||||
|
||||
@Get('stats')
|
||||
@ApiOperation({
|
||||
summary: 'Get embedding statistics',
|
||||
description:
|
||||
'Get comprehensive statistics about ICD codes and their embedding status in the pgvector store.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiProduces('application/json')
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Embedding statistics and pgvector store status',
|
||||
type: EmbeddingStatsResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error retrieving statistics',
|
||||
})
|
||||
/** GET handler: returns the embedding statistics reported by the service. */
async getEmbeddingStats(): Promise<EmbeddingStatsResponseDto> {
  const stats = await this.pgVectorService.getEmbeddingStats();
  return stats;
}
|
||||
|
||||
@Get('status')
|
||||
@ApiOperation({
|
||||
summary: 'Get pgvector store status',
|
||||
description:
|
||||
'Get current operational status of the pgvector store including initialization state and document count.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiProduces('application/json')
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Current pgvector store status and configuration',
|
||||
type: VectorStoreStatusDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error retrieving pgvector store status',
|
||||
})
|
||||
/** GET handler: returns the vector store status reported by the service. */
async getVectorStoreStatus(): Promise<VectorStoreStatusDto> {
  const storeStatus = await this.pgVectorService.getVectorStoreStatus();
  return storeStatus;
}
|
||||
|
||||
@Post('initialize')
|
||||
@ApiOperation({
|
||||
summary: 'Initialize pgvector store',
|
||||
description:
|
||||
'Initialize or reinitialize the pgvector store with all available ICD codes. This loads codes from the database into the pgvector store.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Pgvector store initialization results',
|
||||
type: InitializeResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error during pgvector store initialization',
|
||||
})
|
||||
/**
 * POST handler: initializes the pgvector store, then reports how many
 * documents the service's status call counts afterwards.
 */
async initializeVectorStore(): Promise<InitializeResponseDto> {
  await this.pgVectorService.initializeVectorStore();

  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();
  return {
    message: 'Pgvector store initialized successfully',
    documentCount,
  };
}
|
||||
|
||||
@Post('refresh')
|
||||
@ApiOperation({
|
||||
summary: 'Refresh pgvector store',
|
||||
description:
|
||||
'Refresh the pgvector store with the latest ICD codes data from the database. Useful after data updates.',
|
||||
tags: ['PgVector Operations'],
|
||||
})
|
||||
@ApiConsumes('application/json')
|
||||
@ApiProduces('application/json')
|
||||
@ApiResponse({
|
||||
status: HttpStatus.OK,
|
||||
description: 'Pgvector store refresh results',
|
||||
type: RefreshResponseDto,
|
||||
})
|
||||
@ApiResponse({
|
||||
status: HttpStatus.INTERNAL_SERVER_ERROR,
|
||||
description: 'Error during pgvector store refresh',
|
||||
})
|
||||
/**
 * POST handler: refreshes the pgvector store, then reports how many
 * documents the service's status call counts afterwards.
 */
async refreshVectorStore(): Promise<RefreshResponseDto> {
  await this.pgVectorService.refreshVectorStore();

  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();
  return {
    message: 'Pgvector store refreshed successfully',
    documentCount,
  };
}
|
||||
}
|
||||
10
src/icd/pgvector.module.ts
Normal file
10
src/icd/pgvector.module.ts
Normal file
@@ -0,0 +1,10 @@
|
||||
import { Module } from '@nestjs/common';
|
||||
import { PgVectorController } from './pgvector.controller';
|
||||
import { PgVectorService } from './pgvector.service';
|
||||
|
||||
/**
 * Nest module for the pgvector feature: registers PgVectorController and
 * provides PgVectorService, which is also exported to importing modules.
 */
@Module({
  controllers: [PgVectorController],
  providers: [PgVectorService],
  exports: [PgVectorService],
})
export class PgVectorModule {}
|
||||
611
src/icd/pgvector.service.ts
Normal file
611
src/icd/pgvector.service.ts
Normal file
@@ -0,0 +1,611 @@
|
||||
import { Injectable, Logger } from '@nestjs/common';
|
||||
import { PrismaClient } from '../../generated/prisma';
|
||||
import { OpenAIEmbeddings } from '@langchain/openai';
|
||||
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
|
||||
import { Document } from 'langchain/document';
|
||||
import { Pool } from 'pg';
|
||||
|
||||
/** One row returned by the search methods of PgVectorService. */
export interface VectorSearchResult {
  /** Row id from the `icd_codes` table. */
  id: string;
  /** Value of the `code` column. */
  code: string;
  /** Value of the `display` column. */
  display: string;
  /** Value of the `version` column. */
  version: string;
  /** Value of the `category` column. */
  category: string;
  /** Relevance score; higher means a closer match. */
  similarity: number;
}
|
||||
|
||||
/** Input shape for an embedding request. */
export interface EmbeddingRequest {
  /** Text to embed. */
  text: string;
  /** Optional embedding model name. */
  model?: string;
}
|
||||
|
||||
@Injectable()
|
||||
export class PgVectorService {
|
||||
private readonly logger = new Logger(PgVectorService.name);
|
||||
private readonly prisma = new PrismaClient();
|
||||
private readonly pool: Pool;
|
||||
private vectorStore: PGVectorStore | null = null;
|
||||
private embeddings: OpenAIEmbeddings | null = null;
|
||||
|
||||
/**
 * Creates the PostgreSQL connection pool from DATABASE_URL and starts the
 * OpenAI embeddings setup.
 */
constructor() {
  // Initialize PostgreSQL connection pool
  this.pool = new Pool({
    connectionString: process.env.DATABASE_URL,
    max: 20,
    idleTimeoutMillis: 30000,
    connectionTimeoutMillis: 2000,
  });

  // initializeEmbeddings() is async and rethrows on failure. A constructor
  // cannot await it, so without a handler a missing API key would surface as
  // an unhandled promise rejection. The failure is already logged inside
  // initializeEmbeddings(); here we only keep the rejection contained so that
  // `embeddings` stays null and the public methods report
  // "not initialized" on use.
  this.initializeEmbeddings().catch(() => undefined);
}
|
||||
|
||||
/**
 * Builds the OpenAIEmbeddings client from environment configuration.
 *
 * Reads OPENAI_API_KEY (required) and OPENAI_API_MODEL (optional, defaults to
 * 'text-embedding-ada-002'). Any failure is logged and rethrown wrapped in a
 * new Error.
 */
private async initializeEmbeddings() {
  try {
    const openAiKey = process.env.OPENAI_API_KEY;
    if (!openAiKey) {
      this.logger.error(
        'OPENAI_API_KEY not found. Vector operations require OpenAI API key.',
      );
      throw new Error('OPENAI_API_KEY is required for vector operations');
    }

    const modelName = process.env.OPENAI_API_MODEL || 'text-embedding-ada-002';

    this.embeddings = new OpenAIEmbeddings({
      openAIApiKey: openAiKey,
      modelName,
      maxConcurrency: 5,
    });

    this.logger.log(
      `OpenAI embeddings initialized successfully with model: ${modelName}`,
    );
  } catch (error) {
    this.logger.error('Failed to initialize OpenAI embeddings:', error);
    throw new Error(
      `Failed to initialize OpenAI embeddings: ${error.message}`,
    );
  }
}
|
||||
|
||||
/**
 * Initializes the LangChain pgvector store on top of the `icd_codes` table.
 *
 * Safe to call repeatedly: the new store is created first, and only after it
 * is in place is the previously held store closed, so repeated
 * initialize/refresh calls do not accumulate database connections.
 *
 * @throws Error when embeddings are not initialized, DATABASE_URL is missing,
 *   or the store cannot be created.
 */
async initializeVectorStore(): Promise<void> {
  try {
    this.logger.log('Initializing pgvector store...');

    if (!this.embeddings) {
      throw new Error(
        'OpenAI embeddings not initialized. Cannot create vector store.',
      );
    }

    // Get database connection string
    const connectionString = process.env.DATABASE_URL;
    if (!connectionString) {
      throw new Error('DATABASE_URL not found');
    }

    const previousStore = this.vectorStore;

    // Initialize pgvector store without inserting data
    this.vectorStore = await PGVectorStore.initialize(this.embeddings, {
      postgresConnectionOptions: {
        connectionString,
      },
      tableName: 'icd_codes',
      columns: {
        idColumnName: 'id',
        vectorColumnName: 'embedding',
        contentColumnName: 'content',
        metadataColumnName: 'metadata',
      },
    });

    // Release the connections held by the store we just replaced. A failure
    // here must not undo a successful (re)initialization, so it is only logged.
    if (previousStore) {
      try {
        await previousStore.end();
      } catch (closeError) {
        this.logger.warn('Failed to close previous pgvector store:', closeError);
      }
    }

    this.logger.log('Pgvector store initialized successfully');
  } catch (error) {
    this.logger.error('Error initializing pgvector store:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Generates an embedding for `text` with the configured OpenAI client.
 *
 * @param text Text to embed; its first 100 characters are written to the log.
 * @param model Accepted for interface compatibility; the body does not
 *   consult it and always uses the client built at startup.
 * @returns The embedding vector.
 * @throws Error (wrapped with a "Failed to generate embedding" prefix) when
 *   the client is not initialized or the embedding call fails.
 */
async generateEmbedding(
  text: string,
  model: string = 'text-embedding-ada-002',
): Promise<number[]> {
  try {
    const preview = text.substring(0, 100);
    this.logger.log(`Generating embedding for text: ${preview}...`);

    const embedder = this.embeddings;
    if (!embedder) {
      throw new Error(
        'OpenAI embeddings not initialized. Please check your API configuration.',
      );
    }

    // Use OpenAI embeddings
    const vector = await embedder.embedQuery(text);
    this.logger.log(
      `Generated OpenAI embedding with ${vector.length} dimensions`,
    );
    return vector;
  } catch (error) {
    this.logger.error('Error generating embedding:', error);
    throw new Error(`Failed to generate embedding: ${error.message}`);
  }
}
|
||||
|
||||
/**
 * Generates and stores embeddings for a sample of ICD codes that have none.
 *
 * Despite the name, at most `limit` rows (default 100) are handled per call.
 * Rows are processed one at a time; a failure on one row is logged and
 * counted, and processing continues with the next row.
 *
 * @param limit Maximum number of rows to fetch and process.
 * @returns Counts of processed rows, failed rows, and rows fetched.
 */
async generateAndStoreAllEmbeddings(limit: number = 100): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
}> {
  try {
    this.logger.log(
      `Starting batch embedding generation and storage for sample ${limit} ICD codes...`,
    );

    // Fetch sample rows that still lack an embedding
    const { rows: pending } = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL LIMIT $1',
      [limit],
    );
    const totalSample = pending.length;

    if (totalSample === 0) {
      this.logger.log('All ICD codes already have embeddings');
      return { processed: 0, errors: 0, totalSample: 0 };
    }

    this.logger.log(
      `Found ${totalSample} sample codes without embeddings (limited to ${limit})`,
    );

    let processed = 0;
    let errors = 0;

    for (const row of pending) {
      try {
        // Text representation that gets embedded and stored as `content`
        const text = `${row.code} - ${row.display}`;
        const embedding = await this.generateEmbedding(text);

        // pgvector accepts the textual form "[v1,v2,...]"
        const vectorLiteral = `[${embedding.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });

        await this.pool.query(
          `UPDATE icd_codes
           SET embedding = $1::vector,
               metadata = $2::jsonb,
               content = $3
           WHERE id = $4`,
          [vectorLiteral, metadataJson, text, row.id],
        );

        processed += 1;
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${totalSample} sample embeddings`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors += 1;
      }
    }

    this.logger.log(
      `Sample embedding generation and storage completed. Processed: ${processed}, Errors: ${errors}, Total Sample: ${totalSample}`,
    );
    return { processed, errors, totalSample };
  } catch (error) {
    this.logger.error('Error in generateAndStoreAllEmbeddings:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Generates and stores embeddings for a sample of ICD codes in one category
 * that have no embedding yet.
 *
 * Rows are processed one at a time; a failure on one row is logged and
 * counted, and processing continues with the next row.
 *
 * @param category Category value the rows must match exactly.
 * @param limit Maximum number of rows to fetch and process (default 100).
 * @returns Counts of processed rows, failed rows, rows fetched, and the
 *   category echoed back.
 */
async generateAndStoreSampleEmbeddingsByCategory(
  category: string,
  limit: number = 100,
): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
  category: string;
}> {
  try {
    this.logger.log(
      `Starting batch embedding generation for sample ${limit} ICD codes in category: ${category}`,
    );

    // Fetch sample rows in the category that still lack an embedding
    const { rows: pending } = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL AND category = $1 LIMIT $2',
      [category, limit],
    );
    const totalSample = pending.length;

    if (totalSample === 0) {
      this.logger.log(
        `No ICD codes found in category '${category}' without embeddings`,
      );
      return { processed: 0, errors: 0, totalSample: 0, category };
    }

    this.logger.log(
      `Found ${totalSample} sample codes in category '${category}' without embeddings (limited to ${limit})`,
    );

    let processed = 0;
    let errors = 0;

    for (const row of pending) {
      try {
        // Text representation that gets embedded and stored as `content`
        const text = `${row.code} - ${row.display}`;
        const embedding = await this.generateEmbedding(text);

        // pgvector accepts the textual form "[v1,v2,...]"
        const vectorLiteral = `[${embedding.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });

        await this.pool.query(
          `UPDATE icd_codes
           SET embedding = $1::vector,
               metadata = $2::jsonb,
               content = $3
           WHERE id = $4`,
          [vectorLiteral, metadataJson, text, row.id],
        );

        processed += 1;
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${totalSample} sample embeddings in category '${category}'`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors += 1;
      }
    }

    this.logger.log(
      `Sample embedding generation completed for category '${category}'. Processed: ${processed}, Errors: ${errors}, Total Sample: ${totalSample}`,
    );
    return { processed, errors, totalSample, category };
  } catch (error) {
    this.logger.error(
      `Error in generateAndStoreSampleEmbeddingsByCategory for category '${category}':`,
      error,
    );
    throw error;
  }
}
|
||||
|
||||
/**
 * Vector similarity search over `icd_codes` using pgvector.
 *
 * The query text is embedded, rows are ordered by `embedding <=> query`
 * (nearest first) and capped at `limit`; rows whose similarity
 * (`1 - distance`) is below `threshold` are then dropped.
 *
 * @param query Text to search for.
 * @param limit Maximum rows fetched from the database (default 10).
 * @param category Optional exact-match category filter.
 * @param threshold Minimum similarity a row needs to be returned (default 0.7).
 * @returns Matching rows, most similar first.
 */
async vectorSearch(
  query: string,
  limit: number = 10,
  category?: string,
  threshold: number = 0.7,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing pgvector search for: ${query}`);

    if (!this.embeddings) {
      throw new Error('OpenAI embeddings not initialized');
    }

    // Embed the query and render it in pgvector's "[v1,v2,...]" text form
    const queryEmbedding = await this.generateEmbedding(query);
    const vectorLiteral = `[${queryEmbedding.join(',')}]`;

    let sql = `
      SELECT
        id, code, display, version, category,
        1 - (embedding <=> $1::vector) as similarity
      FROM icd_codes
      WHERE embedding IS NOT NULL
    `;

    // Placeholder numbers are derived from the parameter list as it grows
    const params: any[] = [vectorLiteral];
    if (category) {
      params.push(category);
      sql += ` AND category = $${params.length}`;
    }
    params.push(limit);
    sql += ` ORDER BY embedding <=> $1::vector ASC LIMIT $${params.length}`;

    const { rows } = await this.pool.query(sql, params);

    // Keep only rows at or above the threshold, preserving database order
    const matches: VectorSearchResult[] = [];
    for (const row of rows as any[]) {
      if (row.similarity >= threshold) {
        matches.push({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
          similarity: parseFloat(row.similarity),
        });
      }
    }

    this.logger.log(
      `Pgvector search returned ${matches.length} results for query: "${query}"`,
    );
    return matches;
  } catch (error) {
    this.logger.error('Error in pgvector search:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Hybrid search: combines vector similarity with a text match.
 *
 * Both searches fetch up to `limit * 2` candidates (the vector search with a
 * relaxed threshold of 0.5). Each candidate's score is the weighted sum of
 * its vector similarity and its text score; rows found by only one search
 * receive only that search's weighted contribution.
 *
 * @param query Text to search for.
 * @param limit Maximum number of rows returned (default 10).
 * @param category Optional exact-match category filter.
 * @param vectorWeight Weight applied to the vector similarity (default 0.7).
 * @param textWeight Weight applied to the text score (default 0.3).
 * @returns Up to `limit` rows sorted by combined score, highest first.
 */
async hybridSearch(
  query: string,
  limit: number = 10,
  category?: string,
  vectorWeight: number = 0.7,
  textWeight: number = 0.3,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing hybrid search for: ${query}`);

    // The two lookups are independent, so run them concurrently instead of
    // paying for the embedding round-trip and the text query back to back.
    const [vectorResults, textResults] = await Promise.all([
      this.vectorSearch(query, limit * 2, category, 0.5),
      this.textSearch(query, limit * 2, category),
    ]);

    // Combine and score results, keyed by row id
    const combinedResults = new Map<string, VectorSearchResult>();

    for (const result of vectorResults) {
      combinedResults.set(result.id, {
        ...result,
        similarity: result.similarity * vectorWeight,
      });
    }

    for (const result of textResults) {
      const textScore = (result.similarity || 0.5) * textWeight;
      const existing = combinedResults.get(result.id);
      if (existing) {
        // Found by both searches: add the text contribution
        existing.similarity += textScore;
      } else {
        combinedResults.set(result.id, { ...result, similarity: textScore });
      }
    }

    // Sort by combined score, highest first, and apply the limit
    return Array.from(combinedResults.values())
      .sort((a, b) => b.similarity - a.similarity)
      .slice(0, limit);
  } catch (error) {
    this.logger.error('Error in hybrid search:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Case-insensitive substring search on `code` and `display`.
 *
 * Every hit receives the same fixed score of 0.5; results are ordered by code.
 *
 * @param query Text to look for; treated literally (LIKE wildcards are escaped).
 * @param limit Maximum number of rows returned.
 * @param category Optional exact-match category filter.
 * @returns Matching rows with a constant similarity of 0.5.
 */
private async textSearch(
  query: string,
  limit: number,
  category?: string,
): Promise<VectorSearchResult[]> {
  try {
    let sql = 'SELECT id, code, display, version, category FROM icd_codes';
    const params: any[] = [];
    const whereConditions: string[] = [];

    if (category) {
      params.push(category);
      whereConditions.push(`category = $${params.length}`);
    }

    if (query) {
      // Escape LIKE metacharacters so user input such as "50%" or "a_b" is
      // matched literally. Backslash is the default escape character for
      // LIKE/ILIKE in PostgreSQL.
      const literalQuery = query.replace(/[\\%_]/g, '\\$&');
      params.push(`%${literalQuery}%`);
      whereConditions.push(
        `(code ILIKE $${params.length} OR display ILIKE $${params.length})`,
      );
    }

    if (whereConditions.length > 0) {
      sql += ' WHERE ' + whereConditions.join(' AND ');
    }

    params.push(limit);
    sql += ' ORDER BY code ASC LIMIT $' + params.length;

    const result = await this.pool.query(sql, params);

    return result.rows.map((code) => ({
      id: code.id,
      code: code.code,
      display: code.display,
      version: code.version,
      category: code.category,
      similarity: 0.5, // Default text similarity score
    }));
  } catch (error) {
    this.logger.error('Error in text search:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Reports how many `icd_codes` rows have an embedding.
 *
 * Both counts come from a single query so they describe the same snapshot of
 * the table and the table is scanned once.
 *
 * @returns Total rows, rows with and without an embedding, the percentage
 *   with an embedding (rounded to two decimals), and whether the vector
 *   store object has been initialized.
 */
async getEmbeddingStats(): Promise<{
  total: number;
  withEmbeddings: number;
  withoutEmbeddings: number;
  percentage: number;
  vectorStoreStatus: string;
}> {
  try {
    // COUNT(embedding) counts only rows where the column is not NULL
    const { rows } = await this.pool.query(
      'SELECT COUNT(*) AS total, COUNT(embedding) AS with_embeddings FROM icd_codes',
    );

    // The driver returns the counts as text, hence the explicit parse
    const total = Number.parseInt(rows[0].total, 10);
    const withEmbeddings = Number.parseInt(rows[0].with_embeddings, 10);
    const withoutEmbeddings = total - withEmbeddings;
    const percentage = total > 0 ? (withEmbeddings / total) * 100 : 0;
    const vectorStoreStatus = this.vectorStore
      ? 'Initialized'
      : 'Not Initialized';

    return {
      total,
      withEmbeddings,
      withoutEmbeddings,
      percentage: Math.round(percentage * 100) / 100,
      vectorStoreStatus,
    };
  } catch (error) {
    this.logger.error('Error getting embedding stats:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Refreshes the vector store by running initializeVectorStore() again.
 * Failures are logged and rethrown unchanged.
 */
async refreshVectorStore(): Promise<void> {
  this.logger.log('Refreshing pgvector store...');
  try {
    await this.initializeVectorStore();
  } catch (error) {
    this.logger.error('Error refreshing pgvector store:', error);
    throw error;
  }
  this.logger.log('Pgvector store refreshed successfully');
}
|
||||
|
||||
/**
 * Returns the current vector store status.
 *
 * @returns Whether the store object exists, the number of rows with an
 *   embedding, a label for the embedding model, and the time of this call.
 */
async getVectorStoreStatus(): Promise<{
  initialized: boolean;
  documentCount: number;
  embeddingModel: string;
  lastUpdated: Date;
}> {
  try {
    // Count rows that already carry an embedding
    const { rows } = await this.pool.query(
      'SELECT COUNT(*) as count FROM icd_codes WHERE embedding IS NOT NULL',
    );
    const documentCount = parseInt(rows[0].count);

    let embeddingModel = 'Not Available';
    if (this.embeddings) {
      embeddingModel = `OpenAI ${process.env.OPENAI_API_MODEL || 'text-embedding-ada-002'}`;
    }

    return {
      initialized: !!this.vectorStore,
      documentCount,
      embeddingModel,
      lastUpdated: new Date(),
    };
  } catch (error) {
    this.logger.error('Error getting vector store status:', error);
    throw error;
  }
}
|
||||
|
||||
/**
 * Releases every connection-holding resource owned by this service: the
 * Prisma client, the vector store (when one was initialized) and the pool.
 *
 * All three are closed even if one of them fails; failures are logged and
 * the first one is rethrown once cleanup has been attempted everywhere.
 */
async onModuleDestroy() {
  const outcomes = await Promise.allSettled([
    this.prisma.$disconnect(),
    this.vectorStore ? this.vectorStore.end() : Promise.resolve(),
    this.pool.end(),
  ]);
  this.vectorStore = null;

  let firstFailure: unknown;
  let failed = false;
  for (const outcome of outcomes) {
    if (outcome.status === 'rejected') {
      this.logger.error('Error while releasing resources:', outcome.reason);
      if (!failed) {
        failed = true;
        firstFailure = outcome.reason;
      }
    }
  }

  if (failed) {
    throw firstFailure;
  }
}
|
||||
}
|
||||
129
src/main.ts
129
src/main.ts
@@ -1,8 +1,133 @@
|
||||
import { NestFactory } from '@nestjs/core';
|
||||
import { AppModule } from './app.module';
|
||||
import { Logger, ValidationPipe } from '@nestjs/common';
|
||||
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
|
||||
|
||||
/**
 * Creates the Nest application, applies CORS, the global validation pipe and
 * (when ENABLE_DOCS is 'true') Swagger, then starts listening.
 *
 * The server is started exactly once, after all configuration has been
 * registered, so that no request is served by a partially configured app.
 */
async function bootstrap() {
  const logger = new Logger('Bootstrap');

  const app = await NestFactory.create(AppModule);

  // Environment configuration
  const port = process.env.PORT ?? 3000;
  const host = process.env.HOST ?? 'localhost';
  const nodeEnv = process.env.NODE_ENV ?? 'development';

  // CORS Configuration
  const corsOrigins = process.env.CORS_ORIGINS?.split(',') ?? [
    'http://localhost:3000',
  ];
  const corsMethods = process.env.CORS_METHODS?.split(',') ?? [
    'GET',
    'HEAD',
    'PUT',
    'PATCH',
    'POST',
    'DELETE',
    'OPTIONS',
  ];
  const corsHeaders = process.env.CORS_HEADERS?.split(',') ?? [
    'Content-Type',
    'Accept',
    'Authorization',
    'X-Requested-With',
  ];
  const corsCredentials = process.env.CORS_CREDENTIALS === 'true';

  // Enable CORS
  app.enableCors({
    origin: corsOrigins,
    methods: corsMethods,
    allowedHeaders: corsHeaders,
    credentials: corsCredentials,
  });

  // Enable global validation pipe
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      forbidNonWhitelisted: true,
      transform: true,
      transformOptions: {
        enableImplicitConversion: true,
      },
    }),
  );

  // Setup Swagger Documentation
  if (process.env.ENABLE_DOCS === 'true') {
    const config = new DocumentBuilder()
      .setTitle('Claim Guard API')
      .setDescription(
        'API documentation for Claim Guard Backend - ICD Code Management System',
      )
      .setVersion('1.0.0')
      .setContact(
        'Development Team',
        'https://github.com/your-org/claim-guard-be',
        'dev@yourdomain.com',
      )
      .setLicense('MIT', 'https://opensource.org/licenses/MIT')
      .addServer(
        process.env.APP_URL || 'http://localhost:3000',
        'Development Server',
      )
      .addTag('ICD', 'ICD Code management operations')
      .addTag('Health', 'Application health and monitoring')
      .addBearerAuth(
        {
          type: 'http',
          scheme: 'bearer',
          bearerFormat: 'JWT',
          name: 'JWT',
          description: 'Enter JWT token',
          in: 'header',
        },
        'JWT-auth',
      )
      .build();

    const document = SwaggerModule.createDocument(app, config);
    SwaggerModule.setup('docs', app, document, {
      swaggerOptions: {
        persistAuthorization: true,
        docExpansion: 'none',
        filter: true,
        showRequestDuration: true,
      },
      customSiteTitle: 'Claim Guard API Documentation',
      customfavIcon: '/favicon.ico',
      customCss: '.swagger-ui .topbar { display: none }',
    });

    logger.log(
      `📚 Swagger Documentation enabled at: http://${host}:${port}/docs`,
    );
  }

  // Global prefix for API endpoints (optional)
  // app.setGlobalPrefix('api/v1');

  // Graceful shutdown
  app.enableShutdownHooks();

  // Start listening only now that CORS, pipes and docs are registered
  await app.listen(port, host);

  logger.log(`🚀 Application is running on: http://${host}:${port}`);
  logger.log(`🌍 Environment: ${nodeEnv}`);
  logger.log(`🔐 CORS Origins: ${corsOrigins.join(', ')}`);

  if (process.env.HEALTH_CHECK_ENABLED === 'true') {
    logger.log(
      `❤️ Health Check available at: http://${host}:${port}${process.env.HEALTH_CHECK_PATH || '/health'}`,
    );
  }
}

// Any startup failure is reported and terminates the process with a
// non-zero exit code.
bootstrap().catch((error) => {
  console.error('❌ Error starting server:', error);
  process.exit(1);
});
|
||||
|
||||
112
verify_migration.sql
Normal file
112
verify_migration.sql
Normal file
@@ -0,0 +1,112 @@
|
||||
-- =====================================================
|
||||
-- VERIFICATION: pgvector Migration Success
|
||||
-- =====================================================
|
||||
-- File: verify_migration.sql
|
||||
--
|
||||
-- Usage:
|
||||
-- 1. Connect to the database: psql -d claim_guard -U username
|
||||
-- 2. Run: \i verify_migration.sql
|
||||
-- =====================================================
|
||||
|
||||
\echo '🎉 VERIFYING pgvector MIGRATION SUCCESS 🎉'
\echo '====================================================='

-- Report whether an extension named "vector" is registered in pg_extension
-- for the current database.
SELECT
    'pgvector Extension' AS component,
    CASE
        WHEN NOT EXISTS (
            SELECT 1
            FROM pg_extension AS ext
            WHERE ext.extname = 'vector'
        ) THEN '❌ NOT INSTALLED'
        ELSE '✅ INSTALLED'
    END AS status;
|
||||
|
||||
-- Check that icd_codes is reachable through the current search_path.
-- to_regclass() resolves the name the same way the unqualified
-- "FROM icd_codes" queries later in this script do, and returns NULL (rather
-- than raising an error) when nothing matches. Filtering
-- information_schema.tables by table_name alone reported a false positive
-- whenever a same-named table existed in an unrelated schema.
SELECT
    'icd_codes Table' AS component,
    CASE
        WHEN to_regclass('icd_codes') IS NOT NULL
            THEN '✅ EXISTS'
        ELSE '❌ MISSING'
    END AS status;
|
||||
|
||||
-- List every column of icd_codes with its type and nullability; the columns
-- named embedding, metadata and content receive the pgvector label.
SELECT
    cols.column_name,
    cols.data_type,
    cols.is_nullable,
    CASE
        WHEN cols.column_name NOT IN ('embedding', 'metadata', 'content')
            THEN 'ℹ️ Standard column'
        ELSE '✅ pgvector column'
    END AS status
FROM information_schema.columns AS cols
WHERE cols.table_name = 'icd_codes'
ORDER BY cols.column_name;
|
||||
|
||||
-- List the indexes defined on icd_codes; index names containing "embedding"
-- or "metadata" are labelled as performance indexes.
SELECT
    idx.indexname,
    idx.indexdef,
    CASE
        WHEN idx.indexname LIKE ANY (ARRAY['%embedding%', '%metadata%'])
            THEN '✅ Performance index'
        ELSE 'ℹ️ Standard index'
    END AS status
FROM pg_indexes AS idx
WHERE idx.tablename = 'icd_codes'
ORDER BY idx.indexname;
|
||||
|
||||
-- Test pgvector functionality
\echo ''
\echo '🧪 TESTING pgvector FUNCTIONALITY 🧪'

-- Smoke test: cast two text literals to fixed-dimension vector values.
-- The statement fails if the vector type is not available.
SELECT
    CAST('[1,2,3,4,5]' AS vector(5)) AS test_vector_5d,
    CAST('[0.1,0.2,0.3]' AS vector(3)) AS test_vector_3d;
|
||||
|
||||
-- Count all rows in icd_codes and, per column, how many rows hold a
-- non-NULL embedding, metadata and content value.
SELECT
    COUNT(*) AS total_rows,
    COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS rows_with_embeddings,
    COUNT(*) FILTER (WHERE metadata IS NOT NULL) AS rows_with_metadata,
    COUNT(*) FILTER (WHERE content IS NOT NULL) AS rows_with_content
FROM icd_codes;
|
||||
|
||||
-- Show a small sample of icd_codes rows together with flags telling whether
-- the embedding, metadata and content columns are populated.
-- ORDER BY id makes the sample deterministic: LIMIT without an ORDER BY
-- returns an arbitrary set of rows that may change from run to run, which
-- makes successive verification runs hard to compare.
SELECT
    id,
    code,
    display,
    version,
    category,
    CASE
        WHEN embedding IS NOT NULL THEN '✅ Has embedding'
        ELSE '❌ No embedding'
    END AS embedding_status,
    CASE
        WHEN metadata IS NOT NULL THEN '✅ Has metadata'
        ELSE '❌ No metadata'
    END AS metadata_status,
    CASE
        WHEN content IS NOT NULL THEN '✅ Has content'
        ELSE '❌ No content'
    END AS content_status
FROM icd_codes
ORDER BY id
LIMIT 5;
|
||||
|
||||
\echo ''
\echo '🎯 MIGRATION VERIFICATION COMPLETE! 🎯'
\echo '====================================================='

-- Evaluate every component once and store the boolean results in psql
-- variables (\gset), so the summary below reflects the real database state
-- instead of printing a success mark unconditionally.
-- Column checks compare udt_name because information_schema reports
-- extension types such as vector as data_type = 'USER-DEFINED'.
SELECT
    EXISTS (
        SELECT 1
        FROM pg_extension
        WHERE extname = 'vector'
    ) AS has_vector_extension,
    EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_name = 'icd_codes'
          AND column_name = 'embedding'
          AND udt_name = 'vector'
    ) AS has_embedding_column,
    EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_name = 'icd_codes'
          AND column_name = 'metadata'
          AND udt_name = 'jsonb'
    ) AS has_metadata_column,
    EXISTS (
        SELECT 1
        FROM information_schema.columns
        WHERE table_name = 'icd_codes'
          AND column_name = 'content'
          AND udt_name = 'text'
    ) AS has_content_column,
    EXISTS (
        SELECT 1
        FROM pg_indexes
        WHERE tablename = 'icd_codes'
          AND (indexname LIKE '%embedding%' OR indexname LIKE '%metadata%')
    ) AS has_performance_index
\gset

\if :has_vector_extension
    \echo '✅ pgvector extension installed'
\else
    \echo '❌ pgvector extension NOT installed'
\endif

\if :has_embedding_column
    \echo '✅ embedding column (vector type) added'
\else
    \echo '❌ embedding column (vector type) missing'
\endif

\if :has_metadata_column
    \echo '✅ metadata column (JSONB) added'
\else
    \echo '❌ metadata column (JSONB) missing'
\endif

\if :has_content_column
    \echo '✅ content column (TEXT) added'
\else
    \echo '❌ content column (TEXT) missing'
\endif

\if :has_performance_index
    \echo '✅ Performance indexes created'
\else
    \echo '❌ Performance indexes missing'
\endif

\echo ''
\echo '🚀 NEXT STEPS:'
\echo '1. Start application: npm run start:dev'
\echo '2. Initialize vector store: POST /pgvector/initialize'
\echo '3. Generate embeddings: POST /pgvector/generate-and-store-all-embeddings'
\echo '4. Test vector search: POST /pgvector/search'
\echo '====================================================='
|
||||
Reference in New Issue
Block a user