add pg vector and embed

This commit is contained in:
2025-08-22 19:34:54 +07:00
parent 21567a0a7c
commit b77beb2d85
27 changed files with 5273 additions and 216 deletions

113
.dockerignore Normal file
View File

@@ -0,0 +1,113 @@
# Dependencies
node_modules
npm-debug.log*
yarn-debug.log*
yarn-error.log*
# Production build
dist
# Environment files
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# IDE files
.vscode
.idea
*.swp
*.swo
*~
# OS generated files
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
logs
*.log
# Coverage directory used by tools like istanbul
coverage
*.lcov
# nyc test coverage
.nyc_output
# Dependency directories
node_modules/
jspm_packages/
# Optional npm cache directory
.npm
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# next.js build output
.next
# nuxt.js build output
.nuxt
# vuepress build output
.vuepress/dist
# Serverless directories
.serverless
# FuseBox cache
.fusebox/
# DynamoDB Local files
.dynamodb/
# TernJS port file
.tern-port
# Stores VSCode versions used for testing VSCode extensions
.vscode-test
# Generated files
generated/
# Git
.git
.gitignore
# Docker
Dockerfile
.dockerignore
docker-compose*.yml
# Documentation
*.md
!README.md
# Test files (keep only the Excel files)
test/**
!test/*.xlsx
# Uploads (will be created at runtime)
uploads/
# Temporary files
.temp
.tmp

179
.env.example Normal file
View File

@@ -0,0 +1,179 @@
# =================================
# APPLICATION CONFIGURATION
# =================================
# Environment mode: development, staging, production
NODE_ENV=development
# Application port
PORT=3000
# Application host (for binding)
HOST=localhost
# Application base URL (for CORS and other purposes)
APP_URL=http://localhost:3000
# =================================
# DATABASE CONFIGURATION
# =================================
# PostgreSQL Database URL
# Format: postgresql://username:password@host:port/database_name
DATABASE_URL=postgresql://username:password@localhost:5432/claim_guard
# Database connection pool settings
DB_POOL_MIN=2
DB_POOL_MAX=10
# =================================
# CORS CONFIGURATION
# =================================
# Allowed origins for CORS (comma-separated)
# Use * to allow all origins (NOT recommended for production)
# Examples:
# - Development: http://localhost:3000,http://localhost:3001
# - Production: https://yourdomain.com,https://www.yourdomain.com
CORS_ORIGINS=http://localhost:3000,http://localhost:3001,http://localhost:8080
# Allowed methods for CORS (comma-separated)
CORS_METHODS=GET,HEAD,PUT,PATCH,POST,DELETE,OPTIONS
# Allowed headers for CORS (comma-separated)
CORS_HEADERS=Content-Type,Accept,Authorization,X-Requested-With
# Allow credentials in CORS requests
CORS_CREDENTIALS=true
# =================================
# SECURITY CONFIGURATION
# =================================
# JWT Secret for authentication (generate strong secret for production)
JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
# JWT Token expiration time
JWT_EXPIRES_IN=24h
# JWT Refresh token expiration
JWT_REFRESH_EXPIRES_IN=7d
# API Rate limiting (requests per minute)
RATE_LIMIT_MAX=100
# =================================
# LOGGING CONFIGURATION
# =================================
# Log level: error, warn, info, debug, verbose
LOG_LEVEL=info
# Log format: json, simple
LOG_FORMAT=simple
# Enable request logging
LOG_REQUESTS=true
# =================================
# FILE UPLOAD CONFIGURATION
# =================================
# Maximum file size for uploads (in bytes)
# 10MB = 10485760, 50MB = 52428800
MAX_FILE_SIZE=10485760
# Allowed file types for upload (comma-separated)
ALLOWED_FILE_TYPES=.xlsx,.xls,.csv
# Upload directory path
UPLOAD_DIR=./uploads
# =================================
# CACHE CONFIGURATION
# =================================
# Redis URL for caching (optional)
# REDIS_URL=redis://localhost:6379
# Cache TTL in seconds (default: 1 hour)
CACHE_TTL=3600
# =================================
# EMAIL CONFIGURATION (Optional)
# =================================
# SMTP configuration for sending emails
# SMTP_HOST=smtp.gmail.com
# SMTP_PORT=587
# SMTP_SECURE=false
# SMTP_USER=your-email@gmail.com
# SMTP_PASS=your-email-password
# Email from address
# EMAIL_FROM=noreply@yourdomain.com
# =================================
# THIRD-PARTY INTEGRATIONS
# =================================
# External API keys
# EXTERNAL_API_KEY=your-api-key
# EXTERNAL_API_URL=https://api.external-service.com
# =================================
# DEVELOPMENT SETTINGS
# =================================
# Enable API documentation (Swagger)
ENABLE_DOCS=true
# Enable debug mode
DEBUG=true
# Enable database logging
DB_LOGGING=true
# =================================
# PRODUCTION SETTINGS
# =================================
# When NODE_ENV=production, ensure these are set:
# - Strong JWT_SECRET
# - Specific CORS_ORIGINS (not *)
# - DB_LOGGING=false
# - DEBUG=false
# - LOG_LEVEL=warn or error
# Health check endpoint settings
HEALTH_CHECK_ENABLED=true
# Request timeout in milliseconds
REQUEST_TIMEOUT=30000
# =================================
# ICD SPECIFIC CONFIGURATION
# =================================
# Path to ICD data files
ICD9_FILE_PATH=./test/[PUBLIC] ICD-9CM e-klaim.xlsx
ICD10_FILE_PATH=./test/[PUBLIC] ICD-10 e-klaim.xlsx
# ICD import batch size
ICD_IMPORT_BATCH_SIZE=1000
# =================================
# MONITORING & METRICS
# =================================
# Enable application metrics
METRICS_ENABLED=true
# Metrics endpoint path
METRICS_PATH=/metrics
# Health check endpoint path
HEALTH_CHECK_PATH=/health
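# =================================
# OPENAI / EMBEDDINGS CONFIGURATION
# =================================
# OpenAI API key used to generate embeddings (replace the placeholder)
OPENAI_API_KEY=xxxxxx
# Embedding model; its output size must match the vector(1536) "embedding" column
OPENAI_API_MODEL=text-embedding-ada-002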

473
README.md
View File

@@ -1,194 +1,397 @@
<p align="center">
<a href="http://nestjs.com/" target="blank"><img src="https://nestjs.com/img/logo-small.svg" width="120" alt="Nest Logo" /></a>
</p>
# 🏥 Claim Guard Backend
[circleci-image]: https://img.shields.io/circleci/build/github/nestjs/nest/master?token=abc123def456
[circleci-url]: https://circleci.com/gh/nestjs/nest
> **NestJS application for managing ICD-9 and ICD-10 medical codes with Excel import functionality**
<p align="center">A progressive <a href="http://nodejs.org" target="_blank">Node.js</a> framework for building efficient and scalable server-side applications.</p>
<p align="center">
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/v/@nestjs/core.svg" alt="NPM Version" /></a>
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/l/@nestjs/core.svg" alt="Package License" /></a>
<a href="https://www.npmjs.com/~nestjscore" target="_blank"><img src="https://img.shields.io/npm/dm/@nestjs/common.svg" alt="NPM Downloads" /></a>
<a href="https://circleci.com/gh/nestjs/nest" target="_blank"><img src="https://img.shields.io/circleci/build/github/nestjs/nest/master" alt="CircleCI" /></a>
<a href="https://discord.gg/G7Qnnhy" target="_blank"><img src="https://img.shields.io/badge/discord-online-brightgreen.svg" alt="Discord"/></a>
<a href="https://opencollective.com/nest#backer" target="_blank"><img src="https://opencollective.com/nest/backers/badge.svg" alt="Backers on Open Collective" /></a>
<a href="https://opencollective.com/nest#sponsor" target="_blank"><img src="https://opencollective.com/nest/sponsors/badge.svg" alt="Sponsors on Open Collective" /></a>
<a href="https://paypal.me/kamilmysliwiec" target="_blank"><img src="https://img.shields.io/badge/Donate-PayPal-ff3f59.svg" alt="Donate us"/></a>
<a href="https://opencollective.com/nest#sponsor" target="_blank"><img src="https://img.shields.io/badge/Support%20us-Open%20Collective-41B883.svg" alt="Support us"></a>
<a href="https://twitter.com/nestframework" target="_blank"><img src="https://img.shields.io/twitter/follow/nestframework.svg?style=social&label=Follow" alt="Follow us on Twitter"></a>
</p>
<!--[![Backers on Open Collective](https://opencollective.com/nest/backers/badge.svg)](https://opencollective.com/nest#backer)
[![Sponsors on Open Collective](https://opencollective.com/nest/sponsors/badge.svg)](https://opencollective.com/nest#sponsor)-->
[![NestJS](https://img.shields.io/badge/NestJS-E0234E?style=for-the-badge&logo=nestjs&logoColor=white)](https://nestjs.com/)
[![TypeScript](https://img.shields.io/badge/TypeScript-007ACC?style=for-the-badge&logo=typescript&logoColor=white)](https://www.typescriptlang.org/)
[![PostgreSQL](https://img.shields.io/badge/PostgreSQL-316192?style=for-the-badge&logo=postgresql&logoColor=white)](https://www.postgresql.org/)
[![Docker](https://img.shields.io/badge/Docker-2CA5E0?style=for-the-badge&logo=docker&logoColor=white)](https://www.docker.com/)
[![Prisma](https://img.shields.io/badge/Prisma-3982CE?style=for-the-badge&logo=Prisma&logoColor=white)](https://www.prisma.io/)
## Description
## ✨ Features
Claim Guard Backend - A NestJS application for managing ICD-9 and ICD-10 medical codes with Excel import functionality.
- 🏥 **ICD Code Management** - Import and manage ICD-9 and ICD-10 medical codes
- 📊 **Excel Import** - Read data from Excel files and store in PostgreSQL
- 🔍 **Search & Filter** - Search codes by category, code, or display text
- 🌐 **REST API** - Full REST API with Swagger documentation
- 📄 **Pagination** - Built-in pagination for large datasets
- 🐳 **Docker Ready** - PostgreSQL with pgvector extension
- 🔄 **Database Migrations** - Prisma migration system
- ✅ **Input Validation** - Type-safe DTOs with validation
- 📚 **API Documentation** - Interactive Swagger UI
## Features
## 🚀 Quick Start
- **ICD Code Management**: Import and manage ICD-9 and ICD-10 medical codes
- **Excel Import**: Read data from Excel files and store in PostgreSQL database
- **Search & Filter**: Search codes by category, code, or display text
- **REST API**: Full REST API endpoints for accessing ICD data
- **Pagination**: Built-in pagination support for large datasets
### Prerequisites
## ICD Service Endpoints
- Node.js 18+
- Docker & Docker Compose
- Git
### Import Data
```bash
POST /icd/import
```
Imports ICD-9 and ICD-10 data from Excel files in the `test/` directory.
### Search Codes
```bash
GET /icd/search?category=ICD10&search=diabetes&page=1&limit=10
```
Search ICD codes with optional filters:
- `category`: Filter by ICD9 or ICD10
- `search`: Search in code or display text
- `page`: Page number (default: 1)
- `limit`: Items per page (default: 10)
### Get Statistics
```bash
GET /icd/statistics
```
Returns count statistics for ICD codes.
## Database Schema
The application uses PostgreSQL with Prisma ORM. The ICD codes are stored in the `icd_codes` table with the following structure:
```sql
CREATE TABLE "icd_codes" (
"id" TEXT PRIMARY KEY DEFAULT gen_random_uuid(),
"code" TEXT UNIQUE NOT NULL,
"display" TEXT NOT NULL,
"version" TEXT NOT NULL,
"category" TEXT NOT NULL, -- "ICD9" or "ICD10"
"createdAt" TIMESTAMP DEFAULT NOW(),
"updatedAt" TIMESTAMP DEFAULT NOW()
);
```
**ID Format**: The `id` field now uses UUID (Universal Unique Identifier) format like `550e8400-e29b-41d4-a716-446655440000` instead of CUID.
## Setup Instructions
1. **Install Dependencies**
### 1. Clone & Install
```bash
git clone <repository-url>
cd claim-guard-be
npm install
```
2. **Database Setup**
Create a `.env` file with your PostgreSQL connection:
### 2. Start Database
```bash
DATABASE_URL="postgresql://username:password@localhost:5432/claim_guard_db?schema=public"
# Start PostgreSQL with pgvector
docker-compose up -d
# Verify database is running
docker-compose ps
```
3. **Generate Prisma Client**
### 3. Setup Environment
```bash
# Copy environment template (edit as needed)
cp .env.example .env   # Windows (cmd): copy .env.example .env
# Update DATABASE_URL in .env:
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
```
### 4. Run Migrations
```bash
# Apply database schema
npx prisma migrate deploy
# Generate Prisma client
npx prisma generate
```
4. **Run Database Migrations**
### 5. Start Application
```bash
npx prisma db push
# Development mode
npm run start:dev
# Production mode
npm run build
npm run start:prod
```
5. **Place Excel Files**
Ensure the following files are in the `test/` directory:
### 6. Access Services
- `[PUBLIC] ICD-9CM e-klaim.xlsx`
- `[PUBLIC] ICD-10 e-klaim.xlsx`
- **API**: http://localhost:3000
- **Swagger Docs**: http://localhost:3000/docs
- **Health Check**: http://localhost:3000/health
The Excel files should have at least 3 columns:
## 📚 API Endpoints
- Column 1: Code
- Column 2: Display/Description
- Column 3: Version
### ICD Management
## Project setup
| Method | Endpoint | Description |
| ------ | ----------------- | -------------------------------- |
| `POST` | `/icd/import` | Import ICD data from Excel files |
| `GET` | `/icd/search` | Search ICD codes with filters |
| `GET` | `/icd/statistics` | Get database statistics |
### Health & Monitoring
| Method | Endpoint | Description |
| ------ | --------------- | ----------------------------- |
| `GET` | `/health` | Application health check |
| `GET` | `/health/ready` | Readiness probe |
| `GET` | `/health/live` | Liveness probe |
| `GET` | `/docs` | Interactive API documentation |
### Example Usage
```bash
$ npm install
# Import ICD data
curl -X POST http://localhost:3000/icd/import
# Search for diabetes codes
curl "http://localhost:3000/icd/search?search=diabetes&page=1&limit=10"
# Get statistics
curl http://localhost:3000/icd/statistics
# Health check
curl http://localhost:3000/health
```
## Compile and run the project
## 🐳 Docker Setup
### Database Only (Recommended)
```bash
# development
$ npm run start
# Start PostgreSQL with pgvector
docker-compose up -d
# watch mode
$ npm run start:dev
# Stop database
docker-compose down
# production mode
$ npm run start:prod
# Reset database (deletes all data!)
docker-compose down -v
```
## Run tests
### Connection Details
```env
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
Host: localhost
Port: 5432
Database: claim_guard
Username: postgres
Password: postgres123
```
### Verify pgvector Extension
```bash
# unit tests
$ npm run test
# e2e tests
$ npm run test:e2e
# test coverage
$ npm run test:cov
docker-compose exec postgres psql -U postgres -d claim_guard -c "SELECT name, default_version, installed_version FROM pg_available_extensions WHERE name = 'vector';"
```
## Deployment
## 🗂️ Database Schema
When you're ready to deploy your NestJS application to production, there are some key steps you can take to ensure it runs as efficiently as possible. Check out the [deployment documentation](https://docs.nestjs.com/deployment) for more information.
### IcdCode Model
If you are looking for a cloud-based platform to deploy your NestJS application, check out [Mau](https://mau.nestjs.com), our official platform for deploying NestJS applications on AWS. Mau makes deployment straightforward and fast, requiring just a few simple steps:
```prisma
model IcdCode {
id String @id @default(uuid())
code String @unique
display String
version String
category String // "ICD9" or "ICD10"
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
@@map("icd_codes")
}
```
### Migration Commands
```bash
$ npm install -g @nestjs/mau
$ mau deploy
# Check migration status
npx prisma migrate status
# Create new migration
npx prisma migrate dev --name description
# Deploy to production
npx prisma migrate deploy
# Reset database (development)
npx prisma migrate reset
```
With Mau, you can deploy your application in just a few clicks, allowing you to focus on building features rather than managing infrastructure.
## ⚙️ Environment Configuration
## Resources
### Development
Check out a few resources that may come in handy when working with NestJS:
```env
NODE_ENV=development
PORT=3000
DATABASE_URL=postgresql://postgres:postgres123@localhost:5432/claim_guard
CORS_ORIGINS=http://localhost:3000,http://localhost:3001
ENABLE_DOCS=true
DEBUG=true
LOG_LEVEL=debug
```
- Visit the [NestJS Documentation](https://docs.nestjs.com) to learn more about the framework.
- For questions and support, please visit our [Discord channel](https://discord.gg/G7Qnnhy).
- To dive deeper and get more hands-on experience, check out our official video [courses](https://courses.nestjs.com/).
- Deploy your application to AWS with the help of [NestJS Mau](https://mau.nestjs.com) in just a few clicks.
- Visualize your application graph and interact with the NestJS application in real-time using [NestJS Devtools](https://devtools.nestjs.com).
- Need help with your project (part-time to full-time)? Check out our official [enterprise support](https://enterprise.nestjs.com).
- To stay in the loop and get updates, follow us on [X](https://x.com/nestframework) and [LinkedIn](https://linkedin.com/company/nestjs).
- Looking for a job, or have a job to offer? Check out our official [Jobs board](https://jobs.nestjs.com).
### Production
## Support
```env
NODE_ENV=production
PORT=3000
DATABASE_URL=postgresql://user:pass@host:5432/db
CORS_ORIGINS=https://yourdomain.com
ENABLE_DOCS=false
DEBUG=false
LOG_LEVEL=warn
JWT_SECRET=strong-production-secret
```
Nest is an MIT-licensed open source project. It can grow thanks to the sponsors and support by the amazing backers. If you'd like to join them, please [read more here](https://docs.nestjs.com/support).
### Available Variables
## Stay in touch
| Variable | Description | Default |
| -------------- | ---------------------------- | ----------------------- |
| `NODE_ENV` | Environment mode | `development` |
| `PORT` | Application port | `3000` |
| `DATABASE_URL` | PostgreSQL connection string | Required |
| `CORS_ORIGINS` | Allowed CORS origins | `http://localhost:3000` |
| `ENABLE_DOCS` | Enable Swagger documentation | `true` |
| `LOG_LEVEL` | Logging level | `info` |
| `JWT_SECRET` | JWT signing secret | Required for auth |
- Author - [Kamil Myśliwiec](https://twitter.com/kammysliwiec)
- Website - [https://nestjs.com](https://nestjs.com/)
- Twitter - [@nestframework](https://twitter.com/nestframework)
## 🧪 Testing
## License
### API Testing
Nest is [MIT licensed](https://github.com/nestjs/nest/blob/master/LICENSE).
Use the included `icd.http` file with VS Code REST Client extension:
```http
### Import ICD Data
POST http://localhost:3000/icd/import
### Search ICD Codes
GET http://localhost:3000/icd/search?search=diabetes&page=1&limit=10
### Get Statistics
GET http://localhost:3000/icd/statistics
```
### Unit Tests
```bash
# Run tests
npm run test
# Watch mode
npm run test:watch
# Coverage
npm run test:cov
# E2E tests
npm run test:e2e
```
## 🔧 Development
### Project Structure
```
src/
├── main.ts # Application entry point
├── app.module.ts # Root module
├── icd/ # ICD module
│ ├── icd.controller.ts # REST endpoints
│ ├── icd.service.ts # Business logic
│ ├── icd.module.ts # Module definition
│ └── dto/ # Data transfer objects
├── health/ # Health check module
└── prisma/ # Database schema
```
### Scripts
```bash
# Development
npm run start:dev # Start with hot reload
npm run start:debug # Start with debugger
# Build
npm run build # Build for production
npm run start:prod # Start production build
# Database
npx prisma studio # Open Prisma Studio
npx prisma db push # Push schema changes (dev only)
npx prisma generate # Generate Prisma client
# Code Quality
npm run lint # Run ESLint
npm run format # Format with Prettier
```
### Adding New Features
1. **Create DTOs** with validation decorators
2. **Add Swagger decorators** for API documentation
3. **Write unit tests** for services and controllers
4. **Update database schema** if needed
5. **Create migration** with descriptive name
## 📦 Deployment
### Docker Production
```dockerfile
# Build stage: install ALL dependencies, because `npm run build` needs the
# build tooling that lives in devDependencies.
FROM node:18-alpine AS builder
WORKDIR /app
COPY package*.json ./
RUN npm ci
COPY . .
RUN npm run build
# Drop devDependencies so only runtime packages are copied to the final image.
# NOTE(review): if the Prisma client is not generated by an install hook,
# run `npx prisma generate` before this prune step - confirm for this project.
RUN npm prune --omit=dev
# Production stage: compiled output plus runtime dependencies only
FROM node:18-alpine AS production
WORKDIR /app
COPY --from=builder /app/dist ./dist
COPY --from=builder /app/node_modules ./node_modules
COPY package*.json ./
EXPOSE 3000
CMD ["node", "dist/main"]
```
### Environment Setup
1. **Production Database**: Use managed PostgreSQL (AWS RDS, Google Cloud SQL)
2. **Environment Variables**: Set secure values for production
3. **SSL/TLS**: Enable HTTPS in production
4. **Monitoring**: Add application monitoring (Prometheus, Grafana)
5. **Logging**: Configure centralized logging
## 🐛 Troubleshooting
### Common Issues
**Port 5432 already in use:**
```bash
# Check what's using the port
netstat -ano | findstr :5432
# Stop local PostgreSQL service
```
**Database connection failed:**
```bash
# Check if container is running
docker-compose ps
# Check logs
docker-compose logs
# Restart database
docker-compose restart
```
**Prisma can't connect:**
```bash
# Verify DATABASE_URL in .env
# Test connection
npx prisma db pull
```
**Build errors:**
```bash
# Clean install
rm -rf node_modules package-lock.json
npm install
```
## 📄 License
This project is licensed under the MIT License.
## 🤝 Contributing
1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Add tests
5. Submit a pull request
## 📞 Support
For questions and support:
- Check the [API documentation](http://localhost:3000/docs)
- Review the troubleshooting section
- Create an issue in the repository
---
**Ready to manage ICD codes efficiently!** 🚀

25
docker-compose.yml Normal file
View File

@@ -0,0 +1,25 @@
services:
  # PostgreSQL Database with pgvector extension
  postgres:
    image: pgvector/pgvector:pg15
    container_name: claim-guard-postgres
    restart: unless-stopped
    environment:
      POSTGRES_DB: claim_guard
      POSTGRES_USER: postgres
      # Overridable via the shell or a .env file next to this compose file;
      # falls back to the local-development default when unset.
      POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres123}
    ports:
      # Host port is overridable (e.g. POSTGRES_PORT=5433) when 5432 is already taken
      - '${POSTGRES_PORT:-5432}:5432'
    volumes:
      # Named volume keeps data across container restarts
      - postgres_data:/var/lib/postgresql/data
      # Scripts in this directory are mounted into the image's init directory
      - ./docker/postgres/init:/docker-entrypoint-initdb.d
    healthcheck:
      test: ['CMD-SHELL', 'pg_isready -U postgres -d claim_guard']
      interval: 10s
      timeout: 5s
      retries: 5
      start_period: 30s

volumes:
  postgres_data:
    driver: local

View File

@@ -0,0 +1,32 @@
-- =====================================================
-- Claim Guard Database Initialization Script
-- =====================================================
-- The claim_guard database itself is created through the POSTGRES_DB
-- environment variable; additional databases could be created here if needed.

-- Extensions: pgvector first, then general-purpose helpers
CREATE EXTENSION IF NOT EXISTS vector;
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pg_trgm";
CREATE EXTENSION IF NOT EXISTS "btree_gin";
CREATE EXTENSION IF NOT EXISTS "btree_gist";

-- No separate application role is created; the main user comes from POSTGRES_USER.
-- Database permissions for that user:
GRANT ALL PRIVILEGES ON DATABASE claim_guard TO postgres;

-- Optional dedicated schema (Prisma uses "public" by default)
-- CREATE SCHEMA IF NOT EXISTS claim_guard;

-- Emit a short initialization report as NOTICE messages
DO $init$
DECLARE
    -- true when the pgvector extension is registered in this database
    has_vector boolean := EXISTS (
        SELECT 1
        FROM pg_extension
        WHERE extname = 'vector'
    );
BEGIN
    RAISE NOTICE 'Claim Guard database initialized successfully';
    RAISE NOTICE 'pgvector extension: %', has_vector;
    RAISE NOTICE 'Database ready for Prisma migrations';
END
$init$;

View File

@@ -0,0 +1,43 @@
@echo off
setlocal
REM =====================================================
REM Reset PostgreSQL Database (Windows)
REM =====================================================
REM Deletes the database volumes and starts a fresh, empty PostgreSQL.
REM Confirmation is requested BEFORE anything is stopped, so answering
REM anything other than "y" leaves the running stack untouched.
REM Exit code: 0 on success, 1 when cancelled or when PostgreSQL does
REM not become ready in time.

echo 🗑️ Resetting PostgreSQL database...

echo ⚠️ WARNING: This will delete ALL database data!
REM Clear any inherited value: "set /p" keeps the old value when the user just presses Enter
set "confirm="
set /p confirm=Are you sure? (y/N):
if /i not "%confirm%"=="y" (
    echo ❌ Operation cancelled
    exit /b 1
)

REM Stop services and remove volumes (this deletes all data!)
docker-compose down -v
docker volume rm claim-guard-be_postgres_data 2>nul
docker volume rm claim-guard-be_pgadmin_data 2>nul

echo ✅ Database reset complete!
echo 🐳 Starting fresh database...

REM Start database again
docker-compose up -d postgres
if errorlevel 1 (
    echo ❌ Failed to start the postgres service
    exit /b 1
)

echo ⏳ Waiting for PostgreSQL to be ready...

REM Poll every 2 seconds, giving up after max_attempts tries
set /a attempts=0
set /a max_attempts=30

:wait_loop
REM -T: do not allocate a TTY, so the check also works from non-interactive shells
docker-compose exec -T postgres pg_isready -U postgres -d claim_guard >nul 2>&1
if not errorlevel 1 goto ready
set /a attempts+=1
if %attempts% geq %max_attempts% (
    echo ❌ PostgreSQL did not become ready in time
    exit /b 1
)
echo ⏳ PostgreSQL is unavailable - sleeping
timeout /t 2 /nobreak >nul
goto wait_loop

:ready
echo ✅ Fresh database is ready!
echo 📊 Run 'npx prisma migrate deploy' to setup schema

pause

View File

@@ -0,0 +1,29 @@
@echo off
setlocal
REM =====================================================
REM Start PostgreSQL Database Only (Windows)
REM =====================================================
REM Starts the "postgres" compose service, waits until pg_isready succeeds,
REM then prints the connection URL and the container logs.
REM Exit code: 0 on success, 1 when the service cannot be started or does
REM not become ready in time.

echo 🐳 Starting PostgreSQL with pgvector...

REM Start only the database service
docker-compose up -d postgres
if errorlevel 1 (
    echo ❌ Failed to start the postgres service
    exit /b 1
)

echo ⏳ Waiting for PostgreSQL to be ready...

REM Poll every 2 seconds, giving up after max_attempts tries
set /a attempts=0
set /a max_attempts=30

:wait_loop
REM -T: do not allocate a TTY, so the check also works from non-interactive shells
docker-compose exec -T postgres pg_isready -U postgres -d claim_guard >nul 2>&1
if not errorlevel 1 goto ready
set /a attempts+=1
if %attempts% geq %max_attempts% (
    echo ❌ PostgreSQL did not become ready in time
    exit /b 1
)
echo ⏳ PostgreSQL is unavailable - sleeping
timeout /t 2 /nobreak >nul
goto wait_loop

:ready
echo ✅ PostgreSQL is ready!
echo 📊 Database URL: postgresql://postgres:postgres123@localhost:5432/claim_guard

REM Show logs
echo 📋 Database logs:
docker-compose logs postgres

pause

View File

@@ -0,0 +1,25 @@
#!/bin/bash
# =====================================================
# Start PostgreSQL Database Only
# =====================================================
# Starts the "postgres" compose service, waits until pg_isready succeeds,
# then prints the connection URL and the container logs.
#
# Environment:
#   MAX_ATTEMPTS  number of 2-second readiness polls before giving up (default: 30)
# Exit status: 0 on success, non-zero if the service fails to start or
# does not become ready in time.

# Abort on command failures, unset variables and failed pipeline stages
set -euo pipefail

MAX_ATTEMPTS="${MAX_ATTEMPTS:-30}"

echo "🐳 Starting PostgreSQL with pgvector..."

# Start only the database service
docker-compose up -d postgres

echo "⏳ Waiting for PostgreSQL to be ready..."

# Wait for PostgreSQL to accept connections.
# -T disables TTY allocation so the check also works without a terminal (CI, cron).
attempt=0
until docker-compose exec -T postgres pg_isready -U postgres -d claim_guard; do
  attempt=$((attempt + 1))
  if [ "$attempt" -ge "$MAX_ATTEMPTS" ]; then
    echo "❌ PostgreSQL did not become ready after ${attempt} attempts" >&2
    # Best effort: show recent logs to help diagnose the failure
    docker-compose logs --tail=50 postgres >&2 || true
    exit 1
  fi
  echo "⏳ PostgreSQL is unavailable - sleeping"
  sleep 2
done

echo "✅ PostgreSQL is ready!"
echo "📊 Database URL: postgresql://postgres:postgres123@localhost:5432/claim_guard"

# Show logs
echo "📋 Database logs:"
docker-compose logs postgres

2469
package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@@ -20,12 +20,20 @@
"test:e2e": "jest --config ./test/jest-e2e.json"
},
"dependencies": {
"@langchain/community": "^0.3.53",
"@langchain/openai": "^0.6.9",
"@nestjs/common": "^11.0.1",
"@nestjs/core": "^11.0.1",
"@nestjs/platform-express": "^11.0.1",
"@nestjs/swagger": "^11.2.0",
"@prisma/client": "^6.14.0",
"class-transformer": "^0.5.1",
"class-validator": "^0.14.2",
"langchain": "^0.3.31",
"pg": "^8.11.3",
"reflect-metadata": "^0.2.2",
"rxjs": "^7.8.1",
"swagger-ui-express": "^5.0.1",
"xlsx": "^0.18.5"
},
"devDependencies": {

View File

@@ -0,0 +1,26 @@
-- pgvector must be installed before the vector(1536) column can be declared
CREATE EXTENSION IF NOT EXISTS vector;

-- CreateTable: one row per ICD code, with an optional embedding
CREATE TABLE "icd_codes" (
    "id"        TEXT         NOT NULL,
    "code"      TEXT         NOT NULL,
    "display"   TEXT         NOT NULL,
    "version"   TEXT         NOT NULL,
    "category"  TEXT         NOT NULL,
    "embedding" vector(1536),
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    "updatedAt" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "icd_codes_pkey" PRIMARY KEY ("id")
);

-- Each ICD code may appear only once
CREATE UNIQUE INDEX "icd_codes_code_key"
    ON "icd_codes" ("code");

-- Approximate nearest-neighbour index (cosine distance) over the embeddings.
-- NOTE(review): the table is still empty when this index is built; pgvector
-- recommends building ivfflat indexes after data is loaded - consider a
-- REINDEX once the import has run. Confirm against the pgvector docs.
CREATE INDEX "icd_codes_embedding_idx"
    ON "icd_codes"
    USING ivfflat ("embedding" vector_cosine_ops)
    WITH (lists = 100);

-- Catalog comments for documentation
COMMENT ON COLUMN "icd_codes"."embedding"
    IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
COMMENT ON INDEX "icd_codes_embedding_idx"
    IS 'IVFFlat index for fast cosine similarity search with 100 lists';

View File

@@ -0,0 +1,27 @@
-- Migration: Add pgvector support to icd_codes table
-- Every statement is guarded with IF NOT EXISTS, so objects that already
-- exist are left untouched.

-- pgvector provides the "vector" column type and the ivfflat access method
CREATE EXTENSION IF NOT EXISTS vector;

-- Columns used for semantic search / LangChain pgvector:
--   embedding : pgvector column (1536 dimensions)
--   metadata  : JSON metadata
--   content   : text content
ALTER TABLE "icd_codes"
    ADD COLUMN IF NOT EXISTS "embedding" vector(1536),
    ADD COLUMN IF NOT EXISTS "metadata"  JSONB,
    ADD COLUMN IF NOT EXISTS "content"   TEXT;

-- Approximate nearest-neighbour index (cosine distance) over the embeddings
CREATE INDEX IF NOT EXISTS "icd_codes_embedding_idx"
    ON "icd_codes"
    USING ivfflat ("embedding" vector_cosine_ops)
    WITH (lists = 100);

-- GIN index for JSON metadata queries
CREATE INDEX IF NOT EXISTS "icd_codes_metadata_idx"
    ON "icd_codes"
    USING GIN ("metadata");

-- Catalog comments for documentation
COMMENT ON COLUMN "icd_codes"."embedding"
    IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
COMMENT ON COLUMN "icd_codes"."metadata"
    IS 'JSON metadata for LangChain pgvector operations';
COMMENT ON COLUMN "icd_codes"."content"
    IS 'Text content for LangChain pgvector operations';
COMMENT ON INDEX "icd_codes_embedding_idx"
    IS 'IVFFlat index for fast cosine similarity search with 100 lists';
COMMENT ON INDEX "icd_codes_metadata_idx"
    IS 'GIN index for fast JSON metadata queries';

View File

@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "postgresql"

View File

@@ -1,12 +1,5 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema
// Looking for ways to speed up your queries, or scale easily with your serverless or edge functions?
// Try Prisma Accelerate: https://pris.ly/cli/accelerate-init
generator client {
provider = "prisma-client-js"
output = "../generated/prisma"
}
datasource db {
@@ -19,9 +12,14 @@ model IcdCode {
code String @unique
display String
version String
category String // "ICD9" or "ICD10"
category String
embedding Unsupported("vector")?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
metadata Json?
content String?
@@index([embedding])
@@index([metadata], type: Gin)
@@map("icd_codes")
}

View File

@@ -2,9 +2,10 @@ import { Module } from '@nestjs/common';
import { AppController } from './app.controller';
import { AppService } from './app.service';
import { IcdModule } from './icd/icd.module';
import { HealthModule } from './health/health.module';
@Module({
imports: [IcdModule],
imports: [IcdModule, HealthModule],
controllers: [AppController],
providers: [AppService],
})

View File

@@ -0,0 +1,83 @@
import { Controller, Get } from '@nestjs/common';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiProperty,
} from '@nestjs/swagger';
/**
 * Response body of `GET /health`, described for Swagger via `@ApiProperty`.
 */
export class HealthCheckResponseDto {
  /** Overall status reported by the endpoint. */
  @ApiProperty({ example: 'ok' })
  status: string;

  /** Time at which the response was produced (ISO-8601 string). */
  @ApiProperty({ example: '2024-01-01T00:00:00.000Z' })
  timestamp: string;

  /** Process uptime as returned by `process.uptime()`. */
  @ApiProperty({ example: 3600 })
  uptime: number;

  /** Environment name (taken from `NODE_ENV` by the controller). */
  @ApiProperty({ example: 'development' })
  environment: string;

  /** Application version string. */
  @ApiProperty({ example: '1.0.0' })
  version: string;

  /** Database connectivity section. */
  @ApiProperty({ example: { status: 'connected' } })
  database: { status: string };
}
/**
 * HTTP endpoints under `/health`: a full health report plus readiness and
 * liveness checks. All three return static payloads; no dependency is probed.
 */
@ApiTags('Health')
@Controller('health')
export class HealthController {
  /**
   * `GET /health` - returns the application health report.
   *
   * The `database.status` value is a hard-coded placeholder; no actual
   * database connection check is performed here.
   */
  @Get()
  @ApiOperation({
    summary: 'Health check endpoint',
    description:
      'Check the health status of the application and its dependencies',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is healthy',
    type: HealthCheckResponseDto,
  })
  async getHealth(): Promise<HealthCheckResponseDto> {
    // Fall back to "development" when NODE_ENV is unset or empty
    const environment = process.env.NODE_ENV || 'development';

    const report: HealthCheckResponseDto = {
      status: 'ok',
      timestamp: new Date().toISOString(),
      uptime: process.uptime(),
      environment,
      version: '1.0.0',
      // Placeholder: a real implementation should check the actual DB connection
      database: { status: 'connected' },
    };

    return report;
  }

  /** `GET /health/ready` - readiness check; always answers `ready`. */
  @Get('ready')
  @ApiOperation({
    summary: 'Readiness check',
    description: 'Check if the application is ready to serve requests',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is ready',
  })
  async getReady() {
    const payload = { status: 'ready' };
    return payload;
  }

  /** `GET /health/live` - liveness check; always answers `alive`. */
  @Get('live')
  @ApiOperation({
    summary: 'Liveness check',
    description: 'Check if the application is alive',
  })
  @ApiResponse({
    status: 200,
    description: 'Application is alive',
  })
  async getLive() {
    const payload = { status: 'alive' };
    return payload;
  }
}

View File

@@ -0,0 +1,7 @@
import { Module } from '@nestjs/common';
import { HealthController } from './health.controller';
/** NestJS module that registers HealthController and nothing else. */
@Module({
controllers: [HealthController],
})
export class HealthModule {}

View File

@@ -0,0 +1,192 @@
import { ApiProperty, ApiPropertyOptional } from '@nestjs/swagger';
/**
 * Swagger description of a single ICD code record as returned by the API.
 */
export class IcdCodeDto {
  @ApiProperty({
    description: 'Unique identifier for the ICD code',
    example: '550e8400-e29b-41d4-a716-446655440000',
  })
  id: string;

  @ApiProperty({
    description: 'ICD code',
    example: 'E11.9',
  })
  code: string;

  @ApiProperty({
    description: 'Description of the ICD code',
    example: 'Type 2 diabetes mellitus without complications',
  })
  display: string;

  @ApiProperty({
    description: 'Version of the ICD standard',
    example: '2024',
  })
  version: string;

  // Typed as a plain string; the allowed values are only advertised to Swagger.
  @ApiProperty({
    description: 'ICD category',
    example: 'ICD10',
    enum: ['ICD9', 'ICD10'],
  })
  category: string;

  @ApiProperty({
    description: 'Creation timestamp',
    example: '2024-01-01T00:00:00.000Z',
  })
  createdAt: Date;

  @ApiProperty({
    description: 'Last update timestamp',
    example: '2024-01-01T00:00:00.000Z',
  })
  updatedAt: Date;
}
/**
 * Pagination metadata attached to paginated list responses.
 */
export class PaginationMetaDto {
  @ApiProperty({
    description: 'Current page number',
    example: 1,
  })
  currentPage: number;

  @ApiProperty({
    description: 'Total number of pages',
    example: 10,
  })
  totalPages: number;

  @ApiProperty({
    description: 'Total number of items',
    example: 100,
  })
  totalItems: number;

  @ApiProperty({
    description: 'Number of items per page',
    example: 10,
  })
  itemsPerPage: number;

  @ApiProperty({
    description: 'Whether there is a next page',
    example: true,
  })
  hasNextPage: boolean;

  @ApiProperty({
    description: 'Whether there is a previous page',
    example: false,
  })
  hasPreviousPage: boolean;
}
/**
 * Envelope for ICD search results: a success flag, the matching codes,
 * pagination metadata and an optional message.
 */
export class IcdSearchResponseDto {
  @ApiProperty({
    description: 'Request success status',
    example: true,
  })
  success: boolean;

  @ApiProperty({
    description: 'Array of ICD codes',
    type: [IcdCodeDto],
  })
  data: IcdCodeDto[];

  @ApiProperty({
    description: 'Pagination metadata',
    type: PaginationMetaDto,
  })
  pagination: PaginationMetaDto;

  @ApiPropertyOptional({
    description: 'Response message',
    example: 'ICD codes retrieved successfully',
  })
  message?: string;
}
/**
 * Envelope for the ICD import operation: a success flag, a message and
 * per-category import counts.
 */
export class IcdImportResponseDto {
  @ApiProperty({
    description: 'Request success status',
    example: true,
  })
  success: boolean;

  @ApiProperty({
    description: 'Success message',
    example: 'ICD data imported successfully',
  })
  message: string;

  @ApiProperty({
    description: 'Import statistics',
    example: {
      icd9Count: 150,
      icd10Count: 250,
      total: 400,
    },
  })
  data: {
    icd9Count: number;
    icd10Count: number;
    total: number;
  };
}
/**
 * Count of stored ICD codes, split by category and in total.
 */
export class IcdStatisticsDto {
  @ApiProperty({
    description: 'Total number of ICD9 codes',
    example: 150,
  })
  icd9Count: number;

  @ApiProperty({
    description: 'Total number of ICD10 codes',
    example: 250,
  })
  icd10Count: number;

  @ApiProperty({
    description: 'Total number of all ICD codes',
    example: 400,
  })
  total: number;
}
/**
 * Envelope for the ICD statistics endpoint.
 */
export class IcdStatisticsResponseDto {
  @ApiProperty({
    description: 'Request success status',
    example: true,
  })
  success: boolean;

  @ApiProperty({
    description: 'ICD statistics data',
    type: IcdStatisticsDto,
  })
  data: IcdStatisticsDto;
}
/**
 * Swagger description of an error payload: a success flag, a message and
 * an optional detail string.
 */
export class ErrorResponseDto {
  @ApiProperty({
    description: 'Request success status',
    example: false,
  })
  success: boolean;

  @ApiProperty({
    description: 'Error message',
    example: 'Failed to process request',
  })
  message: string;

  @ApiPropertyOptional({
    description: 'Detailed error information',
    example: 'Database connection failed',
  })
  error?: string;
}

View File

@@ -1,6 +1,62 @@
import { ApiPropertyOptional } from '@nestjs/swagger';
import {
IsOptional,
IsString,
IsNumber,
IsEnum,
Min,
Max,
} from 'class-validator';
import { Type } from 'class-transformer';
/**
 * ICD categories accepted as a search filter. Each member's value is the
 * same string as its name.
 */
export enum IcdCategory {
  ICD9 = 'ICD9',
  ICD10 = 'ICD10',
}
/**
 * Query parameters accepted by the ICD search endpoint.
 *
 * Every field is optional. `page` and `limit` are converted to numbers by
 * class-transformer before class-validator checks them.
 */
export class SearchIcdDto {
  // Single declaration of `category`: the earlier undecorated
  // `'ICD9' | 'ICD10'` declaration duplicated this identifier and has been
  // removed in favour of the validated enum-typed field.
  @ApiPropertyOptional({
    description: 'ICD category to filter by',
    enum: IcdCategory,
    example: 'ICD10',
  })
  @IsOptional()
  @IsEnum(IcdCategory)
  category?: IcdCategory;

  // minLength/maxLength below are Swagger metadata only; the validators
  // attached here check just that the value is a string.
  @ApiPropertyOptional({
    description: 'Search term for ICD code or description',
    example: 'diabetes',
    minLength: 1,
    maxLength: 100,
  })
  @IsOptional()
  @IsString()
  search?: string;

  @ApiPropertyOptional({
    description: 'Page number for pagination',
    example: 1,
    minimum: 1,
    default: 1,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  page?: number;

  @ApiPropertyOptional({
    description: 'Number of items per page',
    example: 10,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  @IsOptional()
  @Type(() => Number)
  @IsNumber()
  @Min(1)
  @Max(100)
  limit?: number;
}

View File

@@ -1,7 +1,22 @@
import { Controller, Get, Post, Query, Logger } from '@nestjs/common';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiQuery,
ApiBadRequestResponse,
ApiInternalServerErrorResponse,
} from '@nestjs/swagger';
import { IcdService } from './icd.service';
import { SearchIcdDto } from './dto/search-icd.dto';
import {
IcdSearchResponseDto,
IcdImportResponseDto,
IcdStatisticsResponseDto,
ErrorResponseDto,
} from './dto/icd-response.dto';
@ApiTags('ICD')
@Controller('icd')
export class IcdController {
private readonly logger = new Logger(IcdController.name);
@@ -9,7 +24,25 @@ export class IcdController {
constructor(private readonly icdService: IcdService) {}
@Post('import')
async importData() {
@ApiOperation({
summary: 'Import ICD data from Excel files',
description:
'Import ICD-9 and ICD-10 codes from Excel files located in the test directory. This operation will process both ICD files and insert/update the database with the latest codes.',
})
@ApiResponse({
status: 200,
description: 'ICD data imported successfully',
type: IcdImportResponseDto,
})
@ApiBadRequestResponse({
description: 'Bad request - Invalid file format or missing files',
type: ErrorResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error during import process',
type: ErrorResponseDto,
})
async importData(): Promise<IcdImportResponseDto> {
try {
this.logger.log('Starting ICD data import...');
const result = await this.icdService.importIcdData();
@@ -20,21 +53,62 @@ export class IcdController {
};
} catch (error) {
this.logger.error('Error importing ICD data:', error);
return {
success: false,
message: 'Failed to import ICD data',
error: error.message,
};
throw error;
}
}
@Get('search')
@ApiOperation({
summary: 'Search ICD codes with filters and pagination',
description:
'Search for ICD codes using various filters like category, search term, with pagination support. Returns a paginated list of matching ICD codes.',
})
@ApiQuery({
name: 'category',
required: false,
description: 'Filter by ICD category',
enum: ['ICD9', 'ICD10'],
example: 'ICD10',
})
@ApiQuery({
name: 'search',
required: false,
description: 'Search term for ICD code or description',
example: 'diabetes',
})
@ApiQuery({
name: 'page',
required: false,
description: 'Page number for pagination',
example: 1,
type: 'number',
})
@ApiQuery({
name: 'limit',
required: false,
description: 'Number of items per page (max 100)',
example: 10,
type: 'number',
})
@ApiResponse({
status: 200,
description: 'ICD codes retrieved successfully',
type: IcdSearchResponseDto,
})
@ApiBadRequestResponse({
description: 'Bad request - Invalid query parameters',
type: ErrorResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error during search',
type: ErrorResponseDto,
})
async searchIcdCodes(
@Query('category') category?: string,
@Query('search') search?: string,
@Query('page') page?: string,
@Query('limit') limit?: string,
) {
): Promise<IcdSearchResponseDto> {
try {
const pageNum = page ? parseInt(page, 10) : 1;
const limitNum = limit ? parseInt(limit, 10) : 10;
@@ -48,20 +122,38 @@ export class IcdController {
return {
success: true,
...result,
data: result.data,
pagination: {
currentPage: result.page,
totalPages: result.totalPages,
totalItems: result.total,
itemsPerPage: result.limit,
hasNextPage: result.page < result.totalPages,
hasPreviousPage: result.page > 1,
},
};
} catch (error) {
this.logger.error('Error searching ICD codes:', error);
return {
success: false,
message: 'Failed to search ICD codes',
error: error.message,
};
throw error;
}
}
@Get('statistics')
async getStatistics() {
@ApiOperation({
summary: 'Get ICD database statistics',
description:
'Retrieve statistics about the ICD database including total counts for ICD-9 and ICD-10 codes, and last import information.',
})
@ApiResponse({
status: 200,
description: 'Statistics retrieved successfully',
type: IcdStatisticsResponseDto,
})
@ApiInternalServerErrorResponse({
description: 'Internal server error while fetching statistics',
type: ErrorResponseDto,
})
async getStatistics(): Promise<IcdStatisticsResponseDto> {
try {
const stats = await this.icdService.getStatistics();
return {
@@ -70,11 +162,7 @@ export class IcdController {
};
} catch (error) {
this.logger.error('Error getting statistics:', error);
return {
success: false,
message: 'Failed to get statistics',
error: error.message,
};
throw error;
}
}
}

View File

@@ -1,10 +1,12 @@
import { Module } from '@nestjs/common';
import { IcdController } from './icd.controller';
import { IcdService } from './icd.service';
import { PgVectorModule } from './pgvector.module';
/**
 * Nest module for the ICD feature.
 *
 * Imports PgVectorModule and re-exports it together with IcdService.
 */
@Module({
  controllers: [IcdController],
  providers: [IcdService],
  imports: [PgVectorModule],
  // Single `exports` key: the object literal previously also carried a stale
  // `exports: [IcdService]` entry, which this later key silently overrode.
  exports: [IcdService, PgVectorModule],
})
export class IcdModule {}

View File

@@ -181,6 +181,16 @@ export class IcdService {
skip,
take: limit,
orderBy: { code: 'asc' },
select: {
id: true,
code: true,
display: true,
version: true,
category: true,
createdAt: true,
updatedAt: true,
// Exclude embedding field to avoid deserialization error
},
}),
this.prisma.icdCode.count({ where }),
]);

View File

@@ -0,0 +1,670 @@
import {
  BadRequestException,
  Controller,
  Get,
  Post,
  Query,
  Body,
  HttpStatus,
  ValidationPipe,
  UsePipes,
} from '@nestjs/common';
import {
ApiTags,
ApiOperation,
ApiResponse,
ApiQuery,
ApiBody,
ApiProperty,
ApiConsumes,
ApiProduces,
} from '@nestjs/swagger';
import { PgVectorService, VectorSearchResult } from './pgvector.service';
/**
 * Request body for the vector and hybrid search endpoints.
 *
 * The length and range bounds below are Swagger metadata; this class
 * attaches no validation decorators of its own.
 */
export class VectorSearchDto {
  @ApiProperty({
    description: 'Search query text for vector similarity search',
    example: 'diabetes mellitus type 2',
    minLength: 1,
    maxLength: 500,
  })
  query: string;

  @ApiProperty({
    description: 'Maximum number of results to return',
    example: 10,
    required: false,
    minimum: 1,
    maximum: 100,
    default: 10,
  })
  limit?: number;

  @ApiProperty({
    description: 'ICD category filter to narrow down search results',
    example: 'ICD10',
    required: false,
    enum: ['ICD9', 'ICD10'],
    default: undefined,
  })
  category?: string;

  @ApiProperty({
    description: 'Similarity threshold (0.0 - 1.0) for filtering results',
    example: 0.7,
    required: false,
    minimum: 0.0,
    maximum: 1.0,
    default: 0.7,
  })
  threshold?: number;
}
/**
 * Request body for the generate-embedding endpoint: the text to embed and
 * an optional model name.
 */
export class EmbeddingRequestDto {
  @ApiProperty({
    description: 'Text to generate vector embedding for',
    example: 'diabetes mellitus',
    minLength: 1,
    maxLength: 1000,
  })
  text: string;

  @ApiProperty({
    description: 'Embedding model to use for generation',
    example: 'text-embedding-ada-002',
    required: false,
    default: 'text-embedding-ada-002',
  })
  model?: string;
}
/**
 * Response returned by the vector and hybrid search endpoints.
 *
 * The item schema of `data` is spelled out inline for Swagger because
 * `VectorSearchResult` is an interface, not a decorated class.
 */
export class VectorSearchResponseDto {
  @ApiProperty({
    description: 'Array of search results with similarity scores',
    type: 'array',
    items: {
      type: 'object',
      properties: {
        id: {
          type: 'string',
          description: 'Unique identifier for the ICD code',
          example: 'uuid-123',
        },
        code: {
          type: 'string',
          description: 'ICD code (e.g., E11.9)',
          example: 'E11.9',
        },
        display: {
          type: 'string',
          description: 'Human readable description of the ICD code',
          example: 'Type 2 diabetes mellitus without complications',
        },
        version: {
          type: 'string',
          description: 'ICD version (e.g., ICD-10-CM)',
          example: 'ICD-10-CM',
        },
        category: {
          type: 'string',
          description: 'ICD category (ICD9 or ICD10)',
          example: 'ICD10',
        },
        similarity: {
          type: 'number',
          description: 'Similarity score between 0 and 1',
          example: 0.89,
        },
      },
    },
  })
  data: VectorSearchResult[];

  @ApiProperty({
    description: 'Total number of results found',
    example: 5,
    minimum: 0,
  })
  total: number;

  @ApiProperty({
    description: 'Search query that was used',
    example: 'diabetes mellitus type 2',
  })
  query: string;
}
/**
 * Response of the embedding statistics endpoint: how many ICD codes exist,
 * how many carry an embedding, and the vector store status label.
 */
export class EmbeddingStatsResponseDto {
  @ApiProperty({
    description: 'Total number of ICD codes in the system',
    example: 1000,
    minimum: 0,
  })
  total: number;

  @ApiProperty({
    description: 'Number of ICD codes with generated embeddings',
    example: 500,
    minimum: 0,
  })
  withEmbeddings: number;

  @ApiProperty({
    description: 'Number of ICD codes without embeddings',
    example: 500,
    minimum: 0,
  })
  withoutEmbeddings: number;

  @ApiProperty({
    description: 'Percentage of codes with embeddings',
    example: 50.0,
    minimum: 0,
    maximum: 100,
  })
  percentage: number;

  @ApiProperty({
    description: 'Current status of the vector store',
    example: 'Initialized',
    enum: ['Initialized', 'Not Initialized', 'Error'],
  })
  vectorStoreStatus: string;
}
/**
 * Response of the vector store status endpoint.
 */
export class VectorStoreStatusDto {
  @ApiProperty({
    description: 'Whether the vector store is currently initialized',
    example: true,
  })
  initialized: boolean;

  @ApiProperty({
    description: 'Number of documents currently in the vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;

  @ApiProperty({
    description: 'Embedding model currently being used',
    example: 'OpenAI text-embedding-ada-002',
    enum: ['OpenAI text-embedding-ada-002', 'Not Available'],
  })
  embeddingModel: string;

  @ApiProperty({
    description: 'Timestamp of last vector store update',
    example: '2024-01-01T00:00:00.000Z',
  })
  lastUpdated: Date;
}
/**
 * Response of the vector store initialize endpoint.
 */
export class InitializeResponseDto {
  @ApiProperty({
    description: 'Success message',
    example: 'Pgvector store initialized successfully',
  })
  message: string;

  @ApiProperty({
    description: 'Number of documents loaded into vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;
}
/**
 * Response of the vector store refresh endpoint.
 */
export class RefreshResponseDto {
  @ApiProperty({
    description: 'Success message',
    example: 'Pgvector store refreshed successfully',
  })
  message: string;

  @ApiProperty({
    description: 'Number of documents in refreshed vector store',
    example: 1000,
    minimum: 0,
  })
  documentCount: number;
}
/**
 * Response of the generate-embedding endpoint: the vector, its length and
 * a model name.
 */
export class GenerateEmbeddingResponseDto {
  @ApiProperty({
    description: 'Generated vector embedding array',
    type: 'array',
    items: { type: 'number' },
    example: [0.1, 0.2, 0.3, -0.1, 0.5],
  })
  embedding: number[];

  @ApiProperty({
    description: 'Number of dimensions in the embedding vector',
    example: 1536,
    minimum: 1,
  })
  dimensions: number;

  @ApiProperty({
    description: 'Model used to generate the embedding',
    example: 'text-embedding-ada-002',
  })
  model: string;
}
/**
 * Response of the batch embedding endpoint: processed and failed counts
 * plus a summary message.
 */
export class GenerateAllEmbeddingsResponseDto {
  @ApiProperty({
    description: 'Number of embeddings successfully processed',
    example: 500,
    minimum: 0,
  })
  processed: number;

  @ApiProperty({
    description: 'Number of errors encountered during processing',
    example: 0,
    minimum: 0,
  })
  errors: number;

  @ApiProperty({
    description: 'Summary message of the operation',
    example: 'Processed 500 embeddings with 0 errors',
  })
  message: string;
}
@ApiTags('PgVector Operations')
@Controller('pgvector')
@UsePipes(new ValidationPipe({ transform: true }))
export class PgVectorController {
// Receives the PgVectorService that the endpoints of this controller delegate to.
constructor(private readonly pgVectorService: PgVectorService) {}
@Post('search')
@ApiOperation({
summary: 'PgVector similarity search',
description:
'Search ICD codes using pgvector similarity with the given query. Returns results ordered by similarity score.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
type: VectorSearchDto,
description: 'Search parameters for pgvector similarity search',
examples: {
diabetes: {
summary: 'Search for diabetes',
value: {
query: 'diabetes mellitus type 2',
limit: 10,
category: 'ICD10',
threshold: 0.7,
},
},
heart: {
summary: 'Search for heart conditions',
value: {
query: 'heart attack myocardial infarction',
limit: 5,
category: 'ICD10',
threshold: 0.8,
},
},
},
})
@ApiResponse({
status: HttpStatus.OK,
description: 'Search results with similarity scores',
type: VectorSearchResponseDto,
})
@ApiResponse({
status: HttpStatus.BAD_REQUEST,
description: 'Invalid search parameters or query',
schema: {
type: 'object',
properties: {
statusCode: { type: 'number', example: 400 },
message: { type: 'string', example: 'Query is required' },
error: { type: 'string', example: 'Bad Request' },
},
},
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Internal server error during pgvector search',
schema: {
type: 'object',
properties: {
statusCode: { type: 'number', example: 500 },
message: { type: 'string', example: 'Internal server error' },
error: { type: 'string', example: 'Internal Server Error' },
},
},
})
/**
 * Runs a pgvector similarity search from a JSON request body.
 *
 * `limit` falls back to 10 when missing or falsy. `threshold` falls back to
 * 0.7 only when it is absent (`undefined` or `null`), so an explicit `0`,
 * the documented minimum, is forwarded to the service unchanged.
 *
 * @param searchDto Search parameters taken from the request body.
 * @returns The matches, their count and the query that was used.
 */
async vectorSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  // A plain `||` default would treat the valid threshold 0 as "not provided".
  const threshold =
    searchDto.threshold === undefined || searchDto.threshold === null
      ? 0.7
      : searchDto.threshold;
  const results = await this.pgVectorService.vectorSearch(
    searchDto.query,
    searchDto.limit || 10,
    searchDto.category,
    threshold,
  );
  return {
    data: results,
    total: results.length,
    query: searchDto.query,
  };
}
@Get('search')
@ApiOperation({
summary: 'PgVector search via GET',
description:
'Search ICD codes using pgvector similarity via query parameters. Alternative to POST method.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiQuery({
name: 'query',
description: 'Search query text for pgvector similarity search',
example: 'diabetes mellitus type 2',
required: true,
type: 'string',
})
@ApiQuery({
name: 'limit',
description: 'Maximum number of results to return',
example: 10,
required: false,
type: 'number',
minimum: 1,
maximum: 100,
})
@ApiQuery({
name: 'category',
description: 'ICD category filter to narrow down search results',
example: 'ICD10',
required: false,
type: 'string',
enum: ['ICD9', 'ICD10'],
})
@ApiQuery({
name: 'threshold',
description: 'Similarity threshold (0.0 - 1.0) for filtering results',
example: 0.7,
required: false,
type: 'number',
minimum: 0.0,
maximum: 1.0,
})
@ApiResponse({
status: HttpStatus.OK,
description: 'Search results with similarity scores',
type: VectorSearchResponseDto,
})
@ApiResponse({
status: HttpStatus.BAD_REQUEST,
description: 'Invalid query parameters',
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Internal server error during pgvector search',
})
/**
 * Runs a pgvector similarity search from query-string parameters.
 *
 * Query-string values arrive as strings, so `limit` and `threshold` are
 * parsed here. Missing or empty values fall back to 10 and 0.7.
 *
 * @param query Search text; must be a non-blank string.
 * @param limit Optional maximum number of results, parsed as a base-10 integer.
 * @param category Optional category filter, forwarded unchanged.
 * @param threshold Optional similarity threshold, parsed as a float.
 * @returns The matches, their count and the query that was used.
 * @throws BadRequestException when `query` is missing or blank, when `limit`
 *   is not a non-negative integer, or when `threshold` is not a number.
 */
async vectorSearchGet(
  @Query('query') query: string,
  @Query('limit') limit?: string,
  @Query('category') category?: string,
  @Query('threshold') threshold?: string,
): Promise<VectorSearchResponseDto> {
  // Reject a missing or repeated (array-valued) query before it reaches the service.
  if (typeof query !== 'string' || query.trim() === '') {
    throw new BadRequestException('Query is required');
  }

  // Explicit radix so the value is always read as decimal.
  const maxResults = limit ? parseInt(limit, 10) : 10;
  if (Number.isNaN(maxResults) || maxResults < 0) {
    throw new BadRequestException('limit must be a non-negative integer');
  }

  const minSimilarity = threshold ? parseFloat(threshold) : 0.7;
  if (Number.isNaN(minSimilarity)) {
    throw new BadRequestException('threshold must be a number');
  }

  const results = await this.pgVectorService.vectorSearch(
    query,
    maxResults,
    category,
    minSimilarity,
  );
  return {
    data: results,
    total: results.length,
    query,
  };
}
@Post('hybrid-search')
@ApiOperation({
summary: 'Hybrid search (PgVector + Text)',
description:
'Combine pgvector similarity with text search for better and more accurate results. Combines semantic understanding with traditional text matching.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
type: VectorSearchDto,
description: 'Search parameters for hybrid search',
examples: {
diabetes: {
summary: 'Hybrid search for diabetes',
value: {
query: 'diabetes mellitus type 2',
limit: 15,
category: 'ICD10',
},
},
},
})
@ApiResponse({
status: HttpStatus.OK,
description: 'Hybrid search results combining pgvector and text search',
type: VectorSearchResponseDto,
})
@ApiResponse({
status: HttpStatus.BAD_REQUEST,
description: 'Invalid search parameters',
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Internal server error during hybrid search',
})
/**
 * Delegates a hybrid search to the service.
 *
 * `limit` falls back to 10 when missing or falsy; the DTO's `threshold` is
 * not forwarded by this endpoint.
 *
 * @param searchDto Search parameters taken from the request body.
 * @returns The matches, their count and the query that was used.
 */
async hybridSearch(
  @Body() searchDto: VectorSearchDto,
): Promise<VectorSearchResponseDto> {
  const { query, category } = searchDto;
  const maxResults = searchDto.limit || 10;
  const matches = await this.pgVectorService.hybridSearch(
    query,
    maxResults,
    category,
  );
  return { data: matches, total: matches.length, query };
}
@Post('generate-embedding')
@ApiOperation({
summary: 'Generate text embedding',
description:
'Generate vector embedding for the given text using OpenAI. Returns 1536-dimensional vector.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiBody({
type: EmbeddingRequestDto,
description: 'Text to generate embedding for',
examples: {
diabetes: {
summary: 'Generate embedding for diabetes text',
value: {
text: 'diabetes mellitus',
model: 'text-embedding-ada-002',
},
},
heart: {
summary: 'Generate embedding for heart condition',
value: {
text: 'acute myocardial infarction',
model: 'text-embedding-ada-002',
},
},
},
})
@ApiResponse({
status: HttpStatus.OK,
description: 'Generated embedding vector with metadata',
type: GenerateEmbeddingResponseDto,
})
@ApiResponse({
status: HttpStatus.BAD_REQUEST,
description: 'Invalid text input',
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error generating embedding',
})
/**
 * Generates an embedding for the supplied text via the service.
 *
 * The `model` field of the response echoes the requested model name, or
 * `text-embedding-ada-002` when none was given.
 * NOTE(review): this is the requested name, not a value reported back by
 * the service; confirm it matches the model actually used.
 *
 * @param requestDto Text and optional model name from the request body.
 * @returns The embedding vector, its length and the model name.
 */
async generateEmbedding(
  @Body() requestDto: EmbeddingRequestDto,
): Promise<GenerateEmbeddingResponseDto> {
  const { text, model } = requestDto;
  const vector = await this.pgVectorService.generateEmbedding(text, model);
  const response: GenerateEmbeddingResponseDto = {
    embedding: vector,
    dimensions: vector.length,
    model: model || 'text-embedding-ada-002',
  };
  return response;
}
@Post('generate-and-store-all-embeddings')
@ApiOperation({
summary: 'Generate and store embeddings for all ICD codes',
description:
'Batch generate embeddings for all ICD codes and store them in the database with pgvector. This process may take some time depending on the number of codes.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
status: HttpStatus.OK,
description: 'Embedding generation and storage results summary',
type: GenerateAllEmbeddingsResponseDto,
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error during batch embedding generation and storage',
})
/**
 * Triggers the service's batch embedding run with its default arguments and
 * adds a summary message to the service result.
 *
 * @returns All fields of the service result plus `message`.
 */
async generateAndStoreAllEmbeddings(): Promise<GenerateAllEmbeddingsResponseDto> {
  const outcome = await this.pgVectorService.generateAndStoreAllEmbeddings();
  const message = `Processed ${outcome.processed} embeddings with ${outcome.errors} errors`;
  return { ...outcome, message };
}
@Get('stats')
@ApiOperation({
summary: 'Get embedding statistics',
description:
'Get comprehensive statistics about ICD codes and their embedding status in the pgvector store.',
tags: ['PgVector Operations'],
})
@ApiProduces('application/json')
@ApiResponse({
status: HttpStatus.OK,
description: 'Embedding statistics and pgvector store status',
type: EmbeddingStatsResponseDto,
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error retrieving statistics',
})
/**
 * Returns the embedding statistics exactly as produced by the service.
 */
async getEmbeddingStats(): Promise<EmbeddingStatsResponseDto> {
  const stats = await this.pgVectorService.getEmbeddingStats();
  return stats;
}
@Get('status')
@ApiOperation({
summary: 'Get pgvector store status',
description:
'Get current operational status of the pgvector store including initialization state and document count.',
tags: ['PgVector Operations'],
})
@ApiProduces('application/json')
@ApiResponse({
status: HttpStatus.OK,
description: 'Current pgvector store status and configuration',
type: VectorStoreStatusDto,
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error retrieving pgvector store status',
})
/**
 * Returns the vector store status exactly as produced by the service.
 */
async getVectorStoreStatus(): Promise<VectorStoreStatusDto> {
  const storeStatus = await this.pgVectorService.getVectorStoreStatus();
  return storeStatus;
}
@Post('initialize')
@ApiOperation({
summary: 'Initialize pgvector store',
description:
'Initialize or reinitialize the pgvector store with all available ICD codes. This loads codes from the database into the pgvector store.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
status: HttpStatus.OK,
description: 'Pgvector store initialization results',
type: InitializeResponseDto,
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error during pgvector store initialization',
})
/**
 * Initializes the vector store, then reads its status to report the
 * document count.
 *
 * @returns A fixed success message and the document count from the status.
 */
async initializeVectorStore(): Promise<InitializeResponseDto> {
  await this.pgVectorService.initializeVectorStore();
  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();
  return {
    message: 'Pgvector store initialized successfully',
    documentCount,
  };
}
@Post('refresh')
@ApiOperation({
summary: 'Refresh pgvector store',
description:
'Refresh the pgvector store with the latest ICD codes data from the database. Useful after data updates.',
tags: ['PgVector Operations'],
})
@ApiConsumes('application/json')
@ApiProduces('application/json')
@ApiResponse({
status: HttpStatus.OK,
description: 'Pgvector store refresh results',
type: RefreshResponseDto,
})
@ApiResponse({
status: HttpStatus.INTERNAL_SERVER_ERROR,
description: 'Error during pgvector store refresh',
})
/**
 * Refreshes the vector store, then reads its status to report the
 * document count.
 *
 * @returns A fixed success message and the document count from the status.
 */
async refreshVectorStore(): Promise<RefreshResponseDto> {
  await this.pgVectorService.refreshVectorStore();
  const { documentCount } = await this.pgVectorService.getVectorStoreStatus();
  return {
    message: 'Pgvector store refreshed successfully',
    documentCount,
  };
}
}

View File

@@ -0,0 +1,10 @@
import { Module } from '@nestjs/common';
import { PgVectorController } from './pgvector.controller';
import { PgVectorService } from './pgvector.service';
/**
 * Nest module that registers the pgvector controller and provides and
 * exports PgVectorService.
 */
@Module({
  controllers: [PgVectorController],
  providers: [PgVectorService],
  exports: [PgVectorService],
})
export class PgVectorModule {}

611
src/icd/pgvector.service.ts Normal file
View File

@@ -0,0 +1,611 @@
import { Injectable, Logger } from '@nestjs/common';
import { PrismaClient } from '../../generated/prisma';
import { OpenAIEmbeddings } from '@langchain/openai';
import { PGVectorStore } from '@langchain/community/vectorstores/pgvector';
import { Document } from 'langchain/document';
import { Pool } from 'pg';
/**
 * One row of a vector search result: the ICD code fields plus a
 * similarity score.
 */
export interface VectorSearchResult {
  id: string;
  code: string;
  display: string;
  version: string;
  category: string;
  similarity: number;
}
/**
 * Input for an embedding request: the text and an optional model name.
 */
export interface EmbeddingRequest {
  text: string;
  model?: string;
}
@Injectable()
export class PgVectorService {
private readonly logger = new Logger(PgVectorService.name);
private readonly prisma = new PrismaClient();
private readonly pool: Pool;
private vectorStore: PGVectorStore | null = null;
private embeddings: OpenAIEmbeddings | null = null;
/**
 * Creates the PostgreSQL connection pool from `DATABASE_URL` and starts
 * initialization of the OpenAI embeddings client.
 */
constructor() {
  // Initialize PostgreSQL connection pool
  this.pool = new Pool({
    connectionString: process.env.DATABASE_URL,
    max: 20,
    idleTimeoutMillis: 30000,
    connectionTimeoutMillis: 2000,
  });

  // initializeEmbeddings() is async and rethrows on failure (for example when
  // OPENAI_API_KEY is missing). A constructor cannot await it, so a handler
  // is attached here; without one the failure surfaces as an unhandled
  // promise rejection. The failure itself is already logged inside
  // initializeEmbeddings(); `this.embeddings` stays null in that case.
  this.initializeEmbeddings().catch(() => {
    this.logger.warn(
      'OpenAI embeddings are not available; vector operations will fail until the configuration is fixed.',
    );
  });
}
/**
 * Builds the OpenAI embeddings client from environment configuration.
 *
 * Reads `OPENAI_API_KEY` (required) and `OPENAI_API_MODEL` (optional,
 * defaulting to `text-embedding-ada-002`) and stores the client on
 * `this.embeddings`.
 *
 * @throws Error wrapping the original message when the key is missing or
 *   the client cannot be constructed; the failure is logged first.
 */
private async initializeEmbeddings() {
  try {
    const openAiKey = process.env.OPENAI_API_KEY;
    if (!openAiKey) {
      this.logger.error(
        'OPENAI_API_KEY not found. Vector operations require OpenAI API key.',
      );
      throw new Error('OPENAI_API_KEY is required for vector operations');
    }

    const embeddingModel =
      process.env.OPENAI_API_MODEL || 'text-embedding-ada-002';
    this.embeddings = new OpenAIEmbeddings({
      openAIApiKey: openAiKey,
      modelName: embeddingModel,
      maxConcurrency: 5,
    });
    this.logger.log(
      `OpenAI embeddings initialized successfully with model: ${embeddingModel}`,
    );
  } catch (error) {
    // Errors thrown above (including the missing-key error) are logged again
    // here and rethrown with a common prefix.
    this.logger.error('Failed to initialize OpenAI embeddings:', error);
    const reason = error.message;
    throw new Error(`Failed to initialize OpenAI embeddings: ${reason}`);
  }
}
/**
 * Initializes the LangChain pgvector store on the `icd_codes` table and
 * keeps it on `this.vectorStore`.
 *
 * The store is bound to the `id`, `embedding`, `content` and `metadata`
 * columns and connects using `DATABASE_URL`.
 *
 * @throws Error when the embeddings client is not initialized or
 *   `DATABASE_URL` is not set; any error is logged and rethrown unchanged.
 */
async initializeVectorStore(): Promise<void> {
  try {
    this.logger.log('Initializing pgvector store...');

    const embeddingClient = this.embeddings;
    if (!embeddingClient) {
      throw new Error(
        'OpenAI embeddings not initialized. Cannot create vector store.',
      );
    }

    // Get database connection string
    const databaseUrl = process.env.DATABASE_URL;
    if (!databaseUrl) {
      throw new Error('DATABASE_URL not found');
    }

    // Initialize pgvector store without inserting data
    const storeConfig = {
      postgresConnectionOptions: {
        connectionString: databaseUrl,
      },
      tableName: 'icd_codes',
      columns: {
        idColumnName: 'id',
        vectorColumnName: 'embedding',
        contentColumnName: 'content',
        metadataColumnName: 'metadata',
      },
    };
    this.vectorStore = await PGVectorStore.initialize(
      embeddingClient,
      storeConfig,
    );

    this.logger.log('Pgvector store initialized successfully');
  } catch (error) {
    this.logger.error('Error initializing pgvector store:', error);
    throw error;
  }
}
/**
 * Generates an embedding for a text using the OpenAI embeddings client.
 *
 * @param text Text to embed; its first 100 characters are written to the log.
 * @param model Accepted for interface compatibility but not read by this
 *   method; the embedding comes from the client stored on `this.embeddings`.
 * @returns The embedding vector.
 * @throws Error prefixed with "Failed to generate embedding:" when the
 *   client is not initialized or the embedding call fails.
 */
async generateEmbedding(
  text: string,
  model: string = 'text-embedding-ada-002',
): Promise<number[]> {
  try {
    const preview = text.substring(0, 100);
    this.logger.log(`Generating embedding for text: ${preview}...`);

    if (!this.embeddings) {
      throw new Error(
        'OpenAI embeddings not initialized. Please check your API configuration.',
      );
    }

    // Use OpenAI embeddings
    const vector = await this.embeddings.embedQuery(text);
    this.logger.log(
      `Generated OpenAI embedding with ${vector.length} dimensions`,
    );
    return vector;
  } catch (error) {
    this.logger.error('Error generating embedding:', error);
    throw new Error(`Failed to generate embedding: ${error.message}`);
  }
}
/**
 * Generates and stores embeddings for a sample of ICD codes that have none.
 *
 * Selects up to `limit` rows of `icd_codes` whose `embedding` is NULL, embeds
 * the text "<code> - <display>" for each one, and writes the vector, a JSON
 * metadata object and the text back to the same row. Rows are handled one at
 * a time; a failure on one row is logged and counted, and the run continues.
 *
 * @param limit Maximum number of rows to process (default 100).
 * @returns Counts of processed rows, failed rows and rows selected.
 * @throws Rethrows errors raised outside the per-row handling (for example
 *   by the initial SELECT) after logging them.
 */
async generateAndStoreAllEmbeddings(limit: number = 100): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
}> {
  try {
    this.logger.log(
      `Starting batch embedding generation and storage for sample ${limit} ICD codes...`,
    );

    // Get sample ICD codes without embeddings using raw SQL
    const { rows: pending } = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL LIMIT $1',
      [limit],
    );

    if (pending.length === 0) {
      this.logger.log('All ICD codes already have embeddings');
      return { processed: 0, errors: 0, totalSample: 0 };
    }

    this.logger.log(
      `Found ${pending.length} sample codes without embeddings (limited to ${limit})`,
    );

    // Continuation lines are kept flush-left so the SQL text sent to the
    // server is unchanged.
    const updateSql = `UPDATE icd_codes
SET embedding = $1::vector,
metadata = $2::jsonb,
content = $3
WHERE id = $4`;

    let processed = 0;
    let errors = 0;

    for (const row of pending) {
      try {
        // Text representation used both for the embedding and the content column
        const text = `${row.code} - ${row.display}`;
        const embedding = await this.generateEmbedding(text);

        // pgvector accepts a bracketed, comma-separated literal cast to vector
        const vectorLiteral = `[${embedding.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });

        await this.pool.query(updateSql, [
          vectorLiteral,
          metadataJson,
          text,
          row.id,
        ]);

        processed++;
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${pending.length} sample embeddings`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors++;
      }
    }

    this.logger.log(
      `Sample embedding generation and storage completed. Processed: ${processed}, Errors: ${errors}, Total Sample: ${pending.length}`,
    );

    return {
      processed,
      errors,
      totalSample: pending.length,
    };
  } catch (error) {
    this.logger.error('Error in generateAndStoreAllEmbeddings:', error);
    throw error;
  }
}
/**
 * Generates and stores embeddings for a sample of ICD codes in one category.
 *
 * Selects up to `limit` rows of `icd_codes` in `category` whose `embedding`
 * is NULL, embeds the text "<code> - <display>" for each one, and writes the
 * vector, a JSON metadata object and the text back to the same row. A failure
 * on one row is logged and counted, and the run continues.
 *
 * @param category Category value matched against the `category` column.
 * @param limit Maximum number of rows to process (default 100).
 * @returns Counts of processed rows, failed rows and rows selected, plus the
 *   category that was requested.
 * @throws Rethrows errors raised outside the per-row handling (for example
 *   by the initial SELECT) after logging them.
 */
async generateAndStoreSampleEmbeddingsByCategory(
  category: string,
  limit: number = 100,
): Promise<{
  processed: number;
  errors: number;
  totalSample: number;
  category: string;
}> {
  try {
    this.logger.log(
      `Starting batch embedding generation for sample ${limit} ICD codes in category: ${category}`,
    );

    // Get sample ICD codes by category without embeddings using raw SQL
    const { rows: pending } = await this.pool.query(
      'SELECT id, code, display, version, category FROM icd_codes WHERE embedding IS NULL AND category = $1 LIMIT $2',
      [category, limit],
    );

    if (pending.length === 0) {
      this.logger.log(
        `No ICD codes found in category '${category}' without embeddings`,
      );
      return { processed: 0, errors: 0, totalSample: 0, category };
    }

    this.logger.log(
      `Found ${pending.length} sample codes in category '${category}' without embeddings (limited to ${limit})`,
    );

    // Continuation lines are kept flush-left so the SQL text sent to the
    // server is unchanged.
    const updateSql = `UPDATE icd_codes
SET embedding = $1::vector,
metadata = $2::jsonb,
content = $3
WHERE id = $4`;

    let processed = 0;
    let errors = 0;

    for (const row of pending) {
      try {
        // Text representation used both for the embedding and the content column
        const text = `${row.code} - ${row.display}`;
        const embedding = await this.generateEmbedding(text);

        // pgvector accepts a bracketed, comma-separated literal cast to vector
        const vectorLiteral = `[${embedding.join(',')}]`;
        const metadataJson = JSON.stringify({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
        });

        await this.pool.query(updateSql, [
          vectorLiteral,
          metadataJson,
          text,
          row.id,
        ]);

        processed++;
        if (processed % 10 === 0) {
          this.logger.log(
            `Processed ${processed}/${pending.length} sample embeddings in category '${category}'`,
          );
        }
      } catch (error) {
        this.logger.error(`Error processing code ${row.code}:`, error);
        errors++;
      }
    }

    this.logger.log(
      `Sample embedding generation completed for category '${category}'. Processed: ${processed}, Errors: ${errors}, Total Sample: ${pending.length}`,
    );

    return {
      processed,
      errors,
      totalSample: pending.length,
      category,
    };
  } catch (error) {
    this.logger.error(
      `Error in generateAndStoreSampleEmbeddingsByCategory for category '${category}':`,
      error,
    );
    throw error;
  }
}
/**
 * Vector similarity search using pgvector.
 *
 * Embeds `query` via `this.generateEmbedding`, orders `icd_codes` rows that
 * have an embedding by pgvector's `<=>` distance to that vector, and reports
 * `1 - distance` as `similarity`. Rows whose similarity is below `threshold`
 * are dropped after the database has applied `limit`.
 *
 * @param query Free-text search query.
 * @param limit Maximum number of rows requested from the database (default 10).
 * @param category Optional category the rows must match.
 * @param threshold Minimum similarity a row needs to be returned (default 0.7).
 * @returns Matching rows, in the order the database returned them.
 * @throws Error when `this.embeddings` is not set; any other error is logged
 *         and re-thrown.
 */
async vectorSearch(
  query: string,
  limit: number = 10,
  category?: string,
  threshold: number = 0.7,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing pgvector search for: ${query}`);

    if (!this.embeddings) {
      throw new Error('OpenAI embeddings not initialized');
    }

    const queryVector = await this.generateEmbedding(query);

    // $1 is always the query vector in pgvector's "[v1,v2,...]" text form.
    const params: any[] = [`[${queryVector.join(',')}]`];

    const baseSelect = [
      '',
      'SELECT',
      'id, code, display, version, category,',
      '1 - (embedding <=> $1::vector) as similarity',
      'FROM icd_codes',
      'WHERE embedding IS NOT NULL',
      '',
    ].join('\n');

    // Placeholder numbers follow the position of each pushed parameter.
    let categoryClause = '';
    if (category) {
      params.push(category);
      categoryClause = ` AND category = $${params.length}`;
    }

    params.push(limit);
    const sql = `${baseSelect}${categoryClause} ORDER BY embedding <=> $1::vector ASC LIMIT $${params.length}`;

    const { rows } = await this.pool.query(sql, params);

    // Keep only rows that reach the similarity threshold.
    const matches: VectorSearchResult[] = [];
    for (const row of rows as any[]) {
      if (row.similarity >= threshold) {
        matches.push({
          id: row.id,
          code: row.code,
          display: row.display,
          version: row.version,
          category: row.category,
          similarity: parseFloat(row.similarity),
        });
      }
    }

    this.logger.log(
      `Pgvector search returned ${matches.length} results for query: "${query}"`,
    );
    return matches;
  } catch (error) {
    this.logger.error('Error in pgvector search:', error);
    throw error;
  }
}
/**
 * Hybrid search: combine vector similarity with text search.
 *
 * Runs `vectorSearch` (threshold 0.5) and then `textSearch`, each asked for
 * `limit * 2` rows. Vector scores are multiplied by `vectorWeight`, text
 * scores by `textWeight`; a row found by both searches gets the sum of the
 * two weighted scores. The merged rows are sorted by combined score,
 * highest first, and cut to `limit`.
 *
 * @param query Free-text search query.
 * @param limit Maximum number of results returned (default 10).
 * @param category Optional category filter passed to both searches.
 * @param vectorWeight Weight applied to vector similarity (default 0.7).
 * @param textWeight Weight applied to text similarity (default 0.3).
 * @returns Up to `limit` results whose `similarity` is the combined score.
 */
async hybridSearch(
  query: string,
  limit: number = 10,
  category?: string,
  vectorWeight: number = 0.7,
  textWeight: number = 0.3,
): Promise<VectorSearchResult[]> {
  try {
    this.logger.log(`Performing hybrid search for: ${query}`);

    const candidateLimit = limit * 2;
    const vectorHits = await this.vectorSearch(
      query,
      candidateLimit,
      category,
      0.5,
    );
    const textHits = await this.textSearch(query, candidateLimit, category);

    // Merged results keyed by row id; values are copies, not the inputs.
    const scored = new Map<string, VectorSearchResult>();

    vectorHits.forEach((hit) => {
      scored.set(hit.id, {
        ...hit,
        similarity: hit.similarity * vectorWeight,
      });
    });

    textHits.forEach((hit) => {
      // A missing or zero text score is treated as 0.5 before weighting.
      const weightedTextScore = (hit.similarity || 0.5) * textWeight;
      const alreadyScored = scored.get(hit.id);
      if (alreadyScored) {
        alreadyScored.similarity += weightedTextScore;
        return;
      }
      scored.set(hit.id, { ...hit, similarity: weightedTextScore });
    });

    return Array.from(scored.values())
      .sort((left, right) => right.similarity - left.similarity)
      .slice(0, limit);
  } catch (error) {
    this.logger.error('Error in hybrid search:', error);
    throw error;
  }
}
/**
 * Text-based search with scoring.
 *
 * Matches `query` as a case-insensitive substring of `code` or `display`,
 * optionally restricted to `category`, ordered by `code` and capped at
 * `limit` rows. Every returned row gets a fixed `similarity` of 0.5.
 *
 * The query text is matched literally: the LIKE metacharacters `%`, `_`
 * and the escape character `\` are escaped before the pattern is built, so
 * user input containing them no longer behaves as a wildcard.
 *
 * @param query Text to look for; an empty string applies no text filter.
 * @param limit Maximum number of rows returned.
 * @param category Optional category the rows must match.
 * @returns Matching rows with a fixed similarity score of 0.5.
 */
private async textSearch(
  query: string,
  limit: number,
  category?: string,
): Promise<VectorSearchResult[]> {
  try {
    const params: any[] = [];
    const whereConditions: string[] = [];

    // Placeholder numbers follow the position of each pushed parameter.
    if (category) {
      params.push(category);
      whereConditions.push(`category = $${params.length}`);
    }

    if (query) {
      // Backslash is PostgreSQL's default LIKE escape character.
      const literalQuery = query.replace(/[\\%_]/g, '\\$&');
      params.push(`%${literalQuery}%`);
      whereConditions.push(
        `(code ILIKE $${params.length} OR display ILIKE $${params.length})`,
      );
    }

    let sql = 'SELECT id, code, display, version, category FROM icd_codes';
    if (whereConditions.length > 0) {
      sql += ' WHERE ' + whereConditions.join(' AND ');
    }

    params.push(limit);
    sql += ' ORDER BY code ASC LIMIT $' + params.length;

    const result = await this.pool.query(sql, params);

    return result.rows.map((code) => ({
      id: code.id,
      code: code.code,
      display: code.display,
      version: code.version,
      category: code.category,
      similarity: 0.5, // Default text similarity score
    }));
  } catch (error) {
    this.logger.error('Error in text search:', error);
    throw error;
  }
}
/**
 * Get embedding statistics for the `icd_codes` table.
 *
 * Both counts come from a single statement so they describe the same
 * snapshot of the table; two separate queries could disagree while
 * embeddings are being written concurrently.
 *
 * @returns Total row count, rows with and without an embedding, the share
 *          of rows with an embedding as a percentage rounded to two
 *          decimals (0 when the table is empty), and whether
 *          `this.vectorStore` is set.
 */
async getEmbeddingStats(): Promise<{
  total: number;
  withEmbeddings: number;
  withoutEmbeddings: number;
  percentage: number;
  vectorStoreStatus: string;
}> {
  try {
    // COUNT(embedding) counts only rows where embedding IS NOT NULL.
    const result = await this.pool.query(
      'SELECT COUNT(*) AS total, COUNT(embedding) AS with_embeddings FROM icd_codes',
    );
    const counts = result.rows[0];

    const total = parseInt(counts.total, 10);
    const withEmbeddings = parseInt(counts.with_embeddings, 10);
    const withoutEmbeddings = total - withEmbeddings;
    const percentage = total > 0 ? (withEmbeddings / total) * 100 : 0;

    const vectorStoreStatus = this.vectorStore
      ? 'Initialized'
      : 'Not Initialized';

    return {
      total,
      withEmbeddings,
      withoutEmbeddings,
      percentage: Math.round(percentage * 100) / 100,
      vectorStoreStatus,
    };
  } catch (error) {
    this.logger.error('Error getting embedding stats:', error);
    throw error;
  }
}
/**
 * Refresh the vector store with the latest data.
 *
 * Delegates to `this.initializeVectorStore()` and logs before and after.
 * A failure is logged and re-thrown to the caller.
 */
async refreshVectorStore(): Promise<void> {
  this.logger.log('Refreshing pgvector store...');
  try {
    await this.initializeVectorStore();
  } catch (refreshError) {
    this.logger.error('Error refreshing pgvector store:', refreshError);
    throw refreshError;
  }
  this.logger.log('Pgvector store refreshed successfully');
}
/**
 * Get vector store status.
 *
 * @returns Whether `this.vectorStore` is set, the number of `icd_codes`
 *          rows that have an embedding, a label for the embedding model
 *          (taken from `OPENAI_API_MODEL`, or 'Not Available' when
 *          `this.embeddings` is not set), and the time of this call.
 */
async getVectorStoreStatus(): Promise<{
  initialized: boolean;
  documentCount: number;
  embeddingModel: string;
  lastUpdated: Date;
}> {
  try {
    // Count the rows that already carry an embedding.
    const { rows } = await this.pool.query(
      'SELECT COUNT(*) as count FROM icd_codes WHERE embedding IS NOT NULL',
    );
    const documentCount = parseInt(rows[0].count);

    const initialized = Boolean(this.vectorStore);

    let embeddingModel = 'Not Available';
    if (this.embeddings) {
      embeddingModel = `OpenAI ${process.env.OPENAI_API_MODEL || 'text-embedding-ada-002'}`;
    }

    return {
      initialized,
      documentCount,
      embeddingModel,
      lastUpdated: new Date(),
    };
  } catch (statusError) {
    this.logger.error('Error getting vector store status:', statusError);
    throw statusError;
  }
}
/**
 * Cleanup resources.
 *
 * Disconnects the Prisma client and then ends the connection pool. The
 * pool is ended even when the Prisma disconnect rejects, so a failure in
 * the first step cannot leave pool connections open.
 */
async onModuleDestroy() {
  try {
    await this.prisma.$disconnect();
  } finally {
    await this.pool.end();
  }
}
}

View File

@@ -1,8 +1,133 @@
import { NestFactory } from '@nestjs/core';
import { AppModule } from './app.module';
import { Logger, ValidationPipe } from '@nestjs/common';
import { DocumentBuilder, SwaggerModule } from '@nestjs/swagger';
/**
 * Parse a comma-separated environment value into a list.
 *
 * Entries are trimmed and empty entries are dropped, so values such as
 * "http://a.test, http://b.test" yield clean items. The fallback is used
 * only when the variable is not set at all.
 */
function parseCsvEnv(value: string | undefined, fallback: string[]): string[] {
  if (value === undefined) {
    return fallback;
  }
  return value
    .split(',')
    .map((entry) => entry.trim())
    .filter((entry) => entry.length > 0);
}

/**
 * Create the Nest application, configure CORS, global validation and
 * (optionally) Swagger, then start listening on HOST:PORT.
 *
 * Environment variables read here: PORT, HOST, NODE_ENV, CORS_ORIGINS,
 * CORS_METHODS, CORS_HEADERS, CORS_CREDENTIALS, ENABLE_DOCS, APP_URL,
 * HEALTH_CHECK_ENABLED and HEALTH_CHECK_PATH.
 *
 * The server starts listening exactly once, after all configuration has
 * been applied.
 */
async function bootstrap() {
  const logger = new Logger('Bootstrap');
  const app = await NestFactory.create(AppModule);

  // Environment configuration
  const port = process.env.PORT ?? 3000;
  const host = process.env.HOST ?? 'localhost';
  const nodeEnv = process.env.NODE_ENV ?? 'development';

  // CORS Configuration
  const corsOrigins = parseCsvEnv(process.env.CORS_ORIGINS, [
    'http://localhost:3000',
  ]);
  const corsMethods = parseCsvEnv(process.env.CORS_METHODS, [
    'GET',
    'HEAD',
    'PUT',
    'PATCH',
    'POST',
    'DELETE',
    'OPTIONS',
  ]);
  const corsHeaders = parseCsvEnv(process.env.CORS_HEADERS, [
    'Content-Type',
    'Accept',
    'Authorization',
    'X-Requested-With',
  ]);
  const corsCredentials = process.env.CORS_CREDENTIALS === 'true';

  // Enable CORS
  app.enableCors({
    origin: corsOrigins,
    methods: corsMethods,
    allowedHeaders: corsHeaders,
    credentials: corsCredentials,
  });

  // Enable global validation pipe
  app.useGlobalPipes(
    new ValidationPipe({
      whitelist: true,
      forbidNonWhitelisted: true,
      transform: true,
      transformOptions: {
        enableImplicitConversion: true,
      },
    }),
  );

  // Setup Swagger Documentation
  if (process.env.ENABLE_DOCS === 'true') {
    const config = new DocumentBuilder()
      .setTitle('Claim Guard API')
      .setDescription(
        'API documentation for Claim Guard Backend - ICD Code Management System',
      )
      .setVersion('1.0.0')
      .setContact(
        'Development Team',
        'https://github.com/your-org/claim-guard-be',
        'dev@yourdomain.com',
      )
      .setLicense('MIT', 'https://opensource.org/licenses/MIT')
      .addServer(
        process.env.APP_URL || 'http://localhost:3000',
        'Development Server',
      )
      .addTag('ICD', 'ICD Code management operations')
      .addTag('Health', 'Application health and monitoring')
      .addBearerAuth(
        {
          type: 'http',
          scheme: 'bearer',
          bearerFormat: 'JWT',
          name: 'JWT',
          description: 'Enter JWT token',
          in: 'header',
        },
        'JWT-auth',
      )
      .build();

    const document = SwaggerModule.createDocument(app, config);
    SwaggerModule.setup('docs', app, document, {
      swaggerOptions: {
        persistAuthorization: true,
        docExpansion: 'none',
        filter: true,
        showRequestDuration: true,
      },
      customSiteTitle: 'Claim Guard API Documentation',
      customfavIcon: '/favicon.ico',
      customCss: '.swagger-ui .topbar { display: none }',
    });

    logger.log(
      `📚 Swagger Documentation enabled at: http://${host}:${port}/docs`,
    );
  }

  // NOTE(review): REQUEST_TIMEOUT was parsed here but never applied to the
  // HTTP server; wire it up explicitly if a request timeout is required.

  // Graceful shutdown
  app.enableShutdownHooks();

  await app.listen(port, host);

  logger.log(`🚀 Application is running on: http://${host}:${port}`);
  logger.log(`🌍 Environment: ${nodeEnv}`);
  logger.log(`🔐 CORS Origins: ${corsOrigins.join(', ')}`);

  if (process.env.HEALTH_CHECK_ENABLED === 'true') {
    logger.log(
      `❤️ Health Check available at: http://${host}:${port}${process.env.HEALTH_CHECK_PATH || '/health'}`,
    );
  }
}
// Start the server; a startup failure is reported on stderr and the process
// exits with a non-zero status.
bootstrap().catch((startupError) => {
  console.error('❌ Error starting server:', startupError);
  process.exit(1);
});

112
verify_migration.sql Normal file
View File

@@ -0,0 +1,112 @@
-- =====================================================
-- VERIFICATION: pgvector Migration Success
-- =====================================================
-- File: verify_migration.sql
--
-- Usage:
-- 1. Connect to the database: psql -d claim_guard -U username
-- 2. Run: \i verify_migration.sql
-- =====================================================
\echo '🎉 VERIFYING pgvector MIGRATION SUCCESS 🎉'
\echo '====================================================='

-- Report whether an extension named "vector" is registered in pg_extension
-- for the current database.
SELECT
    'pgvector Extension' AS component,
    CASE
        WHEN NOT EXISTS (
            SELECT 1
            FROM pg_extension AS ext
            WHERE ext.extname = 'vector'
        ) THEN '❌ NOT INSTALLED'
        ELSE '✅ INSTALLED'
    END AS status;
-- Check that the icd_codes table exists.
-- The lookup is limited to the current schema so that a same-named table in
-- an unrelated schema cannot produce a false "EXISTS".
-- NOTE(review): assumes icd_codes lives in the schema returned by
-- current_schema() (the first schema on search_path) - confirm for
-- deployments that use a custom schema.
SELECT
    'icd_codes Table' AS component,
    CASE
        WHEN EXISTS (
            SELECT 1
            FROM information_schema.tables AS t
            WHERE t.table_schema = current_schema()
              AND t.table_name = 'icd_codes'
        ) THEN '✅ EXISTS'
        ELSE '❌ MISSING'
    END AS status;
-- List every column of icd_codes and flag the columns this migration adds
-- (embedding, metadata, content).
-- The lookup is limited to the current schema so columns of a same-named
-- table in another schema are not mixed into the listing.
-- NOTE(review): assumes icd_codes lives in the schema returned by
-- current_schema() - confirm for deployments that use a custom schema.
SELECT
    c.column_name,
    c.data_type,
    c.is_nullable,
    CASE
        WHEN c.column_name IN ('embedding', 'metadata', 'content')
            THEN '✅ pgvector column'
        ELSE ' Standard column'
    END AS status
FROM information_schema.columns AS c
WHERE c.table_schema = current_schema()
  AND c.table_name = 'icd_codes'
ORDER BY c.column_name;
-- List the indexes on icd_codes and flag those whose name mentions
-- "embedding" or "metadata".
-- The lookup is limited to the current schema so indexes of a same-named
-- table in another schema are not mixed into the listing.
-- NOTE(review): assumes icd_codes lives in the schema returned by
-- current_schema() - confirm for deployments that use a custom schema.
SELECT
    i.indexname,
    i.indexdef,
    CASE
        WHEN i.indexname LIKE '%embedding%' OR i.indexname LIKE '%metadata%'
            THEN '✅ Performance index'
        ELSE ' Standard index'
    END AS status
FROM pg_indexes AS i
WHERE i.schemaname = current_schema()
  AND i.tablename = 'icd_codes'
ORDER BY i.indexname;
-- Smoke-test the vector type
\echo ''
\echo '🧪 TESTING pgvector FUNCTIONALITY 🧪'

-- Cast two text literals to fixed-dimension vectors; this statement errors
-- if the vector type is not available in this database.
SELECT
    CAST('[1,2,3,4,5]' AS vector(5)) AS test_vector_5d,
    CAST('[0.1,0.2,0.3]' AS vector(3)) AS test_vector_3d;
-- Row totals for icd_codes: all rows, and rows where each of the new
-- columns is populated (non-NULL).
SELECT
    COUNT(*) AS total_rows,
    COUNT(*) FILTER (WHERE embedding IS NOT NULL) AS rows_with_embeddings,
    COUNT(*) FILTER (WHERE metadata IS NOT NULL) AS rows_with_metadata,
    COUNT(*) FILTER (WHERE content IS NOT NULL) AS rows_with_content
FROM icd_codes;
-- Show five sample rows with a per-column indicator of whether the new
-- columns are populated. Ordered by id so repeated runs show the same rows.
SELECT
    id,
    code,
    display,
    version,
    category,
    CASE
        WHEN embedding IS NOT NULL THEN '✅ Has embedding'
        ELSE '❌ No embedding'
    END AS embedding_status,
    CASE
        WHEN metadata IS NOT NULL THEN '✅ Has metadata'
        ELSE '❌ No metadata'
    END AS metadata_status,
    CASE
        WHEN content IS NOT NULL THEN '✅ Has content'
        ELSE '❌ No content'
    END AS content_status
FROM icd_codes
ORDER BY id
LIMIT 5;
-- Closing summary. These lines are printed unconditionally, so they describe
-- what a successful run looks like instead of claiming that every check
-- passed; the actual status is in the query results above.
\echo ''
\echo '🎯 MIGRATION VERIFICATION COMPLETE! 🎯'
\echo '====================================================='
\echo 'Review the results above. A successful migration shows:'
\echo '  - pgvector extension reported as INSTALLED'
\echo '  - embedding column (vector type) listed for icd_codes'
\echo '  - metadata column (JSONB) listed for icd_codes'
\echo '  - content column (TEXT) listed for icd_codes'
\echo '  - Performance indexes listed for icd_codes'
\echo ''
\echo '🚀 NEXT STEPS:'
\echo '1. Start application: npm run start:dev'
\echo '2. Initialize vector store: POST /pgvector/initialize'
\echo '3. Generate embeddings: POST /pgvector/generate-and-store-all-embeddings'
\echo '4. Test vector search: POST /pgvector/search'
\echo '====================================================='