add pg vector and embed

This commit is contained in:
2025-08-22 19:34:54 +07:00
parent 21567a0a7c
commit b77beb2d85
27 changed files with 5273 additions and 216 deletions

View File

@@ -0,0 +1,26 @@
-- Migration: create the "icd_codes" table with a pgvector embedding column.
--
-- Every DDL statement is guarded with IF NOT EXISTS so the script can be
-- re-run, or applied to a database where some of these objects already
-- exist, without aborting part-way through.

-- Enable the pgvector extension; it provides the "vector" column type and
-- the ivfflat index access method used below.
CREATE EXTENSION IF NOT EXISTS vector;

-- CreateTable
CREATE TABLE IF NOT EXISTS "icd_codes" (
    "id" TEXT NOT NULL,
    "code" TEXT NOT NULL,
    "display" TEXT NOT NULL,
    "version" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    -- Nullable: rows may be inserted without an embedding.
    "embedding" vector(1536),
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- No database-side default: the writer must supply a value on insert.
    "updatedAt" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "icd_codes_pkey" PRIMARY KEY ("id")
);

-- If the table already existed, CREATE TABLE above was skipped. Make sure the
-- embedding column is present before an index is built on it.
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "embedding" vector(1536);

-- Create unique index on code
CREATE UNIQUE INDEX IF NOT EXISTS "icd_codes_code_key" ON "icd_codes" ("code");

-- Create ivfflat index for fast vector similarity search (cosine distance).
-- NOTE(review): when this migration creates the table, the index is built on
-- an empty table. pgvector recommends building IVFFlat indexes after data has
-- been loaded -- consider a REINDEX after the initial bulk load; confirm.
CREATE INDEX IF NOT EXISTS "icd_codes_embedding_idx" ON "icd_codes"
    USING ivfflat ("embedding" vector_cosine_ops) WITH (lists = 100);

-- Add comments for documentation
COMMENT ON COLUMN "icd_codes"."embedding" IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
COMMENT ON INDEX "icd_codes_embedding_idx" IS 'IVFFlat index for fast cosine similarity search with 100 lists';

View File

@@ -0,0 +1,27 @@
-- Migration: extend "icd_codes" with pgvector / LangChain columns and indexes.
-- Each object is created with an IF NOT EXISTS guard and is followed directly
-- by its catalog comment.

-- pgvector supplies the "vector" type and the ivfflat access method.
CREATE EXTENSION IF NOT EXISTS vector;

-- Column: 1536-dimensional embedding.
ALTER TABLE "icd_codes"
    ADD COLUMN IF NOT EXISTS "embedding" vector(1536);
COMMENT ON COLUMN "icd_codes"."embedding"
    IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';

-- Column: JSON metadata.
ALTER TABLE "icd_codes"
    ADD COLUMN IF NOT EXISTS "metadata" JSONB;
COMMENT ON COLUMN "icd_codes"."metadata"
    IS 'JSON metadata for LangChain pgvector operations';

-- Column: text content.
ALTER TABLE "icd_codes"
    ADD COLUMN IF NOT EXISTS "content" TEXT;
COMMENT ON COLUMN "icd_codes"."content"
    IS 'Text content for LangChain pgvector operations';

-- Index: ivfflat over the embedding, cosine operator class, 100 lists.
CREATE INDEX IF NOT EXISTS "icd_codes_embedding_idx"
    ON "icd_codes"
    USING ivfflat ("embedding" vector_cosine_ops)
    WITH (lists = 100);
COMMENT ON INDEX "icd_codes_embedding_idx"
    IS 'IVFFlat index for fast cosine similarity search with 100 lists';

-- Index: GIN over the JSONB metadata column.
CREATE INDEX IF NOT EXISTS "icd_codes_metadata_idx"
    ON "icd_codes"
    USING GIN ("metadata");
COMMENT ON INDEX "icd_codes_metadata_idx"
    IS 'GIN index for fast JSON metadata queries';

View File

@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "postgresql"

View File

@@ -1,12 +1,5 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema
// Looking for ways to speed up your queries, or scale easily with your serverless or edge functions?
// Try Prisma Accelerate: https://pris.ly/cli/accelerate-init
generator client {
provider = "prisma-client-js"
output = "../generated/prisma"
}
datasource db {
@@ -15,13 +8,18 @@ datasource db {
}
model IcdCode {
id String @id @default(uuid())
code String @unique
id String @id @default(uuid())
code String @unique
display String
version String
category String // "ICD9" or "ICD10"
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
category String
embedding Unsupported("vector")?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
metadata Json?
content String?
@@index([embedding])
@@index([metadata], type: Gin)
@@map("icd_codes")
}