add pg vector and embed

This commit is contained in:
2025-08-22 19:34:54 +07:00
parent 21567a0a7c
commit b77beb2d85
27 changed files with 5273 additions and 216 deletions

View File

@@ -0,0 +1,26 @@
-- Migration: create the "icd_codes" table with a pgvector embedding column,
-- a unique index on "code", and an approximate-nearest-neighbour (ANN) index
-- on "embedding" for cosine similarity search.

-- Enable pgvector extension (provides the vector type and its index methods)
CREATE EXTENSION IF NOT EXISTS vector;

-- CreateTable
CREATE TABLE "icd_codes" (
    "id" TEXT NOT NULL,
    "code" TEXT NOT NULL,
    "display" TEXT NOT NULL,
    "version" TEXT NOT NULL,
    "category" TEXT NOT NULL,
    -- Nullable, so a row may exist without an embedding.
    "embedding" vector(1536),
    "createdAt" TIMESTAMP(3) NOT NULL DEFAULT CURRENT_TIMESTAMP,
    -- NOT NULL with no database default: every INSERT must supply a value.
    "updatedAt" TIMESTAMP(3) NOT NULL,

    CONSTRAINT "icd_codes_pkey" PRIMARY KEY ("id")
);

-- Create unique index on code
CREATE UNIQUE INDEX "icd_codes_code_key" ON "icd_codes" ("code");

-- Create HNSW index for fast vector similarity search.
-- Why HNSW and not IVFFlat: the table was created just above, so it is empty
-- when this statement runs. IVFFlat chooses its list centroids from the rows
-- present at build time, so an IVFFlat index built here would have poor recall
-- until it was rebuilt after loading data. HNSW has no training step and can
-- be created on an empty table. Requires pgvector 0.5.0 or newer.
-- m and ef_construction are spelled out at pgvector's default values.
CREATE INDEX "icd_codes_embedding_idx" ON "icd_codes"
    USING hnsw ("embedding" vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- Add comments for documentation
COMMENT ON COLUMN "icd_codes"."embedding" IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
COMMENT ON INDEX "icd_codes_embedding_idx" IS 'HNSW index for fast cosine similarity search (m = 16, ef_construction = 64)';

View File

@@ -0,0 +1,27 @@
-- Migration: Add pgvector support to icd_codes table
-- Every DDL statement below uses IF NOT EXISTS, so objects that are already
-- present are left untouched rather than raising an error.

-- Enable pgvector extension
CREATE EXTENSION IF NOT EXISTS vector;

-- Add embedding column with pgvector type
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "embedding" vector(1536);

-- Add metadata column for LangChain pgvector
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "metadata" JSONB;

-- Add content column for LangChain pgvector
ALTER TABLE "icd_codes" ADD COLUMN IF NOT EXISTS "content" TEXT;

-- Create HNSW index for fast vector similarity search.
-- Why HNSW and not IVFFlat: when the "embedding" column is added by this
-- migration, every row's embedding is still NULL at this point. IVFFlat
-- chooses its list centroids from the vectors present at build time, so it
-- would be trained on no data and give poor recall until rebuilt. HNSW has no
-- training step. Requires pgvector 0.5.0 or newer.
-- m and ef_construction are spelled out at pgvector's default values.
CREATE INDEX IF NOT EXISTS "icd_codes_embedding_idx" ON "icd_codes"
    USING hnsw ("embedding" vector_cosine_ops)
    WITH (m = 16, ef_construction = 64);

-- Create index on metadata for fast JSON queries
CREATE INDEX IF NOT EXISTS "icd_codes_metadata_idx" ON "icd_codes" USING GIN ("metadata");

-- Add comments for documentation
COMMENT ON COLUMN "icd_codes"."embedding" IS 'Vector embedding for semantic search using pgvector (1536 dimensions)';
COMMENT ON COLUMN "icd_codes"."metadata" IS 'JSON metadata for LangChain pgvector operations';
COMMENT ON COLUMN "icd_codes"."content" IS 'Text content for LangChain pgvector operations';
-- The index comment deliberately does not name the index method or its
-- parameters: IF NOT EXISTS above may have kept an index created earlier with
-- different settings, and this text must stay true in that case as well.
COMMENT ON INDEX "icd_codes_embedding_idx" IS 'Approximate nearest-neighbour index for fast cosine similarity search on embedding';
COMMENT ON INDEX "icd_codes_metadata_idx" IS 'GIN index for fast JSON metadata queries';

View File

@@ -0,0 +1,3 @@
# Please do not edit this file manually
# It should be added in your version-control system (e.g., Git)
provider = "postgresql"