add pg vector and embed

This commit is contained in:
2025-08-22 19:34:54 +07:00
parent 21567a0a7c
commit b77beb2d85
27 changed files with 5273 additions and 216 deletions

112
verify_migration.sql Normal file
View File

@@ -0,0 +1,112 @@
-- =====================================================
-- VERIFICATION: pgvector Migration Success
-- =====================================================
-- File: verify_migration.sql
--
-- Usage:
-- 1. Connect to the database: psql -d claim_guard -U username
-- 2. Run: \i verify_migration.sql
-- =====================================================
\echo '🎉 VERIFYING pgvector MIGRATION SUCCESS 🎉'
\echo '====================================================='
-- Report whether an extension named "vector" is registered in this database.
SELECT
    'pgvector Extension' AS component,
    CASE
        WHEN NOT EXISTS (
            SELECT 1
            FROM pg_extension AS ext
            WHERE ext.extname = 'vector'
        ) THEN '❌ NOT INSTALLED'
        ELSE '✅ INSTALLED'
    END AS status;
-- Check that the icd_codes table is reachable.
-- to_regclass() resolves the name through the session search_path, the same
-- way the unqualified "FROM icd_codes" queries in this script do, and returns
-- NULL instead of raising when nothing matches. An unfiltered lookup in
-- information_schema.tables would also match a same-named table living in an
-- unrelated schema, reporting success while the later queries fail.
SELECT
    'icd_codes Table' AS component,
    CASE
        WHEN to_regclass('icd_codes') IS NOT NULL THEN '✅ EXISTS'
        ELSE '❌ MISSING'
    END AS status;
-- List every column of icd_codes and tag the ones added for pgvector support.
-- information_schema reports data_type = 'USER-DEFINED' for types that come
-- from an extension, so udt_name is selected as well to show the underlying
-- type name (e.g. "vector") and make the embedding column verifiable.
SELECT
    col.column_name,
    col.data_type,
    col.udt_name,
    col.is_nullable,
    CASE
        WHEN col.column_name IN ('embedding', 'metadata', 'content')
            THEN '✅ pgvector column'
        ELSE ' Standard column'
    END AS status
FROM information_schema.columns AS col
WHERE col.table_name = 'icd_codes'
ORDER BY col.column_name;
-- List the indexes defined on icd_codes, tagging those whose name mentions
-- "embedding" or "metadata".
SELECT
    idx.indexname,
    idx.indexdef,
    CASE
        WHEN idx.indexname LIKE ANY (ARRAY['%embedding%', '%metadata%'])
            THEN '✅ Performance index'
        ELSE ' Standard index'
    END AS status
FROM pg_indexes AS idx
WHERE idx.tablename = 'icd_codes'
ORDER BY idx.indexname;
-- Smoke-test the vector type by casting two text literals to fixed dimensions.
\echo ''
\echo '🧪 TESTING pgvector FUNCTIONALITY 🧪'
SELECT
    CAST('[1,2,3,4,5]' AS vector(5)) AS test_vector_5d,
    CAST('[0.1,0.2,0.3]' AS vector(3)) AS test_vector_3d;
-- Count all rows in icd_codes and how many have each of the three
-- pgvector-related columns populated (non-NULL).
SELECT
    COUNT(*) AS total_rows,
    COUNT(*) FILTER (WHERE codes.embedding IS NOT NULL) AS rows_with_embeddings,
    COUNT(*) FILTER (WHERE codes.metadata IS NOT NULL) AS rows_with_metadata,
    COUNT(*) FILTER (WHERE codes.content IS NOT NULL) AS rows_with_content
FROM icd_codes AS codes;
-- Show a small sample of rows with a per-column "populated or not" flag.
-- ORDER BY id makes LIMIT pick the same five rows on every run; without an
-- ORDER BY the rows returned by LIMIT are arbitrary and can change between
-- executions, which makes the output useless for comparison.
SELECT
    id,
    code,
    display,
    version,
    category,
    CASE
        WHEN embedding IS NOT NULL THEN '✅ Has embedding'
        ELSE '❌ No embedding'
    END AS embedding_status,
    CASE
        WHEN metadata IS NOT NULL THEN '✅ Has metadata'
        ELSE '❌ No metadata'
    END AS metadata_status,
    CASE
        WHEN content IS NOT NULL THEN '✅ Has content'
        ELSE '❌ No content'
    END AS content_status
FROM icd_codes
ORDER BY id
LIMIT 5;
\echo ''
\echo '🎯 MIGRATION VERIFICATION COMPLETE! 🎯'
\echo '====================================================='
-- Summarise the migration from the live catalog instead of printing fixed
-- success lines: each row shows ✅ only when its condition actually holds,
-- so a failed or partial migration is no longer reported as successful.
-- The index check passes when at least one index on icd_codes has
-- "embedding" or "metadata" in its name.
SELECT
    CASE WHEN checks.passed THEN '✅' ELSE '❌' END AS result,
    checks.description
FROM (
    VALUES
        (
            1,
            'pgvector extension installed',
            EXISTS (
                SELECT 1
                FROM pg_extension
                WHERE extname = 'vector'
            )
        ),
        (
            2,
            'embedding column (vector type) added',
            EXISTS (
                SELECT 1
                FROM information_schema.columns
                WHERE table_name = 'icd_codes'
                  AND column_name = 'embedding'
                  AND udt_name = 'vector'
            )
        ),
        (
            3,
            'metadata column (JSONB) added',
            EXISTS (
                SELECT 1
                FROM information_schema.columns
                WHERE table_name = 'icd_codes'
                  AND column_name = 'metadata'
                  AND udt_name = 'jsonb'
            )
        ),
        (
            4,
            'content column (TEXT) added',
            EXISTS (
                SELECT 1
                FROM information_schema.columns
                WHERE table_name = 'icd_codes'
                  AND column_name = 'content'
                  AND udt_name = 'text'
            )
        ),
        (
            5,
            'Performance indexes created',
            EXISTS (
                SELECT 1
                FROM pg_indexes
                WHERE tablename = 'icd_codes'
                  AND (indexname LIKE '%embedding%' OR indexname LIKE '%metadata%')
            )
        )
) AS checks (step, description, passed)
ORDER BY checks.step;
-- Print the follow-up steps for the operator. These lines are static text:
-- they are echoed on every run and do not depend on the query results above.
\echo ''
\echo '🚀 NEXT STEPS:'
\echo '1. Start application: npm run start:dev'
\echo '2. Initialize vector store: POST /pgvector/initialize'
\echo '3. Generate embeddings: POST /pgvector/generate-and-store-all-embeddings'
\echo '4. Test vector search: POST /pgvector/search'
\echo '====================================================='