-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathsql_script.sql
More file actions
117 lines (106 loc) · 3.14 KB
/
sql_script.sql
File metadata and controls
117 lines (106 loc) · 3.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
-- Enable the pgvector extension
create extension if not exists vector;
-- Create the documentation chunks table
create table site_pages (
id bigserial primary key,
url varchar not null,
chunk_number integer not null,
title varchar not null,
summary varchar not null,
content text not null, -- Added content column
metadata jsonb not null default '{}'::jsonb, -- Added metadata column
embedding vector(1536), -- OpenAI embeddings are 1536 dimensions
created_at timestamp with time zone default timezone('utc'::text, now()) not null,
-- Add a unique constraint to prevent duplicate chunks for the same URL
unique(url, chunk_number)
);
-- Create an index for better vector similarity search performance
create index on site_pages using ivfflat (embedding vector_cosine_ops);
-- Create an index on metadata for faster filtering
create index idx_site_pages_metadata on site_pages using gin (metadata);
-- Create a function to search for documentation chunks
create function match_site_pages (
query_embedding vector(1536),
match_count int default 10,
filter jsonb DEFAULT '{}'::jsonb
) returns table (
id bigint,
url varchar,
chunk_number integer,
title varchar,
summary varchar,
content text,
metadata jsonb,
similarity float
)
language plpgsql
as $$
#variable_conflict use_column
begin
return query
select
id,
url,
chunk_number,
title,
summary,
content,
metadata,
1 - (site_pages.embedding <=> query_embedding) as similarity
from site_pages
where metadata @> filter
order by site_pages.embedding <=> query_embedding
limit match_count;
end;
$$;
-- Create the table for processed n8n workflows and their summaries
CREATE TABLE workflows (
workflow_id INTEGER PRIMARY KEY,
workflow_name TEXT NOT NULL,
workflow_description TEXT NOT NULL,
workflow_json JSONB NOT NULL,
n8n_demo TEXT NOT NULL,
summary_accomplishment TEXT NOT NULL,
summary_nodes TEXT NOT NULL,
summary_suggestions TEXT NOT NULL,
embedding vector(1536),
content TEXT NOT NULL,
metadata JSONB
);
DROP FUNCTION IF EXISTS match_summaries(vector, integer, jsonb);
-- Create a function to search for summaries
create function match_summaries (
query_embedding vector(1536),
match_count int default null,
filter jsonb DEFAULT '{}'
) returns table (
id integer,
content text,
metadata jsonb,
similarity float
)
language plpgsql
as $$
#variable_conflict use_column
begin
return query
select
workflow_id as id,
content,
metadata,
1 - (workflows.embedding <=> query_embedding) as similarity
from workflows
where metadata @> filter
order by workflows.embedding <=> query_embedding
limit match_count;
end;
$$;
-- Everything above will work for any PostgreSQL database. The below commands are for Supabase security
-- Enable RLS on the table
alter table site_pages enable row level security;
-- Create a policy that allows anyone to read
create policy "Allow public read access"
on site_pages
for select
to public
using (true);