-
Notifications
You must be signed in to change notification settings - Fork 73
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: Support binary copy in python SDK. (#419)
* add testcpy. Signed-off-by: my-vegetable-has-exploded <[email protected]> * add copy by blocks Signed-off-by: my-vegetable-has-exploded <[email protected]> * add perf tests. Signed-off-by: my-vegetable-has-exploded <[email protected]> * move to module. Signed-off-by: my-vegetable-has-exploded <[email protected]> * fix tests & add parquet example. Signed-off-by: my-vegetable-has-exploded <[email protected]> * add sparse vector. Signed-off-by: my-vegetable-has-exploded <[email protected]> * add tests & comments. Signed-off-by: my-vegetable-has-exploded <[email protected]> * fix typos. Signed-off-by: my-vegetable-has-exploded <[email protected]> * chores: use nightly in workflow. Signed-off-by: my-vegetable-has-exploded <[email protected]> * rm perf.sh & refactor examples. Signed-off-by: my-vegetable-has-exploded <[email protected]> --------- Signed-off-by: my-vegetable-has-exploded <[email protected]>
- Loading branch information
1 parent
3789c7d
commit 4b1147f
Showing
10 changed files
with
474 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,57 @@ | ||
import os | ||
|
||
import numpy as np | ||
import psycopg | ||
|
||
from pgvecto_rs.psycopg import register_vector | ||
|
||
# Connection URI for the example database. Every component can be overridden
# through the DB_* environment variables; the fallbacks below are used when a
# variable is unset.
URL = "postgresql://{username}:{password}@{host}:{port}/{db_name}".format(
    port=os.getenv("DB_PORT", "5432"),
    host=os.getenv("DB_HOST", "localhost"),
    username=os.getenv("DB_USER", "postgres"),
    password=os.getenv("DB_PASS", "mysecretpassword"),
    db_name=os.getenv("DB_NAME", "postgres"),
)
|
||
# Connect to the DB and init things
with psycopg.connect(URL) as conn:
    conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;")
    # NOTE(review): register_vector comes from pgvecto_rs.psycopg; presumably
    # it installs the vector adapters on this connection -- confirm there.
    register_vector(conn)
    conn.execute("DROP TABLE IF EXISTS documents;")
    conn.execute(
        "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding vector(3) NOT NULL);",
    )
    conn.commit()
    try:
        embeddings = [
            np.array([1, 2, 3]),
            np.array([1.0, 2.0, 4.0]),
            np.array([1, 3, 4]),
        ]

        with conn.cursor() as cursor, cursor.copy(
            "COPY documents (embedding) FROM STDIN (FORMAT BINARY)"
        ) as copy:
            # write row by row
            for e in embeddings:
                copy.write_row([e])
            # A plain Python list is written the same way as the arrays above.
            copy.write_row([[1, 3, 5]])
        # Commit only after the COPY context has closed: no other command can
        # run on the connection while the COPY operation is still in progress.
        conn.commit()

        # Select the rows using binary format
        cur = conn.execute(
            "SELECT * FROM documents;",
            binary=True,
        )
        for row in cur.fetchall():
            print(row[0], ": ", row[1])

        # output will be:
        # 1 : [1.0, 2.0, 3.0]
        # 2 : [1.0, 2.0, 4.0]
        # 3 : [1.0, 3.0, 4.0]
        # 4 : [1.0, 3.0, 5.0]
    finally:
        # If anything above failed, the transaction is aborted and every
        # further statement would be rejected, hiding the original error.
        # Rolling back first is a no-op on the success path and lets the
        # cleanup below run on the failure path.
        conn.rollback()
        # Drop the table
        conn.execute("DROP TABLE IF EXISTS documents;")
        conn.commit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,52 @@ | ||
import os | ||
|
||
import numpy as np | ||
import psycopg | ||
|
||
from pgvecto_rs.psycopg import register_vector | ||
from pgvecto_rs.types import SparseVector | ||
|
||
# Connection URI for the example database. Every component can be overridden
# through the DB_* environment variables; the fallbacks below are used when a
# variable is unset.
URL = "postgresql://{username}:{password}@{host}:{port}/{db_name}".format(
    port=os.getenv("DB_PORT", "5432"),
    host=os.getenv("DB_HOST", "localhost"),
    username=os.getenv("DB_USER", "postgres"),
    password=os.getenv("DB_PASS", "mysecretpassword"),
    db_name=os.getenv("DB_NAME", "postgres"),
)
|
||
|
||
# Connect to the DB and init things
with psycopg.connect(URL) as conn:
    conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;")
    # NOTE(review): register_vector comes from pgvecto_rs.psycopg; presumably
    # it installs the vector adapters on this connection -- confirm there.
    register_vector(conn)
    conn.execute("DROP TABLE IF EXISTS documents;")
    conn.execute(
        "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding svector NOT NULL);",
    )
    conn.commit()
    try:
        with conn.cursor() as cursor, cursor.copy(
            "COPY documents (embedding) FROM STDIN (FORMAT BINARY)"
        ) as copy:
            # SparseVector(dims, indices, values): indices and values may be
            # plain lists or numpy arrays, in any combination.
            copy.write_row([SparseVector(3, [0, 2], [1.0, 3.0])])
            copy.write_row([SparseVector(3, np.array([0, 1, 2]), [1.0, 2.0, 3.0])])
            copy.write_row([SparseVector(3, np.array([1, 2]), np.array([2.0, 3.0]))])
            # NOTE(review): flush placed inside the COPY block on the
            # assumption that it pushes libpq's pending output to the server;
            # the original nesting was lost -- confirm.
            conn.pgconn.flush()
        # Commit only after the COPY context has closed: no other command can
        # run on the connection while the COPY operation is still in progress.
        conn.commit()

        # Select the rows using binary format
        cur = conn.execute(
            "SELECT * FROM documents;",
            binary=True,
        )
        for row in cur.fetchall():
            print(row[0], ": ", row[1])

        # output will be:
        # 1 : [1.0, 0.0, 3.0]
        # 2 : [1.0, 2.0, 3.0]
        # 3 : [0.0, 2.0, 3.0]
    finally:
        # If anything above failed, the transaction is aborted and every
        # further statement would be rejected, hiding the original error.
        # Rolling back first is a no-op on the success path and lets the
        # cleanup below run on the failure path.
        conn.rollback()
        # Drop the table
        conn.execute("DROP TABLE IF EXISTS documents;")
        conn.commit()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
import collections | ||
|
||
# Sparse vector record: `dims` is the total number of dimensions, `indices`
# the positions that hold a value and `values` the entries at those positions
# (indices[k] pairs with values[k]).
SparseVector = collections.namedtuple("SparseVector", "dims indices values")


def print_sparse_vector(sparse_vector):
    """Return the dense form of *sparse_vector* rendered as a list string.

    Positions not listed in ``sparse_vector.indices`` are shown as ``0.0``.

    Args:
        sparse_vector: A ``SparseVector`` (or any object exposing ``dims``,
            ``indices`` and ``values``).

    Returns:
        The string form of a ``dims``-long list of floats, e.g.
        ``"[1.0, 0.0, 3.0]"``.

    Raises:
        IndexError: If an index falls outside ``range(-dims, dims)``.
    """
    dense = [0.0] * sparse_vector.dims
    for i, v in zip(sparse_vector.indices, sparse_vector.values):
        # str(list) uses each element's repr, so store builtin floats: ints
        # would print as "5" and NumPy >= 2 scalars as "np.float64(5.0)".
        dense[i] = float(v)
    return str(dense)


# override __str__ method of SparseVector
SparseVector.__str__ = print_sparse_vector
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.