From 2821fafc854abcfb279e1a48573ab3dd5d0ebaeb Mon Sep 17 00:00:00 2001 From: my-vegetable-has-exploded Date: Tue, 19 Mar 2024 04:54:57 +0000 Subject: [PATCH] rm perf.sh & refactor examples. Signed-off-by: my-vegetable-has-exploded --- bindings/python/examples/perf.sh | 5 - .../python/examples/psycopg_copy_dense.py | 57 +++++++++ .../python/examples/psycopg_copy_example.py | 117 ------------------ .../python/examples/psycopg_copy_sparse.py | 52 ++++++++ 4 files changed, 109 insertions(+), 122 deletions(-) delete mode 100644 bindings/python/examples/perf.sh create mode 100644 bindings/python/examples/psycopg_copy_dense.py delete mode 100644 bindings/python/examples/psycopg_copy_example.py create mode 100644 bindings/python/examples/psycopg_copy_sparse.py diff --git a/bindings/python/examples/perf.sh b/bindings/python/examples/perf.sh deleted file mode 100644 index 26baa4bba..000000000 --- a/bindings/python/examples/perf.sh +++ /dev/null @@ -1,5 +0,0 @@ -/bin/python3 /home/envd/pgvecto.rs/bindings/python/examples/psycopg_testcpy.py & -PID=$(ps aux | grep "/usr/lib/postgresql/15/bin/postgres" | grep -v grep | awk '{print $2}') -# use perf to collect data -sudo /usr/lib/linux-tools/5.15.0-97-generic/perf record -e cpu-clock -F 1000 -g -p $PID -- sleep 30 -sudo /usr/lib/linux-tools/5.15.0-97-generic/perf script -i perf.data > insert.out.perf \ No newline at end of file diff --git a/bindings/python/examples/psycopg_copy_dense.py b/bindings/python/examples/psycopg_copy_dense.py new file mode 100644 index 000000000..c711910f9 --- /dev/null +++ b/bindings/python/examples/psycopg_copy_dense.py @@ -0,0 +1,57 @@ +import os + +import numpy as np +import psycopg + +from pgvecto_rs.psycopg import register_vector + +URL = "postgresql://{username}:{password}@{host}:{port}/{db_name}".format( + port=os.getenv("DB_PORT", "5432"), + host=os.getenv("DB_HOST", "localhost"), + username=os.getenv("DB_USER", "postgres"), + password=os.getenv("DB_PASS", "mysecretpassword"), + db_name=os.getenv("DB_NAME", "postgres"), +) + +# Connect to the DB and init things +with psycopg.connect(URL) as conn: + conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;") + register_vector(conn) + conn.execute("DROP TABLE IF EXISTS documents;") + conn.execute( + "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding vector(3) NOT NULL);", + ) + conn.commit() + try: + embeddings = [ + np.array([1, 2, 3]), + np.array([1.0, 2.0, 4.0]), + np.array([1, 3, 4]), + ] + + with conn.cursor() as cursor, cursor.copy( + "COPY documents (embedding) FROM STDIN (FORMAT BINARY)" + ) as copy: + # write row by row + for e in embeddings: + copy.write_row([e]) + copy.write_row([[1, 3, 5]]) + conn.commit() + + # Select the rows using binary format + cur = conn.execute( + "SELECT * FROM documents;", + binary=True, + ) + for row in cur.fetchall(): + print(row[0], ": ", row[1]) + + # output will be: + # 1 : [1.0, 2.0, 3.0] + # 2 : [1.0, 2.0, 4.0] + # 3 : [1.0, 3.0, 4.0] + # 4 : [1.0, 3.0, 5.0] + finally: + # Drop the table + conn.execute("DROP TABLE IF EXISTS documents;") + conn.commit() diff --git a/bindings/python/examples/psycopg_copy_example.py b/bindings/python/examples/psycopg_copy_example.py deleted file mode 100644 index 6543e2b8b..000000000 --- a/bindings/python/examples/psycopg_copy_example.py +++ /dev/null @@ -1,117 +0,0 @@ -import os - -import numpy as np -import pandas as pd -import psycopg -import pyarrow as pa -import pyarrow.parquet as pq - -from pgvecto_rs.psycopg import register_vector -from pgvecto_rs.types import SparseVector - -URL = 
"postgresql://{username}:{password}@{host}:{port}/{db_name}".format( - port=os.getenv("DB_PORT", "5432"), - host=os.getenv("DB_HOST", "localhost"), - username=os.getenv("DB_USER", "postgres"), - password=os.getenv("DB_PASS", "mysecretpassword"), - db_name=os.getenv("DB_NAME", "postgres"), -) - - -def copy_by_row(conn: psycopg.Connection): - conn.execute("DROP TABLE IF EXISTS documents;") - conn.execute( - "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding vector(3) NOT NULL);", - ) - conn.commit() - try: - # create a parquet file , and write row into it. - table = pa.Table.from_pandas( - pd.DataFrame( - { - "embedding": [ - np.array([1, 2, 3]), - np.array([1.0, 2.0, 4.0]), - np.array([1, 3, 4]), - ] - } - ) - ) - pq.write_table(table, "test.parquet") - - # load vectors from parquet file - table = pq.read_table("test.parquet") - # TODO: Is there a better way to convert pyarrow table to numpy array to reduce copy overhead? - embeddings = table.column("embedding").to_numpy() - - with conn.cursor() as cursor, cursor.copy( - "COPY documents (embedding) FROM STDIN (FORMAT BINARY)" - ) as copy: - # write row by row - for e in embeddings: - copy.write_row([e]) - copy.write_row([np.array([1, 3, 5])]) - conn.commit() - - # Select the rows using binary format - cur = conn.execute( - "SELECT * FROM documents;", - binary=True, - ) - for row in cur.fetchall(): - print(row[0], ": ", row[1]) - - # output will be: - # 1 : [1.0, 2.0, 3.0] - # 2 : [1.0, 2.0, 4.0] - # 3 : [1.0, 3.0, 4.0] - # 4 : [1.0, 3.0, 5.0] - finally: - # Drop the table - conn.execute("DROP TABLE IF EXISTS documents;") - conn.commit() - - -def copy_sparse_by_row(conn: psycopg.Connection): - conn.execute("DROP TABLE IF EXISTS documents;") - conn.execute( - "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding svector NOT NULL);", - ) - conn.commit() - try: - with conn.cursor() as cursor, cursor.copy( - "COPY documents (embedding) FROM STDIN (FORMAT BINARY)" - ) as copy: - copy.write_row([SparseVector(3, [0, 2], [1.0, 3.0])]) - copy.write_row([SparseVector(3, np.array([0, 1, 2]), [1.0, 2.0, 3.0])]) - copy.write_row([SparseVector(3, np.array([1, 2]), np.array([2.0, 3.0]))]) - conn.commit() - - # Select the rows using binary format - cur = conn.execute( - "SELECT * FROM documents;", - binary=True, - ) - for row in cur.fetchall(): - print(row[0], ": ", row[1]) - - # output will be: - # 1 : [1.0, 0.0, 3.0] - # 2 : [1.0, 2.0, 3.0] - # 3 : [0.0, 2.0, 3.0] - finally: - # Drop the table - conn.execute("DROP TABLE IF EXISTS documents;") - conn.commit() - - -# Connect to the DB and init things -with psycopg.connect(URL) as conn: - conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;") - register_vector(conn) - - # example for vectorf32 - copy_by_row(conn) - - # example for sparse vector - copy_sparse_by_row(conn) diff --git a/bindings/python/examples/psycopg_copy_sparse.py b/bindings/python/examples/psycopg_copy_sparse.py new file mode 100644 index 000000000..861a71658 --- /dev/null +++ b/bindings/python/examples/psycopg_copy_sparse.py @@ -0,0 +1,52 @@ +import os + +import numpy as np +import psycopg + +from pgvecto_rs.psycopg import register_vector +from pgvecto_rs.types import SparseVector + +URL = "postgresql://{username}:{password}@{host}:{port}/{db_name}".format( + port=os.getenv("DB_PORT", "5432"), + host=os.getenv("DB_HOST", "localhost"), + username=os.getenv("DB_USER", "postgres"), + password=os.getenv("DB_PASS", "mysecretpassword"), + db_name=os.getenv("DB_NAME", "postgres"), +) + + +# Connect to the DB and init things +with 
psycopg.connect(URL) as conn: + conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;") + register_vector(conn) + conn.execute("DROP TABLE IF EXISTS documents;") + conn.execute( + "CREATE TABLE documents (id SERIAL PRIMARY KEY, embedding svector NOT NULL);", + ) + conn.commit() + try: + with conn.cursor() as cursor, cursor.copy( + "COPY documents (embedding) FROM STDIN (FORMAT BINARY)" + ) as copy: + copy.write_row([SparseVector(3, [0, 2], [1.0, 3.0])]) + copy.write_row([SparseVector(3, np.array([0, 1, 2]), [1.0, 2.0, 3.0])]) + copy.write_row([SparseVector(3, np.array([1, 2]), np.array([2.0, 3.0]))]) + conn.pgconn.flush() + conn.commit() + + # Select the rows using binary format + cur = conn.execute( + "SELECT * FROM documents;", + binary=True, + ) + for row in cur.fetchall(): + print(row[0], ": ", row[1]) + + # output will be: + # 1 : [1.0, 0.0, 3.0] + # 2 : [1.0, 2.0, 3.0] + # 3 : [0.0, 2.0, 3.0] + finally: + # Drop the table + conn.execute("DROP TABLE IF EXISTS documents;") + conn.commit()
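
Note: both new examples load rows with binary COPY. For a handful of vectors a plain parameterized INSERT is enough, and the loaded data can be queried back with a distance operator. The sketch below reuses the same URL and connection setup as the examples; it is only a sketch, and it assumes that the dumpers registered by register_vector also adapt query parameters and that `<->` is the squared-Euclidean distance operator of the vectors extension. The `items` table name and the probe vector are illustrative, not part of this patch.

    import os

    import numpy as np
    import psycopg

    from pgvecto_rs.psycopg import register_vector

    URL = "postgresql://{username}:{password}@{host}:{port}/{db_name}".format(
        port=os.getenv("DB_PORT", "5432"),
        host=os.getenv("DB_HOST", "localhost"),
        username=os.getenv("DB_USER", "postgres"),
        password=os.getenv("DB_PASS", "mysecretpassword"),
        db_name=os.getenv("DB_NAME", "postgres"),
    )

    with psycopg.connect(URL) as conn:
        conn.execute("CREATE EXTENSION IF NOT EXISTS vectors;")
        register_vector(conn)
        conn.execute("DROP TABLE IF EXISTS items;")
        conn.execute(
            "CREATE TABLE items (id SERIAL PRIMARY KEY, embedding vector(3) NOT NULL);",
        )
        conn.commit()
        try:
            # Plain INSERT path: the registered dumpers are assumed to adapt
            # numpy arrays to the vector type for %s parameters.
            with conn.cursor() as cursor:
                cursor.executemany(
                    "INSERT INTO items (embedding) VALUES (%s);",
                    [
                        (np.array([1, 2, 3]),),
                        (np.array([1.0, 2.0, 4.0]),),
                        (np.array([1, 3, 4]),),
                    ],
                )
            conn.commit()

            # Query the rows nearest to a probe vector (assumes `<->` is the
            # squared-Euclidean operator provided by the vectors extension).
            cur = conn.execute(
                "SELECT id, embedding FROM items ORDER BY embedding <-> %s LIMIT 2;",
                (np.array([1.0, 2.0, 3.0]),),
                binary=True,
            )
            for row in cur.fetchall():
                print(row[0], ": ", row[1])
        finally:
            # Drop the table
            conn.execute("DROP TABLE IF EXISTS items;")
            conn.commit()

For bulk loads the binary COPY path used by the new examples is the faster option; the INSERT path above is mainly useful as a quick check that the vector adapters are registered correctly.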